patch lower-case-segops


 691          * Invoke ALL callbacks. as_do_callbacks will do one callback
 692          * per call, and not return (-1) until the callback has completed.
 693          * When as_do_callbacks returns zero, all callbacks have completed.
 694          */
 695         mutex_enter(&as->a_contents);
 696         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 697                 ;
 698 
 699         mutex_exit(&as->a_contents);
 700         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 701 
 702         if (!free_started) {
 703                 free_started = B_TRUE;
 704                 hat_free_start(hat);
 705         }
 706         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 707                 int err;
 708 
 709                 next = AS_SEGNEXT(as, seg);
 710 retry:
 711                 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
 712                 if (err == EAGAIN) {
 713                         mutex_enter(&as->a_contents);
 714                         if (as->a_callbacks) {
 715                                 AS_LOCK_EXIT(as, &as->a_lock);
 716                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 717                                 /*
 718                                  * Memory is currently locked. Wait for a
 719                                  * cv_signal that it has been unlocked, then
 720                                  * try the operation again.
 721                                  */
 722                                 if (AS_ISUNMAPWAIT(as) == 0)
 723                                         cv_broadcast(&as->a_cv);
 724                                 AS_SETUNMAPWAIT(as);
 725                                 AS_LOCK_EXIT(as, &as->a_lock);
 726                                 while (AS_ISUNMAPWAIT(as))
 727                                         cv_wait(&as->a_cv, &as->a_contents);
 728                         } else {
 729                                 /*
 730                                  * We may have raced with
 731                                  * segvn_reclaim()/segspt_reclaim(). In this

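The EAGAIN branch above is the wait-for-unlock handshake used throughout this file: the would-be unmapper broadcasts on a_cv to kick any sleeping lockers (notably the aio cleanup thread), sets AS_UNMAPWAIT, and then sleeps on the same condition variable under a_contents until a reclaim path clears the flag and wakes it. A minimal userland analogue using pthreads, with the rwlock omitted and all names hypothetical:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t contents = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cv = PTHREAD_COND_INITIALIZER;
    static bool unmapwait;                  /* stands in for AS_ISUNMAPWAIT() */

    /* Unmapper side: mirror of the EAGAIN path -- announce, then sleep. */
    void
    wait_for_unlock(void)
    {
            pthread_mutex_lock(&contents);
            if (!unmapwait)
                    pthread_cond_broadcast(&cv);    /* kick the cleanup thread */
            unmapwait = true;                       /* AS_SETUNMAPWAIT() */
            while (unmapwait)                       /* AS_ISUNMAPWAIT() loop */
                    pthread_cond_wait(&cv, &contents);
            pthread_mutex_unlock(&contents);
            /* the caller now goes back and retries the unmap */
    }

    /* Reclaim side: mirror of AS_CLRUNMAPWAIT() + cv_broadcast(). */
    void
    memory_unlocked(void)
    {
            pthread_mutex_lock(&contents);
            unmapwait = false;
            pthread_cond_broadcast(&cv);
            pthread_mutex_unlock(&contents);
    }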

 784 
 785         AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
 786 
 787         (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
 788 
 789         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 790 
 791                 if (seg->s_flags & S_PURGE) {
 792                         purgesize += seg->s_size;
 793                         continue;
 794                 }
 795 
 796                 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
 797                 if (newseg == NULL) {
 798                         AS_LOCK_EXIT(newas, &newas->a_lock);
 799                         as_setwatch(as);
 800                         AS_LOCK_EXIT(as, &as->a_lock);
 801                         as_free(newas);
 802                         return (-1);
 803                 }
 804                 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
 805                         /*
 806                          * We call seg_free() on the new seg
 807                          * because the segment is not set up
 808                          * completely; i.e. it has no ops.
 809                          */
 810                         as_setwatch(as);
 811                         AS_LOCK_EXIT(as, &as->a_lock);
 812                         seg_free(newseg);
 813                         AS_LOCK_EXIT(newas, &newas->a_lock);
 814                         as_free(newas);
 815                         return (error);
 816                 }
 817                 newas->a_size += seg->s_size;
 818         }
 819         newas->a_resvsize = as->a_resvsize - purgesize;
 820 
 821         error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
 822 
 823         AS_LOCK_EXIT(newas, &newas->a_lock);
 824 

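The loop above follows a copy-with-rollback shape: allocate a placeholder in the new address space, duplicate into it, and on any failure tear down everything built so far (seg_free() plus as_free()) before returning. A self-contained sketch of the same shape over a plain linked list, with hypothetical helpers standing in for seg_alloc()/the segop dup call:

    #include <stdlib.h>

    struct node { struct node *next; int payload; };

    /* Hypothetical per-element copy. */
    static struct node *
    node_dup(const struct node *src)
    {
            struct node *n = malloc(sizeof (*n));

            if (n != NULL) {
                    n->payload = src->payload;
                    n->next = NULL;
            }
            return (n);
    }

    static void
    free_list(struct node *n)
    {
            while (n != NULL) {
                    struct node *next = n->next;

                    free(n);
                    n = next;
            }
    }

    /* Mirror of the as_dup() loop: copy each element, roll back on failure. */
    int
    dup_list(const struct node *src, struct node **dstp)
    {
            struct node *head = NULL, **tail = &head;

            for (; src != NULL; src = src->next) {
                    struct node *n = node_dup(src);

                    if (n == NULL) {
                            free_list(head);   /* like seg_free() + as_free() */
                            return (-1);
                    }
                    *tail = n;
                    tail = &n->next;
            }
            *dstp = head;
            return (0);
    }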

 923 
 924                 as_lock_held = 1;
 925         }
 926 
 927         addrsav = raddr;
 928         segsav = seg;
 929 
 930         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 931                 if (raddr >= seg->s_base + seg->s_size) {
 932                         seg = AS_SEGNEXT(as, seg);
 933                         if (seg == NULL || raddr != seg->s_base) {
 934                                 res = FC_NOMAP;
 935                                 break;
 936                         }
 937                 }
 938                 if (raddr + rsize > seg->s_base + seg->s_size)
 939                         ssize = seg->s_base + seg->s_size - raddr;
 940                 else
 941                         ssize = rsize;
 942 
 943                 res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
 944 
 945                 /* Restore watchpoints */
 946                 if (holding_wpage) {
 947                         as_setwatch(as);
 948                         holding_wpage = 0;
 949                 }
 950 
 951                 if (res != 0)
 952                         break;
 953         }
 954 
 955         /*
 956          * If we were SOFTLOCKing and encountered a failure,
 957          * we must SOFTUNLOCK the range we already did. (Maybe we
 958          * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
 959          * right here...)
 960          */
 961         if (res != 0 && type == F_SOFTLOCK) {
 962                 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
 963                         if (addrsav >= seg->s_base + seg->s_size)
 964                                 seg = AS_SEGNEXT(as, seg);
 965                         ASSERT(seg != NULL);
 966                         /*
 967                          * Now call the fault routine again to perform the
 968                          * unlock using S_OTHER instead of the rw variable
 969                          * since we never got a chance to touch the pages.
 970                          */
 971                         if (raddr > seg->s_base + seg->s_size)
 972                                 ssize = seg->s_base + seg->s_size - addrsav;
 973                         else
 974                                 ssize = raddr - addrsav;
 975                         (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
 976                             F_SOFTUNLOCK, S_OTHER);
 977                 }
 978         }
 979         if (as_lock_held)
 980                 AS_LOCK_EXIT(as, &as->a_lock);
 981         if (lwp != NULL)
 982                 lwp->lwp_nostop--;
 983 
 984         /*
 985          * If the lower levels returned EDEADLK for a fault, it
 986          * means that we should retry the fault.  Let's also wait a
 987          * bit to let the deadlock-causing condition clear.
 988          * This is part of a gross hack to work around a design flaw
 989          * in the ufs/sds logging code and should go away when the
 990          * logging code is re-designed to fix the problem. See bug
 991          * 4125102 for details of the problem.
 992          */
 993         if (FC_ERRNO(res) == EDEADLK) {
 994                 delay(deadlk_wait);
 995                 res = 0;

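The for-loop above is the segment-walk idiom that recurs through the rest of this file: advance to the next contiguous segment when the address crosses s_base + s_size, fail if a hole appears, and clamp each chunk (ssize) so it never spans a segment boundary. Reduced to a self-contained sketch with hypothetical names:

    #include <stddef.h>
    #include <stdint.h>

    struct range { uintptr_t base; size_t size; };

    /* Apply op() to [addr, addr+len) one segment-sized chunk at a time. */
    int
    walk(const struct range *segs, size_t nsegs, uintptr_t addr, size_t len,
        int (*op)(uintptr_t, size_t))
    {
            size_t i = 0, chunk;

            /* find the segment containing addr (as_segat() analogue) */
            while (i < nsegs && addr >= segs[i].base + segs[i].size)
                    i++;
            if (i == nsegs || addr < segs[i].base)
                    return (-1);                    /* FC_NOMAP analogue */

            for (; len != 0; len -= chunk, addr += chunk) {
                    if (addr >= segs[i].base + segs[i].size) {
                            i++;                    /* AS_SEGNEXT() analogue */
                            if (i == nsegs || addr != segs[i].base)
                                    return (-1);    /* hole in the range */
                    }
                    /* clamp the chunk at the segment boundary */
                    if (addr + len > segs[i].base + segs[i].size)
                            chunk = segs[i].base + segs[i].size - addr;
                    else
                            chunk = len;
                    if (op(addr, chunk) != 0)
                            return (-1);
            }
            return (0);
    }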

1025         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1026             (size_t)raddr;
1027 
1028         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1029         seg = as_segat(as, raddr);
1030         if (seg == NULL) {
1031                 AS_LOCK_EXIT(as, &as->a_lock);
1032                 if (lwp != NULL)
1033                         lwp->lwp_nostop--;
1034                 return (FC_NOMAP);
1035         }
1036 
1037         for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1038                 if (raddr >= seg->s_base + seg->s_size) {
1039                         seg = AS_SEGNEXT(as, seg);
1040                         if (seg == NULL || raddr != seg->s_base) {
1041                                 res = FC_NOMAP;
1042                                 break;
1043                         }
1044                 }
1045                 res = SEGOP_FAULTA(seg, raddr);
1046                 if (res != 0)
1047                         break;
1048         }
1049         AS_LOCK_EXIT(as, &as->a_lock);
1050         if (lwp != NULL)
1051                 lwp->lwp_nostop--;
1052         /*
1053          * If the lower levels returned EDEADLK for a fault, it
1054          * means that we should retry the fault.  Let's also wait a
1055          * bit to let the deadlock-causing condition clear.
1056          * This is part of a gross hack to work around a design flaw
1057          * in the ufs/sds logging code and should go away when the
1058          * logging code is re-designed to fix the problem. See bug
1059          * 4125102 for details of the problem.
1060          */
1061         if (FC_ERRNO(res) == EDEADLK) {
1062                 delay(deadlk_wait);
1063                 res = 0;
1064                 goto retry;
1065         }


1115         seg = as_segat(as, raddr);
1116         if (seg == NULL) {
1117                 as_setwatch(as);
1118                 AS_LOCK_EXIT(as, &as->a_lock);
1119                 return (ENOMEM);
1120         }
1121 
1122         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1123                 if (raddr >= seg->s_base + seg->s_size) {
1124                         seg = AS_SEGNEXT(as, seg);
1125                         if (seg == NULL || raddr != seg->s_base) {
1126                                 error = ENOMEM;
1127                                 break;
1128                         }
1129                 }
1130                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1131                         ssize = seg->s_base + seg->s_size - raddr;
1132                 else
1133                         ssize = rsize;
1134 retry:
1135                 error = SEGOP_SETPROT(seg, raddr, ssize, prot);
1136 
1137                 if (error == IE_NOMEM) {
1138                         error = EAGAIN;
1139                         break;
1140                 }
1141 
1142                 if (error == IE_RETRY) {
1143                         AS_LOCK_EXIT(as, &as->a_lock);
1144                         writer = 1;
1145                         goto setprot_top;
1146                 }
1147 
1148                 if (error == EAGAIN) {
1149                         /*
1150                          * Make sure we have a_lock as writer.
1151                          */
1152                         if (writer == 0) {
1153                                 AS_LOCK_EXIT(as, &as->a_lock);
1154                                 writer = 1;
1155                                 goto setprot_top;

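The IE_RETRY/EAGAIN handling above relies on there being no atomic reader-to-writer upgrade on a_lock: the reader lock is dropped and the whole operation restarts from setprot_top under the writer lock, recomputing everything that was derived while reading. The same dance with a pthreads rwlock (MY_IE_RETRY is a hypothetical stand-in for IE_RETRY):

    #include <pthread.h>

    #define MY_IE_RETRY (-2)        /* hypothetical stand-in for IE_RETRY */

    static pthread_rwlock_t a_lock = PTHREAD_RWLOCK_INITIALIZER;

    int
    do_setprot(int (*try_op)(void))
    {
            int writer = 0;
            int err;

    top:
            if (writer)
                    pthread_rwlock_wrlock(&a_lock);
            else
                    pthread_rwlock_rdlock(&a_lock);

            err = try_op();         /* stands in for the setprot loop body */
            if (err == MY_IE_RETRY && !writer) {
                    /* no atomic upgrade: drop, re-take as writer, redo all */
                    pthread_rwlock_unlock(&a_lock);
                    writer = 1;
                    goto top;
            }
            pthread_rwlock_unlock(&a_lock);
            return (err);
    }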

1266         seg = as_segat(as, raddr);
1267         if (seg == NULL) {
1268                 as_setwatch(as);
1269                 AS_LOCK_EXIT(as, &as->a_lock);
1270                 return (ENOMEM);
1271         }
1272 
1273         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1274                 if (raddr >= seg->s_base + seg->s_size) {
1275                         seg = AS_SEGNEXT(as, seg);
1276                         if (seg == NULL || raddr != seg->s_base) {
1277                                 error = ENOMEM;
1278                                 break;
1279                         }
1280                 }
1281                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1282                         ssize = seg->s_base + seg->s_size - raddr;
1283                 else
1284                         ssize = rsize;
1285 
1286                 error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
1287                 if (error != 0)
1288                         break;
1289         }
1290         as_setwatch(as);
1291         AS_LOCK_EXIT(as, &as->a_lock);
1292         return (error);
1293 }
1294 
1295 int
1296 as_unmap(struct as *as, caddr_t addr, size_t size)
1297 {
1298         struct seg *seg, *seg_next;
1299         struct as_callback *cb;
1300         caddr_t raddr, eaddr;
1301         size_t ssize, rsize = 0;
1302         int err;
1303 
1304 top:
1305         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1306         eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &

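The normalization at the top of as_unmap() rounds the start of the range down and the end up to page boundaries. The same arithmetic as a tiny standalone program (4K page assumed):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGESIZE   4096UL
    #define PAGEOFFSET (PAGESIZE - 1)
    #define PAGEMASK   (~PAGEOFFSET)

    int
    main(void)
    {
            uintptr_t addr = 0x10123, size = 0x2345;
            uintptr_t raddr = addr & PAGEMASK;                       /* 0x10000 */
            uintptr_t eaddr = (addr + size + PAGEOFFSET) & PAGEMASK; /* 0x13000 */

            printf("raddr=%#" PRIxPTR " eaddr=%#" PRIxPTR "\n", raddr, eaddr);
            return (0);
    }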

1332                 else
1333                         ssize = eaddr - raddr;
1334 
1335                 /*
1336                  * Save next segment pointer since seg can be
1337                  * destroyed during the segment unmap operation.
1338                  */
1339                 seg_next = AS_SEGNEXT(as, seg);
1340 
1341                 /*
1342                  * We didn't count /dev/null mappings, so ignore them here.
1343                  * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1344                  * we have to do this check here while we have seg.)
1345                  */
1346                 rsize = 0;
1347                 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1348                     !SEG_IS_PARTIAL_RESV(seg))
1349                         rsize = ssize;
1350 
1351 retry:
1352                 err = SEGOP_UNMAP(seg, raddr, ssize);
1353                 if (err == EAGAIN) {
1354                         /*
1355                          * Memory is currently locked.  It must be unlocked
1356                          * before this operation can succeed through a retry.
1357                          * The possible reasons for locked memory and
1358                          * corresponding strategies for unlocking are:
1359                          * (1) Normal I/O
1360                          *      wait for a signal that the I/O operation
1361                          *      has completed and the memory is unlocked.
1362                          * (2) Asynchronous I/O
1363                          *      The aio subsystem does not unlock pages when
1364                          *      the I/O is completed. Those pages are unlocked
1365                          *      when the application calls aiowait/aioerror.
1366                          *      So, to prevent blocking forever, cv_broadcast()
1367                          *      is done to wake up aio_cleanup_thread.
1368                          *      Subsequently, segvn_reclaim will be called, and
1369                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
1370                          * (3) Long term page locking:
1371                          *      Drivers intending to have pages locked for a
1372                          *      period considerably longer than for normal I/O


1770  */
1771 void
1772 as_purge(struct as *as)
1773 {
1774         struct seg *seg;
1775         struct seg *next_seg;
1776 
1777         /*
1778          * the setting of NEEDSPURGE is protected by as_rangelock(), so
1779          * no need to grab a_contents mutex for this check
1780          */
1781         if ((as->a_flags & AS_NEEDSPURGE) == 0)
1782                 return;
1783 
1784         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1785         next_seg = NULL;
1786         seg = AS_SEGFIRST(as);
1787         while (seg != NULL) {
1788                 next_seg = AS_SEGNEXT(as, seg);
1789                 if (seg->s_flags & S_PURGE)
1790                         (void) SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1791                 seg = next_seg;
1792         }
1793         AS_LOCK_EXIT(as, &as->a_lock);
1794 
1795         mutex_enter(&as->a_contents);
1796         as->a_flags &= ~AS_NEEDSPURGE;
1797         mutex_exit(&as->a_contents);
1798 }
1799 
1800 /*
1801  * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1802  * range of addresses at least "minlen" long, where the base of the range is
1803  * at "off" phase from an "align" boundary and there is space for a
1804          * "redzone"-sized redzone on either side of the range.  Thus,
1805  * if align was 4M and off was 16k, the user wants a hole which will start
1806  * 16k into a 4M page.
1807  *
1808  * If flags specifies AH_HI, the hole will have the highest possible address
1809  * in the range.  We use the as->a_lastgap field to figure out where to
1810  * start looking for a gap.

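The align/off phase rule can be written as a one-line computation; a hypothetical helper (the kernel uses its own rounding macros for this), with the comment's 4M/16K example worked out:

    #include <stdint.h>

    /* Smallest b >= lo with b % align == off. */
    uintptr_t
    phase_up(uintptr_t lo, uintptr_t align, uintptr_t off)
    {
            uintptr_t b = (lo / align) * align + off;

            return (b >= lo ? b : b + align);
    }
    /* e.g. phase_up(0x500000, 0x400000, 0x4000) == 0x804000, 16K into a 4M page */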

2082 
2083         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2084         seg = as_segat(as, raddr);
2085         if (seg == NULL) {
2086                 AS_LOCK_EXIT(as, &as->a_lock);
2087                 return (-1);
2088         }
2089 
2090         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2091                 if (raddr >= seg->s_base + seg->s_size) {
2092                         seg = AS_SEGNEXT(as, seg);
2093                         if (seg == NULL || raddr != seg->s_base) {
2094                                 error = -1;
2095                                 break;
2096                         }
2097                 }
2098                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2099                         ssize = seg->s_base + seg->s_size - raddr;
2100                 else
2101                         ssize = rsize;
2102                 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
2103                 if (isize != ssize) {
2104                         error = -1;
2105                         break;
2106                 }
2107                 vec += btopr(ssize);
2108         }
2109         AS_LOCK_EXIT(as, &as->a_lock);
2110         return (error);
2111 }
2112 
2113 static void
2114 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2115         ulong_t *bitmap, size_t position, size_t npages)
2116 {
2117         caddr_t range_start;
2118         size_t  pos1 = position;
2119         size_t  pos2;
2120         size_t  size;
2121         size_t  end_pos = npages + position;
2122 
2123         while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2124                 size = ptob((pos2 - pos1));
2125                 range_start = (caddr_t)((uintptr_t)addr +
2126                     ptob(pos1 - position));
2127 
2128                 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
2129                     (ulong_t *)NULL, (size_t)NULL);
2130                 pos1 = pos2;
2131         }
2132 }
2133 
2134 static void
2135 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2136         caddr_t raddr, size_t rsize)
2137 {
2138         struct seg *seg = as_segat(as, raddr);
2139         size_t ssize;
2140 
2141         while (rsize != 0) {
2142                 if (raddr >= seg->s_base + seg->s_size)
2143                         seg = AS_SEGNEXT(as, seg);
2144 
2145                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2146                         ssize = seg->s_base + seg->s_size - raddr;
2147                 else
2148                         ssize = rsize;

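as_segunlock() above walks mlock_map with bt_range(), unlocking each maximal run of set bits as a single SEGOP_LOCKOP() call. A self-contained analogue of that bit-run scan:

    #include <limits.h>
    #include <stddef.h>

    #define BT_NBIPUL (sizeof (unsigned long) * CHAR_BIT)
    #define BT_TEST(map, i) \
            (((map)[(i) / BT_NBIPUL] >> ((i) % BT_NBIPUL)) & 1UL)

    /* Report the next run of set bits in [*pos1, end) as [*pos1, *pos2). */
    int
    bit_range(const unsigned long *map, size_t *pos1, size_t *pos2, size_t end)
    {
            size_t i = *pos1;

            while (i < end && !BT_TEST(map, i))     /* skip clear bits */
                    i++;
            if (i == end)
                    return (0);                     /* no more runs */
            *pos1 = i;
            while (i < end && BT_TEST(map, i))      /* extend the run */
                    i++;
            *pos2 = i;
            return (1);
    }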

2204                 if (seg == NULL) {
2205                         AS_LOCK_EXIT(as, &as->a_lock);
2206                         return (0);
2207                 }
2208 
2209                 do {
2210                         raddr = (caddr_t)((uintptr_t)seg->s_base &
2211                             (uintptr_t)PAGEMASK);
2212                         rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2213                             PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2214                 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2215 
2216                 mlock_size = BT_BITOUL(btopr(rlen));
2217                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2218                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2219                 AS_LOCK_EXIT(as, &as->a_lock);
2220                 return (EAGAIN);
2221         }
2222 
2223                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2224                         error = SEGOP_LOCKOP(seg, seg->s_base,
2225                             seg->s_size, attr, MC_LOCK, mlock_map, pos);
2226                         if (error != 0)
2227                                 break;
2228                         pos += seg_pages(seg);
2229                 }
2230 
2231                 if (error) {
2232                         for (seg = AS_SEGFIRST(as); seg != NULL;
2233                             seg = AS_SEGNEXT(as, seg)) {
2234 
2235                                 raddr = (caddr_t)((uintptr_t)seg->s_base &
2236                                     (uintptr_t)PAGEMASK);
2237                                 npages = seg_pages(seg);
2238                                 as_segunlock(seg, raddr, attr, mlock_map,
2239                                     idx, npages);
2240                                 idx += npages;
2241                         }
2242                 }
2243 
2244                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2245                 AS_LOCK_EXIT(as, &as->a_lock);
2246                 goto lockerr;
2247         } else if (func == MC_UNLOCKAS) {
2248                 mutex_enter(&as->a_contents);
2249                 AS_CLRPGLCK(as);
2250                 mutex_exit(&as->a_contents);
2251 
2252                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2253                         error = SEGOP_LOCKOP(seg, seg->s_base,
2254                             seg->s_size, attr, MC_UNLOCK, NULL, 0);
2255                         if (error != 0)
2256                                 break;
2257                 }
2258 
2259                 AS_LOCK_EXIT(as, &as->a_lock);
2260                 goto lockerr;
2261         }
2262 
2263         /*
2264          * Normalize addresses and sizes.
2265          */
2266         initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2267         initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2268             (size_t)raddr;
2269 
2270         if (raddr + rsize < raddr) {         /* check for wraparound */
2271                 AS_LOCK_EXIT(as, &as->a_lock);
2272                 return (ENOMEM);
2273         }

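The sizing at line 2216 allocates one bit per page of the rounded range, in whole ulong_t words: btopr() rounds bytes up to pages and BT_BITOUL() rounds bits up to words. As plain arithmetic:

    #include <stdio.h>

    #define PAGESIZE  4096UL
    #define NBIPUL    (sizeof (unsigned long) * 8)

    int
    main(void)
    {
            unsigned long rlen = 10 * PAGESIZE + 123;       /* example range */
            unsigned long pages = (rlen + PAGESIZE - 1) / PAGESIZE;  /* btopr */
            unsigned long words = (pages + NBIPUL - 1) / NBIPUL;     /* BT_BITOUL */

            printf("%lu pages -> %lu map words\n", pages, words);    /* 11 -> 1 */
            return (0);
    }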

2311                                 }
2312                                 AS_LOCK_EXIT(as, &as->a_lock);
2313                                 return (ENOMEM);
2314                         }
2315                 }
2316                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2317                         ssize = seg->s_base + seg->s_size - raddr;
2318                 else
2319                         ssize = rsize;
2320 
2321                 /*
2322                  * Dispatch on specific function.
2323                  */
2324                 switch (func) {
2325 
2326                 /*
2327                  * Synchronize cached data from mappings with backing
2328                  * objects.
2329                  */
2330                 case MC_SYNC:
2331                         if (error = SEGOP_SYNC(seg, raddr, ssize,
2332                             attr, (uint_t)arg)) {
2333                                 AS_LOCK_EXIT(as, &as->a_lock);
2334                                 return (error);
2335                         }
2336                         break;
2337 
2338                 /*
2339                  * Lock pages in memory.
2340                  */
2341                 case MC_LOCK:
2342                         if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2343                             attr, func, mlock_map, pos)) {
2344                                 as_unlockerr(as, attr, mlock_map, initraddr,
2345                                     initrsize - rsize + ssize);
2346                                 kmem_free(mlock_map, mlock_size *
2347                                     sizeof (ulong_t));
2348                                 AS_LOCK_EXIT(as, &as->a_lock);
2349                                 goto lockerr;
2350                         }
2351                         break;
2352 
2353                 /*
2354                  * Unlock mapped pages.
2355                  */
2356                 case MC_UNLOCK:
2357                         (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2358                             (ulong_t *)NULL, (size_t)NULL);
2359                         break;
2360 
2361                 /*
2362                  * Store VM advise for mapped pages in segment layer.
2363                  */
2364                 case MC_ADVISE:
2365                         error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2366 
2367                         /*
2368                          * Check for regular errors and special retry error
2369                          */
2370                         if (error) {
2371                                 if (error == IE_RETRY) {
2372                                         /*
2373                                          * Need to acquire writers lock, so
2374                                          * have to drop readers lock and start
2375                                          * all over again
2376                                          */
2377                                         AS_LOCK_EXIT(as, &as->a_lock);
2378                                         goto retry;
2379                                 } else if (error == IE_REATTACH) {
2380                                         /*
2381                                          * Find segment for current address
2382                                          * because current segment just got
2383                                          * split or concatenated
2384                                          */
2385                                         seg = as_segat(as, raddr);
2386                                         if (seg == NULL) {
2387                                                 AS_LOCK_EXIT(as, &as->a_lock);
2388                                                 return (ENOMEM);
2389                                         }
2390                                 } else {
2391                                         /*
2392                                          * Regular error
2393                                          */
2394                                         AS_LOCK_EXIT(as, &as->a_lock);
2395                                         return (error);
2396                                 }
2397                         }
2398                         break;
2399 
2400                 case MC_INHERIT_ZERO:
2401                         if (seg->s_ops->inherit == NULL) {
2402                                 error = ENOTSUP;
2403                         } else {
2404                                 error = SEGOP_INHERIT(seg, raddr, ssize,
2405                                     SEGP_INH_ZERO);
2406                         }
2407                         if (error != 0) {
2408                                 AS_LOCK_EXIT(as, &as->a_lock);
2409                                 return (error);
2410                         }
2411                         break;
2412 
2413                 /*
2414                  * Can't happen.
2415                  */
2416                 default:
2417                         panic("as_ctl: bad operation %d", func);
2418                         /*NOTREACHED*/
2419                 }
2420 
2421                 rsize -= ssize;
2422                 raddr += ssize;
2423         }
2424 

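For orientation, the user-level entry point that funnels into this switch on illumos/Solaris is memcntl(2). A minimal usage sketch driving the MC_SYNC and MC_LOCK cases above (error handling reduced to a bare failure return):

    #include <sys/types.h>
    #include <sys/mman.h>

    int
    sync_and_lock(caddr_t addr, size_t len)
    {
            /* MC_SYNC -> the SEGOP_SYNC() case above */
            if (memcntl(addr, len, MC_SYNC, (caddr_t)MS_SYNC, 0, 0) != 0)
                    return (-1);
            /* MC_LOCK -> SEGOP_LOCKOP(..., MC_LOCK, ...) */
            if (memcntl(addr, len, MC_LOCK, 0, 0, 0) != 0)
                    return (-1);
            return (0);
    }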

2499         /*
2500          * Count the number of segments covered by the range we are about to
2501          * lock. The segment count is used to size the shadow list we return
2502          * back to the caller.
2503          */
2504         for (; size != 0; size -= ssize, addr += ssize) {
2505                 if (addr >= seg->s_base + seg->s_size) {
2506 
2507                         seg = AS_SEGNEXT(as, seg);
2508                         if (seg == NULL || addr != seg->s_base) {
2509                                 AS_LOCK_EXIT(as, &as->a_lock);
2510                                 return (EFAULT);
2511                         }
2512                         /*
2513                          * Do a quick check if subsequent segments
2514                          * will most likely support pagelock.
2515                          */
2516                         if (seg->s_ops == &segvn_ops) {
2517                                 vnode_t *vp;
2518 
2519                                 if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
2520                                     vp != NULL) {
2521                                         AS_LOCK_EXIT(as, &as->a_lock);
2522                                         goto slow;
2523                                 }
2524                         } else if (seg->s_ops != &segspt_shmops) {
2525                                 AS_LOCK_EXIT(as, &as->a_lock);
2526                                 goto slow;
2527                         }
2528                         segcnt++;
2529                 }
2530                 if (addr + size > seg->s_base + seg->s_size) {
2531                         ssize = seg->s_base + seg->s_size - addr;
2532                 } else {
2533                         ssize = size;
2534                 }
2535         }
2536         ASSERT(segcnt > 1);
2537 
2538         plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2539 
2540         addr = sv_addr;
2541         size = sv_size;
2542         seg = sv_seg;
2543 
2544         for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2545                 if (addr >= seg->s_base + seg->s_size) {
2546                         seg = AS_SEGNEXT(as, seg);
2547                         ASSERT(seg != NULL && addr == seg->s_base);
2548                         cnt++;
2549                         ASSERT(cnt < segcnt);
2550                 }
2551                 if (addr + size > seg->s_base + seg->s_size) {
2552                         ssize = seg->s_base + seg->s_size - addr;
2553                 } else {
2554                         ssize = size;
2555                 }
2556                 pl = &plist[npages + cnt];
2557                 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2558                     L_PAGELOCK, rw);
2559                 if (error) {
2560                         break;
2561                 }
2562                 ASSERT(plist[npages + cnt] != NULL);
2563                 ASSERT(pl_off + btop(ssize) <= npages);
2564                 bcopy(plist[npages + cnt], &plist[pl_off],
2565                     btop(ssize) * sizeof (page_t *));
2566                 pl_off += btop(ssize);
2567         }
2568 
2569         if (size == 0) {
2570                 AS_LOCK_EXIT(as, &as->a_lock);
2571                 ASSERT(cnt == segcnt - 1);
2572                 *ppp = plist;
2573                 return (0);
2574         }
2575 
2576         /*
2577          * One of the pagelock calls failed.  The error type is in the error


2580          * back to the caller.
2581          */
2582 
2583         eaddr = addr;
2584         seg = sv_seg;
2585 
2586         for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2587                 if (addr >= seg->s_base + seg->s_size) {
2588                         seg = AS_SEGNEXT(as, seg);
2589                         ASSERT(seg != NULL && addr == seg->s_base);
2590                         cnt++;
2591                         ASSERT(cnt < segcnt);
2592                 }
2593                 if (eaddr > seg->s_base + seg->s_size) {
2594                         ssize = seg->s_base + seg->s_size - addr;
2595                 } else {
2596                         ssize = eaddr - addr;
2597                 }
2598                 pl = &plist[npages + cnt];
2599                 ASSERT(*pl != NULL);
2600                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2601                     L_PAGEUNLOCK, rw);
2602         }
2603 
2604         AS_LOCK_EXIT(as, &as->a_lock);
2605 
2606         kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2607 
2608         if (error != ENOTSUP && error != EFAULT) {
2609                 return (error);
2610         }
2611 
2612 slow:
2613         /*
2614          * If we are here because pagelock failed due to the need to cow-fault
2615          * in the pages we want to lock, F_SOFTLOCK will do this job, and in
2616          * the next as_pagelock() call for this address range pagelock will
2617          * hopefully succeed.
2618          */
2619         fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2620         if (fault_err != 0) {

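The shadow list that as_pagelock_segs() builds and tears down above has a two-part layout, sketched here as implied by the code (not authoritative):

    /*
     * plist[0 .. npages-1]               one page_t * per page, all
     *                                    segments concatenated; this is
     *                                    what the caller consumes.
     * plist[npages .. npages+segcnt-1]   the per-segment list pointers
     *                                    returned by the pagelock op,
     *                                    kept so each segment can be
     *                                    handed back its own list at
     *                                    L_PAGEUNLOCK time.
     */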

2655         seg = as_segat(as, raddr);
2656         if (seg == NULL) {
2657                 AS_LOCK_EXIT(as, &as->a_lock);
2658                 return (EFAULT);
2659         }
2660         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2661         if (raddr + rsize > seg->s_base + seg->s_size) {
2662                 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2663         }
2664         if (raddr + rsize <= raddr) {
2665                 AS_LOCK_EXIT(as, &as->a_lock);
2666                 return (EFAULT);
2667         }
2668 
2669         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2670             "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2671 
2672         /*
2673          * try to lock pages and pass back shadow list
2674          */
2675         err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2676 
2677         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2678 
2679         AS_LOCK_EXIT(as, &as->a_lock);
2680 
2681         if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2682                 return (err);
2683         }
2684 
2685         /*
2686          * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2687          * to no pagelock support for this segment or pages need to be cow
2688          * faulted in.  If a fault is needed, F_SOFTLOCK will do this job for
2689          * this as_pagelock() call, and in the next as_pagelock() call for the
2690          * same address range the pagelock call will hopefully succeed.
2691          */
2692         fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2693         if (fault_err != 0) {
2694                 return (fc_decode(fault_err));
2695         }

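For reference, the consumer-side pairing of this interface, as a non-runnable sketch in kernel context assuming the declarations from <vm/as.h> and an already-validated as/addr/len:

    page_t **pplist;

    if (as_pagelock(as, &pplist, addr, len, S_WRITE) == 0) {
            /*
             * Pages are locked (or F_SOFTLOCKed via the fallback above).
             * pplist may come back NULL when the slow path was taken;
             * as_pageunlock() then falls back to as_fault(F_SOFTUNLOCK),
             * as shown in its pp == NULL branch below.
             */
            /* ... perform the DMA or copy ... */
            as_pageunlock(as, pplist, addr, len, S_WRITE);
    }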

2718         ASSERT(seg != NULL);
2719         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2720         ASSERT(addr + size > seg->s_base + seg->s_size);
2721         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2722         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2723         ASSERT(plist != NULL);
2724 
2725         for (cnt = 0; addr < eaddr; addr += ssize) {
2726                 if (addr >= seg->s_base + seg->s_size) {
2727                         seg = AS_SEGNEXT(as, seg);
2728                         ASSERT(seg != NULL && addr == seg->s_base);
2729                         cnt++;
2730                 }
2731                 if (eaddr > seg->s_base + seg->s_size) {
2732                         ssize = seg->s_base + seg->s_size - addr;
2733                 } else {
2734                         ssize = eaddr - addr;
2735                 }
2736                 pl = &plist[npages + cnt];
2737                 ASSERT(*pl != NULL);
2738                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2739                     L_PAGEUNLOCK, rw);
2740         }
2741         ASSERT(cnt > 0);
2742         AS_LOCK_EXIT(as, &as->a_lock);
2743 
2744         cnt++;
2745         kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2746 }
2747 
2748 /*
2749  * unlock pages in a given address range
2750  */
2751 void
2752 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2753     enum seg_rw rw)
2754 {
2755         struct seg *seg;
2756         size_t rsize;
2757         caddr_t raddr;
2758 


2764          * falling back to as_fault
2765          */
2766         if (pp == NULL) {
2767                 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2768                 return;
2769         }
2770 
2771         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2772         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2773             (size_t)raddr;
2774 
2775         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2776         seg = as_segat(as, raddr);
2777         ASSERT(seg != NULL);
2778 
2779         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2780             "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2781 
2782         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2783         if (raddr + rsize <= seg->s_base + seg->s_size) {
2784                 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2785         } else {
2786                 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2787                 return;
2788         }
2789         AS_LOCK_EXIT(as, &as->a_lock);
2790         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2791 }
2792 
2793 int
2794 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2795     boolean_t wait)
2796 {
2797         struct seg *seg;
2798         size_t ssize;
2799         caddr_t raddr;                  /* rounded down addr */
2800         size_t rsize;                   /* rounded up size */
2801         int error = 0;
2802         size_t pgsz = page_get_pagesize(szc);
2803 
2804 setpgsz_top:


2819                 as_setwatch(as);
2820                 AS_LOCK_EXIT(as, &as->a_lock);
2821                 return (ENOMEM);
2822         }
2823 
2824         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2825                 if (raddr >= seg->s_base + seg->s_size) {
2826                         seg = AS_SEGNEXT(as, seg);
2827                         if (seg == NULL || raddr != seg->s_base) {
2828                                 error = ENOMEM;
2829                                 break;
2830                         }
2831                 }
2832                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2833                         ssize = seg->s_base + seg->s_size - raddr;
2834                 } else {
2835                         ssize = rsize;
2836                 }
2837 
2838 retry:
2839                 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2840 
2841                 if (error == IE_NOMEM) {
2842                         error = EAGAIN;
2843                         break;
2844                 }
2845 
2846                 if (error == IE_RETRY) {
2847                         AS_LOCK_EXIT(as, &as->a_lock);
2848                         goto setpgsz_top;
2849                 }
2850 
2851                 if (error == ENOTSUP) {
2852                         error = EINVAL;
2853                         break;
2854                 }
2855 
2856                 if (wait && (error == EAGAIN)) {
2857                         /*
2858                          * Memory is currently locked.  It must be unlocked
2859                          * before this operation can succeed through a retry.


2898                                  * number of retries without sleeping should
2899                                  * be very small. See segvn_reclaim() for
2900                                  * more comments.
2901                                  */
2902                                 AS_CLRNOUNMAPWAIT(as);
2903                                 mutex_exit(&as->a_contents);
2904                                 goto retry;
2905                         }
2906                         mutex_exit(&as->a_contents);
2907                         goto setpgsz_top;
2908                 } else if (error != 0) {
2909                         break;
2910                 }
2911         }
2912         as_setwatch(as);
2913         AS_LOCK_EXIT(as, &as->a_lock);
2914         return (error);
2915 }
2916 
2917 /*
2918  * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
2919  * in its chunk where s_szc is less than the szc we want to set.
2920  */
2921 static int
2922 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
2923     int *retry)
2924 {
2925         struct seg *seg;
2926         size_t ssize;
2927         int error;
2928 
2929         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
2930 
2931         seg = as_segat(as, raddr);
2932         if (seg == NULL) {
2933                 panic("as_iset3_default_lpsize: no seg");
2934         }
2935 
2936         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2937                 if (raddr >= seg->s_base + seg->s_size) {
2938                         seg = AS_SEGNEXT(as, seg);
2939                         if (seg == NULL || raddr != seg->s_base) {
2940                                 panic("as_iset3_default_lpsize: as changed");
2941                         }
2942                 }
2943                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2944                         ssize = seg->s_base + seg->s_size - raddr;
2945                 } else {
2946                         ssize = rsize;
2947                 }
2948 
2949                 if (szc > seg->s_szc) {
2950                         error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2951                         /* Only retry on EINVAL segments that have no vnode. */
2952                         if (error == EINVAL) {
2953                                 vnode_t *vp = NULL;
2954                                 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) &&
2955                                     (SEGOP_GETVP(seg, raddr, &vp) != 0 ||
2956                                     vp == NULL)) {
2957                                         *retry = 1;
2958                                 } else {
2959                                         *retry = 0;
2960                                 }
2961                         }
2962                         if (error) {
2963                                 return (error);
2964                         }
2965                 }
2966         }
2967         return (0);
2968 }
2969 
2970 /*
2971  * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
2972  * pagesize on each segment in its range, but if any fails with EINVAL,
2973  * then it reduces the pagesizes to the next size in the bitmap and
2974  * retries as_iset3_default_lpsize(). The reason why the code retries
2975  * smaller allowed sizes on EINVAL is because (a) the anon offset may not


3178         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3179 again:
3180         error = 0;
3181 
3182         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3183         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3184             (size_t)raddr;
3185 
3186         if (raddr + rsize < raddr) {         /* check for wraparound */
3187                 AS_LOCK_EXIT(as, &as->a_lock);
3188                 return (ENOMEM);
3189         }
3190         as_clearwatchprot(as, raddr, rsize);
3191         seg = as_segat(as, raddr);
3192         if (seg == NULL) {
3193                 as_setwatch(as);
3194                 AS_LOCK_EXIT(as, &as->a_lock);
3195                 return (ENOMEM);
3196         }
3197         if (seg->s_ops == &segvn_ops) {
3198                 rtype = SEGOP_GETTYPE(seg, addr);
3199                 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3200                 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3201                 segvn = 1;
3202         } else {
3203                 segvn = 0;
3204         }
3205         setaddr = raddr;
3206         setsize = 0;
3207 
3208         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3209                 if (raddr >= (seg->s_base + seg->s_size)) {
3210                         seg = AS_SEGNEXT(as, seg);
3211                         if (seg == NULL || raddr != seg->s_base) {
3212                                 error = ENOMEM;
3213                                 break;
3214                         }
3215                         if (seg->s_ops == &segvn_ops) {
3216                                 stype = SEGOP_GETTYPE(seg, raddr);
3217                                 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3218                                 stype &= (MAP_SHARED | MAP_PRIVATE);
3219                                 if (segvn && (rflags != sflags ||
3220                                     rtype != stype)) {
3221                                         /*
3222                                          * The next segment is also segvn but
3223                                          * has different flags and/or type.
3224                                          */
3225                                         ASSERT(setsize != 0);
3226                                         error = as_iset_default_lpsize(as,
3227                                             setaddr, setsize, rflags, rtype);
3228                                         if (error) {
3229                                                 break;
3230                                         }
3231                                         rflags = sflags;
3232                                         rtype = stype;
3233                                         setaddr = raddr;
3234                                         setsize = 0;
3235                                 } else if (!segvn) {
3236                                         rflags = sflags;


3310 as_setwatch(struct as *as)
3311 {
3312         struct watched_page *pwp;
3313         struct seg *seg;
3314         caddr_t vaddr;
3315         uint_t prot;
3316         int  err, retrycnt;
3317 
3318         if (avl_numnodes(&as->a_wpage) == 0)
3319                 return;
3320 
3321         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3322 
3323         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3324             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3325                 retrycnt = 0;
3326         retry:
3327                 vaddr = pwp->wp_vaddr;
3328                 if (pwp->wp_oprot != 0 ||    /* already set up */
3329                     (seg = as_segat(as, vaddr)) == NULL ||
3330                     SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
3331                         continue;
3332 
3333                 pwp->wp_oprot = prot;
3334                 if (pwp->wp_read)
3335                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3336                 if (pwp->wp_write)
3337                         prot &= ~PROT_WRITE;
3338                 if (pwp->wp_exec)
3339                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3340                 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3341                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3342                         if (err == IE_RETRY) {
3343                                 pwp->wp_oprot = 0;
3344                                 ASSERT(retrycnt == 0);
3345                                 retrycnt++;
3346                                 goto retry;
3347                         }
3348                 }
3349                 pwp->wp_prot = prot;
3350         }
3351 }
3352 
3353 /*
3354  * Clear all of the watched pages in the address space.
3355  */
3356 void
3357 as_clearwatch(struct as *as)
3358 {
3359         struct watched_page *pwp;
3360         struct seg *seg;
3361         caddr_t vaddr;
3362         uint_t prot;
3363         int err, retrycnt;
3364 
3365         if (avl_numnodes(&as->a_wpage) == 0)
3366                 return;
3367 
3368         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3369 
3370         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3371             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3372                 retrycnt = 0;
3373         retry:
3374                 vaddr = pwp->wp_vaddr;
3375                 if (pwp->wp_oprot == 0 ||    /* not set up */
3376                     (seg = as_segat(as, vaddr)) == NULL)
3377                         continue;
3378 
3379                 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3380                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3381                         if (err == IE_RETRY) {
3382                                 ASSERT(retrycnt == 0);
3383                                 retrycnt++;
3384                                 goto retry;
3385                         }
3386                 }
3387                 pwp->wp_oprot = 0;
3388                 pwp->wp_prot = 0;
3389         }
3390 }
3391 
3392 /*
3393  * Force a new setup for all the watched pages in the range.
3394  */
3395 static void
3396 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3397 {
3398         struct watched_page *pwp;
3399         struct watched_page tpw;
3400         caddr_t eaddr = addr + size;


3414                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3415 
3416         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3417                 retrycnt = 0;
3418                 vaddr = pwp->wp_vaddr;
3419 
3420                 wprot = prot;
3421                 if (pwp->wp_read)
3422                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3423                 if (pwp->wp_write)
3424                         wprot &= ~PROT_WRITE;
3425                 if (pwp->wp_exec)
3426                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3427                 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3428                 retry:
3429                         seg = as_segat(as, vaddr);
3430                         if (seg == NULL) {
3431                                 panic("as_setwatchprot: no seg");
3432                                 /*NOTREACHED*/
3433                         }
3434                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot);
3435                         if (err == IE_RETRY) {
3436                                 ASSERT(retrycnt == 0);
3437                                 retrycnt++;
3438                                 goto retry;
3439                         }
3440                 }
3441                 pwp->wp_oprot = prot;
3442                 pwp->wp_prot = wprot;
3443 
3444                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3445         }
3446 }
3447 
3448 /*
3449  * Clear all of the watched pages in the range.
3450  */
3451 static void
3452 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3453 {
3454         caddr_t eaddr = addr + size;

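The protection masking applied to each watched page is the same in as_setwatch() and as_setwatchprot() above: a read watchpoint must trap every access, so it strips all permissions; a write watchpoint strips only PROT_WRITE; an exec watchpoint also strips everything, presumably because instruction fetch cannot be trapped more narrowly on all MMUs. As a small pure function (hypothetical name, PROT_* values as in <sys/mman.h>):

    #define PROT_READ  0x1
    #define PROT_WRITE 0x2
    #define PROT_EXEC  0x4

    /* Compute the page protection that makes a watchpoint fire. */
    unsigned int
    watch_prot(unsigned int prot, int wp_read, int wp_write, int wp_exec)
    {
            if (wp_read)
                    prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
            if (wp_write)
                    prot &= ~PROT_WRITE;
            if (wp_exec)
                    prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
            return (prot);
    }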

3461 
3462         if (avl_numnodes(&as->a_wpage) == 0)
3463                 return;
3464 
3465         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3466         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3467                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3468 
3469         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3470 
3471         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3472 
3473                 if ((prot = pwp->wp_oprot) != 0) {
3474                         retrycnt = 0;
3475 
3476                         if (prot != pwp->wp_prot) {
3477                         retry:
3478                                 seg = as_segat(as, pwp->wp_vaddr);
3479                                 if (seg == NULL)
3480                                         continue;
3481                                 err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
3482                                     PAGESIZE, prot);
3483                                 if (err == IE_RETRY) {
3484                                         ASSERT(retrycnt == 0);
3485                                         retrycnt++;
3486                                         goto retry;
3487 
3488                                 }
3489                         }
3490                         pwp->wp_oprot = 0;
3491                         pwp->wp_prot = 0;
3492                 }
3493 
3494                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3495         }
3496 }
3497 
3498 void
3499 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3500 {
3501         struct proc *p;


3518 int
3519 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3520 {
3521         struct seg      *seg;
3522         int             sts;
3523 
3524         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3525         seg = as_segat(as, addr);
3526         if (seg == NULL) {
3527                 AS_LOCK_EXIT(as, &as->a_lock);
3528                 return (EFAULT);
3529         }
3530         /*
3531          * catch old drivers which may not support getmemid
3532          */
3533         if (seg->s_ops->getmemid == NULL) {
3534                 AS_LOCK_EXIT(as, &as->a_lock);
3535                 return (ENODEV);
3536         }
3537 
3538         sts = SEGOP_GETMEMID(seg, addr, memidp);
3539 
3540         AS_LOCK_EXIT(as, &as->a_lock);
3541         return (sts);
3542 }

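as_getmemid() guards against segment drivers that predate the getmemid entry point by probing the ops vector for NULL before calling through it. The general pattern, with hypothetical types:

    #include <errno.h>
    #include <stddef.h>

    struct ops { int (*getmemid)(void *obj, void *idp); };
    struct obj { const struct ops *o_ops; };

    /* Invoke an optional op, failing with a distinct errno if absent. */
    int
    obj_getmemid(struct obj *op, void *idp)
    {
            if (op->o_ops->getmemid == NULL)
                    return (ENODEV);        /* old driver: op not provided */
            return (op->o_ops->getmemid(op, idp));
    }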

 691          * Invoke ALL callbacks. as_do_callbacks will do one callback
 692          * per call, and not return (-1) until the callback has completed.
 693          * When as_do_callbacks returns zero, all callbacks have completed.
 694          */
 695         mutex_enter(&as->a_contents);
 696         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 697                 ;
 698 
 699         mutex_exit(&as->a_contents);
 700         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 701 
 702         if (!free_started) {
 703                 free_started = B_TRUE;
 704                 hat_free_start(hat);
 705         }
 706         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 707                 int err;
 708 
 709                 next = AS_SEGNEXT(as, seg);
 710 retry:
 711                 err = segop_unmap(seg, seg->s_base, seg->s_size);
 712                 if (err == EAGAIN) {
 713                         mutex_enter(&as->a_contents);
 714                         if (as->a_callbacks) {
 715                                 AS_LOCK_EXIT(as, &as->a_lock);
 716                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 717                                 /*
 718                                  * Memory is currently locked. Wait for a
 719                                  * cv_signal that it has been unlocked, then
 720                                  * try the operation again.
 721                                  */
 722                                 if (AS_ISUNMAPWAIT(as) == 0)
 723                                         cv_broadcast(&as->a_cv);
 724                                 AS_SETUNMAPWAIT(as);
 725                                 AS_LOCK_EXIT(as, &as->a_lock);
 726                                 while (AS_ISUNMAPWAIT(as))
 727                                         cv_wait(&as->a_cv, &as->a_contents);
 728                         } else {
 729                                 /*
 730                                  * We may have raced with
 731                                  * segvn_reclaim()/segspt_reclaim(). In this


 784 
 785         AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
 786 
 787         (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
 788 
 789         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 790 
 791                 if (seg->s_flags & S_PURGE) {
 792                         purgesize += seg->s_size;
 793                         continue;
 794                 }
 795 
 796                 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
 797                 if (newseg == NULL) {
 798                         AS_LOCK_EXIT(newas, &newas->a_lock);
 799                         as_setwatch(as);
 800                         AS_LOCK_EXIT(as, &as->a_lock);
 801                         as_free(newas);
 802                         return (-1);
 803                 }
 804                 if ((error = segop_dup(seg, newseg)) != 0) {
 805                         /*
 806                          * We call seg_free() on the new seg
 807                          * because the segment is not set up
 808                          * completely; i.e. it has no ops.
 809                          */
 810                         as_setwatch(as);
 811                         AS_LOCK_EXIT(as, &as->a_lock);
 812                         seg_free(newseg);
 813                         AS_LOCK_EXIT(newas, &newas->a_lock);
 814                         as_free(newas);
 815                         return (error);
 816                 }
 817                 newas->a_size += seg->s_size;
 818         }
 819         newas->a_resvsize = as->a_resvsize - purgesize;
 820 
 821         error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
 822 
 823         AS_LOCK_EXIT(newas, &newas->a_lock);
 824 


 923 
 924                 as_lock_held = 1;
 925         }
 926 
 927         addrsav = raddr;
 928         segsav = seg;
 929 
 930         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 931                 if (raddr >= seg->s_base + seg->s_size) {
 932                         seg = AS_SEGNEXT(as, seg);
 933                         if (seg == NULL || raddr != seg->s_base) {
 934                                 res = FC_NOMAP;
 935                                 break;
 936                         }
 937                 }
 938                 if (raddr + rsize > seg->s_base + seg->s_size)
 939                         ssize = seg->s_base + seg->s_size - raddr;
 940                 else
 941                         ssize = rsize;
 942 
 943                 res = segop_fault(hat, seg, raddr, ssize, type, rw);
 944 
 945                 /* Restore watchpoints */
 946                 if (holding_wpage) {
 947                         as_setwatch(as);
 948                         holding_wpage = 0;
 949                 }
 950 
 951                 if (res != 0)
 952                         break;
 953         }
 954 
 955         /*
 956          * If we were SOFTLOCKing and encountered a failure,
 957          * we must SOFTUNLOCK the range we already did. (Maybe we
 958          * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
 959          * right here...)
 960          */
 961         if (res != 0 && type == F_SOFTLOCK) {
 962                 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
 963                         if (addrsav >= seg->s_base + seg->s_size)
 964                                 seg = AS_SEGNEXT(as, seg);
 965                         ASSERT(seg != NULL);
 966                         /*
 967                          * Now call the fault routine again to perform the
 968                          * unlock using S_OTHER instead of the rw variable
 969                          * since we never got a chance to touch the pages.
 970                          */
 971                         if (raddr > seg->s_base + seg->s_size)
 972                                 ssize = seg->s_base + seg->s_size - addrsav;
 973                         else
 974                                 ssize = raddr - addrsav;
 975                         (void) segop_fault(hat, seg, addrsav, ssize,
 976                             F_SOFTUNLOCK, S_OTHER);
 977                 }
 978         }
 979         if (as_lock_held)
 980                 AS_LOCK_EXIT(as, &as->a_lock);
 981         if (lwp != NULL)
 982                 lwp->lwp_nostop--;
 983 
 984         /*
 985          * If the lower levels returned EDEADLK for a fault,
 986          * it means that we should retry the fault.  Let's also wait
 987          * a bit to let the deadlock-causing condition clear.
 988          * This is part of a gross hack to work around a design flaw
 989          * in the ufs/sds logging code and should go away when the
 990          * logging code is re-designed to fix the problem. See bug
 991          * 4125102 for details of the problem.
 992          */
 993         if (FC_ERRNO(res) == EDEADLK) {
 994                 delay(deadlk_wait);
 995                 res = 0;

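The SOFTLOCK unwind above illustrates a general rollback obligation: if locking fails partway through a multi-segment range, everything already locked must be unlocked before returning, and with S_OTHER, since the pages were never touched. A standalone toy showing the same lock-forward, roll-back-on-failure shape; softlock()/softunlock() are stand-ins, not kernel calls.

#include <stdio.h>
#include <stdbool.h>

#define	NPAGES	8

static bool locked[NPAGES];

static int
softlock(int i)
{
	if (i == 5)			/* simulate a fault at page 5 */
		return (-1);
	locked[i] = true;
	return (0);
}

static void
softunlock(int i)
{
	locked[i] = false;
}

int
main(void)
{
	int i, res = 0;

	for (i = 0; i < NPAGES; i++)
		if ((res = softlock(i)) != 0)
			break;

	if (res != 0)			/* undo only [0, i), never page i */
		while (--i >= 0)
			softunlock(i);

	for (i = 0; i < NPAGES; i++)
		printf("page %d: %s\n", i, locked[i] ? "locked" : "unlocked");
	return (0);
}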

1025         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1026             (size_t)raddr;
1027 
1028         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1029         seg = as_segat(as, raddr);
1030         if (seg == NULL) {
1031                 AS_LOCK_EXIT(as, &as->a_lock);
1032                 if (lwp != NULL)
1033                         lwp->lwp_nostop--;
1034                 return (FC_NOMAP);
1035         }
1036 
1037         for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1038                 if (raddr >= seg->s_base + seg->s_size) {
1039                         seg = AS_SEGNEXT(as, seg);
1040                         if (seg == NULL || raddr != seg->s_base) {
1041                                 res = FC_NOMAP;
1042                                 break;
1043                         }
1044                 }
1045                 res = segop_faulta(seg, raddr);
1046                 if (res != 0)
1047                         break;
1048         }
1049         AS_LOCK_EXIT(as, &as->a_lock);
1050         if (lwp != NULL)
1051                 lwp->lwp_nostop--;
1052         /*
1053          * If the lower levels returned EDEADLK for a fault,
1054          * it means that we should retry the fault.  Let's also wait
1055          * a bit to let the deadlock-causing condition clear.
1056          * This is part of a gross hack to work around a design flaw
1057          * in the ufs/sds logging code and should go away when the
1058          * logging code is re-designed to fix the problem. See bug
1059          * 4125102 for details of the problem.
1060          */
1061         if (FC_ERRNO(res) == EDEADLK) {
1062                 delay(deadlk_wait);
1063                 res = 0;
1064                 goto retry;
1065         }

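The EDEADLK handling here and in as_fault() is a delay-and-retry idiom: treat the error as transient, sleep briefly so the deadlock-causing condition can clear, and reissue the operation. A standalone sketch of the idiom; do_fault() and the microsecond wait are hypothetical stand-ins for the lower-level fault call and delay(deadlk_wait).

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

static int attempts;

static int
do_fault(void)				/* hypothetical lower-level fault */
{
	return (++attempts < 3 ? EDEADLK : 0);
}

int
main(void)
{
	const useconds_t deadlk_wait_us = 1000;	/* delay(deadlk_wait) analogue */
	int res;

retry:
	res = do_fault();
	if (res == EDEADLK) {
		usleep(deadlk_wait_us);	/* let the deadlock condition clear */
		goto retry;
	}
	printf("fault succeeded after %d attempts\n", attempts);
	return (0);
}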

1115         seg = as_segat(as, raddr);
1116         if (seg == NULL) {
1117                 as_setwatch(as);
1118                 AS_LOCK_EXIT(as, &as->a_lock);
1119                 return (ENOMEM);
1120         }
1121 
1122         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1123                 if (raddr >= seg->s_base + seg->s_size) {
1124                         seg = AS_SEGNEXT(as, seg);
1125                         if (seg == NULL || raddr != seg->s_base) {
1126                                 error = ENOMEM;
1127                                 break;
1128                         }
1129                 }
1130                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1131                         ssize = seg->s_base + seg->s_size - raddr;
1132                 else
1133                         ssize = rsize;
1134 retry:
1135                 error = segop_setprot(seg, raddr, ssize, prot);
1136 
1137                 if (error == IE_NOMEM) {
1138                         error = EAGAIN;
1139                         break;
1140                 }
1141 
1142                 if (error == IE_RETRY) {
1143                         AS_LOCK_EXIT(as, &as->a_lock);
1144                         writer = 1;
1145                         goto setprot_top;
1146                 }
1147 
1148                 if (error == EAGAIN) {
1149                         /*
1150                          * Make sure we have a_lock as writer.
1151                          */
1152                         if (writer == 0) {
1153                                 AS_LOCK_EXIT(as, &as->a_lock);
1154                                 writer = 1;
1155                                 goto setprot_top;

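IE_RETRY above forces a full restart because rwlocks cannot be upgraded in place: the reader must drop a_lock, retake it as a writer, and re-validate the whole walk from setprot_top. A minimal pthread analogue of that drop-and-restart pattern; seg_op() is a stand-in for a segment op that demands the write lock.

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t a_lock = PTHREAD_RWLOCK_INITIALIZER;
static int passes;

static int
seg_op(void)				/* stand-in for segop_setprot() */
{
	return (passes == 0 ? 1 : 0);	/* first pass: demand the writer lock */
}

int
main(void)
{
	int writer = 0;

top:
	if (writer)
		pthread_rwlock_wrlock(&a_lock);
	else
		pthread_rwlock_rdlock(&a_lock);

	if (seg_op() != 0 && !writer) {
		/* cannot upgrade in place: drop, retake as writer, redo */
		pthread_rwlock_unlock(&a_lock);
		writer = 1;
		passes++;
		goto top;
	}
	pthread_rwlock_unlock(&a_lock);
	printf("completed with writer=%d\n", writer);
	return (0);
}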

1266         seg = as_segat(as, raddr);
1267         if (seg == NULL) {
1268                 as_setwatch(as);
1269                 AS_LOCK_EXIT(as, &as->a_lock);
1270                 return (ENOMEM);
1271         }
1272 
1273         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1274                 if (raddr >= seg->s_base + seg->s_size) {
1275                         seg = AS_SEGNEXT(as, seg);
1276                         if (seg == NULL || raddr != seg->s_base) {
1277                                 error = ENOMEM;
1278                                 break;
1279                         }
1280                 }
1281                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1282                         ssize = seg->s_base + seg->s_size - raddr;
1283                 else
1284                         ssize = rsize;
1285 
1286                 error = segop_checkprot(seg, raddr, ssize, prot);
1287                 if (error != 0)
1288                         break;
1289         }
1290         as_setwatch(as);
1291         AS_LOCK_EXIT(as, &as->a_lock);
1292         return (error);
1293 }
1294 
1295 int
1296 as_unmap(struct as *as, caddr_t addr, size_t size)
1297 {
1298         struct seg *seg, *seg_next;
1299         struct as_callback *cb;
1300         caddr_t raddr, eaddr;
1301         size_t ssize, rsize = 0;
1302         int err;
1303 
1304 top:
1305         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1306         eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &


1332                 else
1333                         ssize = eaddr - raddr;
1334 
1335                 /*
1336                  * Save next segment pointer since seg can be
1337                  * destroyed during the segment unmap operation.
1338                  */
1339                 seg_next = AS_SEGNEXT(as, seg);
1340 
1341                 /*
1342                  * We didn't count /dev/null mappings, so ignore them here.
1343                  * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1344                  * we have to do this check here while we have seg.)
1345                  */
1346                 rsize = 0;
1347                 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1348                     !SEG_IS_PARTIAL_RESV(seg))
1349                         rsize = ssize;
1350 
1351 retry:
1352                 err = segop_unmap(seg, raddr, ssize);
1353                 if (err == EAGAIN) {
1354                         /*
1355                          * Memory is currently locked.  It must be unlocked
1356                          * before this operation can succeed through a retry.
1357                          * The possible reasons for locked memory and
1358                          * corresponding strategies for unlocking are:
1359                          * (1) Normal I/O
1360                          *      wait for a signal that the I/O operation
1361                          *      has completed and the memory is unlocked.
1362                          * (2) Asynchronous I/O
1363                          *      The aio subsystem does not unlock pages when
1364                          *      the I/O is completed. Those pages are unlocked
1365                          *      when the application calls aiowait/aioerror.
1366                          *      So, to prevent blocking forever, cv_broadcast()
1367                          *      is done to wake up aio_cleanup_thread.
1368                          *      Subsequently, segvn_reclaim will be called, and
1369                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
1370                          * (3) Long term page locking:
1371                          *      Drivers intending to have pages locked for a
1372                          *      period considerably longer than for normal I/O


1770  */
1771 void
1772 as_purge(struct as *as)
1773 {
1774         struct seg *seg;
1775         struct seg *next_seg;
1776 
1777         /*
1778          * the setting of NEEDSPURGE is protected by as_rangelock(), so
1779          * no need to grab a_contents mutex for this check
1780          */
1781         if ((as->a_flags & AS_NEEDSPURGE) == 0)
1782                 return;
1783 
1784         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1785         next_seg = NULL;
1786         seg = AS_SEGFIRST(as);
1787         while (seg != NULL) {
1788                 next_seg = AS_SEGNEXT(as, seg);
1789                 if (seg->s_flags & S_PURGE)
1790                         (void) segop_unmap(seg, seg->s_base, seg->s_size);
1791                 seg = next_seg;
1792         }
1793         AS_LOCK_EXIT(as, &as->a_lock);
1794 
1795         mutex_enter(&as->a_contents);
1796         as->a_flags &= ~AS_NEEDSPURGE;
1797         mutex_exit(&as->a_contents);
1798 }
1799 
1800 /*
1801  * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1802  * range of addresses at least "minlen" long, where the base of the range is
1803  * at "off" phase from an "align" boundary and there is space for a
1804  * "redzone"-sized redzone on either side of the range.  Thus,
1805  * if align was 4M and off was 16k, the user wants a hole which will start
1806  * 16k into a 4M page.
1807  *
1808  * If flags specifies AH_HI, the hole will have the highest possible address
1809  * in the range.  We use the as->a_lastgap field to figure out where to
1810  * start looking for a gap.

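The align/off arithmetic the comment describes reduces to rounding within the hole and adding the phase back. A standalone worked example with the comment's own numbers, 4M alignment and a 16k phase; the hole bounds and redzone size are made up for illustration.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uintptr_t align = 4UL * 1024 * 1024;	/* 4M */
	uintptr_t off = 16 * 1024;		/* 16k phase */
	uintptr_t redzone = 8 * 1024;
	uintptr_t lo = 0x10000000, hi = 0x20000000;

	lo += redzone;				/* leave a redzone below */
	hi -= redzone;				/* ... and above */

	/* highest base <= hi such that base % align == off (AH_HI) */
	uintptr_t base = ((hi - off) & ~(align - 1)) + off;

	if (base >= lo)
		printf("hole base %#lx, %lu bytes into a 4M boundary\n",
		    (unsigned long)base, (unsigned long)(base % align));
	return (0);
}

With these inputs the chosen base sits exactly 16k past a 4M boundary, the highest such address inside the hole, matching the AH_HI behavior described above.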

2082 
2083         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2084         seg = as_segat(as, raddr);
2085         if (seg == NULL) {
2086                 AS_LOCK_EXIT(as, &as->a_lock);
2087                 return (-1);
2088         }
2089 
2090         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2091                 if (raddr >= seg->s_base + seg->s_size) {
2092                         seg = AS_SEGNEXT(as, seg);
2093                         if (seg == NULL || raddr != seg->s_base) {
2094                                 error = -1;
2095                                 break;
2096                         }
2097                 }
2098                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2099                         ssize = seg->s_base + seg->s_size - raddr;
2100                 else
2101                         ssize = rsize;
2102                 *sizep += isize = segop_incore(seg, raddr, ssize, vec);
2103                 if (isize != ssize) {
2104                         error = -1;
2105                         break;
2106                 }
2107                 vec += btopr(ssize);
2108         }
2109         AS_LOCK_EXIT(as, &as->a_lock);
2110         return (error);
2111 }
2112 
2113 static void
2114 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2115         ulong_t *bitmap, size_t position, size_t npages)
2116 {
2117         caddr_t range_start;
2118         size_t  pos1 = position;
2119         size_t  pos2;
2120         size_t  size;
2121         size_t  end_pos = npages + position;
2122 
2123         while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2124                 size = ptob((pos2 - pos1));
2125                 range_start = (caddr_t)((uintptr_t)addr +
2126                     ptob(pos1 - position));
2127 
2128                 (void) segop_lockop(seg, range_start, size, attr, MC_UNLOCK,
2129                     (ulong_t *)NULL, (size_t)NULL);
2130                 pos1 = pos2;
2131         }
2132 }
2133 
2134 static void
2135 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2136         caddr_t raddr, size_t rsize)
2137 {
2138         struct seg *seg = as_segat(as, raddr);
2139         size_t ssize;
2140 
2141         while (rsize != 0) {
2142                 if (raddr >= seg->s_base + seg->s_size)
2143                         seg = AS_SEGNEXT(as, seg);
2144 
2145                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2146                         ssize = seg->s_base + seg->s_size - raddr;
2147                 else
2148                         ssize = rsize;

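as_segunlock() above walks the MC_LOCK shadow bitmap with bt_range(), issuing one MC_UNLOCK per run of set bits rather than one per page. A standalone illustration of that range walk follows; my_bt_range() is a simplified byte-per-bit stand-in for the kernel's packed-word bt_range().

#include <stdio.h>
#include <stdbool.h>

static bool
my_bt_range(const unsigned char *bm, size_t *pos1, size_t *pos2, size_t end)
{
	size_t i = *pos1;

	while (i < end && !bm[i])	/* skip clear bits */
		i++;
	if (i == end)
		return (false);
	*pos1 = i;
	while (i < end && bm[i])	/* extend over the set run */
		i++;
	*pos2 = i;
	return (true);
}

int
main(void)
{
	unsigned char map[] = { 0, 1, 1, 0, 1, 1, 1, 0 };
	size_t p1 = 0, p2;

	while (my_bt_range(map, &p1, &p2, 8)) {
		printf("unlock pages [%zu, %zu)\n", p1, p2);
		p1 = p2;		/* resume after this run */
	}
	return (0);
}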

2204                 if (seg == NULL) {
2205                         AS_LOCK_EXIT(as, &as->a_lock);
2206                         return (0);
2207                 }
2208 
2209                 do {
2210                         raddr = (caddr_t)((uintptr_t)seg->s_base &
2211                             (uintptr_t)PAGEMASK);
2212                         rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2213                             PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2214                 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2215 
2216                 mlock_size = BT_BITOUL(btopr(rlen));
2217                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2218                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2219                                 AS_LOCK_EXIT(as, &as->a_lock);
2220                                 return (EAGAIN);
2221                 }
2222 
2223                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2224                         error = segop_lockop(seg, seg->s_base,
2225                             seg->s_size, attr, MC_LOCK, mlock_map, pos);
2226                         if (error != 0)
2227                                 break;
2228                         pos += seg_pages(seg);
2229                 }
2230 
2231                 if (error) {
2232                         for (seg = AS_SEGFIRST(as); seg != NULL;
2233                             seg = AS_SEGNEXT(as, seg)) {
2234 
2235                                 raddr = (caddr_t)((uintptr_t)seg->s_base &
2236                                     (uintptr_t)PAGEMASK);
2237                                 npages = seg_pages(seg);
2238                                 as_segunlock(seg, raddr, attr, mlock_map,
2239                                     idx, npages);
2240                                 idx += npages;
2241                         }
2242                 }
2243 
2244                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2245                 AS_LOCK_EXIT(as, &as->a_lock);
2246                 goto lockerr;
2247         } else if (func == MC_UNLOCKAS) {
2248                 mutex_enter(&as->a_contents);
2249                 AS_CLRPGLCK(as);
2250                 mutex_exit(&as->a_contents);
2251 
2252                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2253                         error = segop_lockop(seg, seg->s_base,
2254                             seg->s_size, attr, MC_UNLOCK, NULL, 0);
2255                         if (error != 0)
2256                                 break;
2257                 }
2258 
2259                 AS_LOCK_EXIT(as, &as->a_lock);
2260                 goto lockerr;
2261         }
2262 
2263         /*
2264          * Normalize addresses and sizes.
2265          */
2266         initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2267         initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2268             (size_t)raddr;
2269 
2270         if (raddr + rsize < raddr) {         /* check for wraparound */
2271                 AS_LOCK_EXIT(as, &as->a_lock);
2272                 return (ENOMEM);
2273         }

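The mlock_map sizing above allocates one bit per page over the page-rounded span of every segment: btopr() converts bytes to pages, and BT_BITOUL() converts pages to ulong_t words. A standalone arithmetic check of that sizing, assuming 4K pages for the example:

#include <stdio.h>

#define	PAGESIZE	4096UL
#define	NBBY		8
#define	BT_NBIPUL	(sizeof (unsigned long) * NBBY)	/* bits per word */

int
main(void)
{
	unsigned long rlen = 10 * 1024 * 1024 + 123;	/* 10M and change */
	unsigned long npages = (rlen + PAGESIZE - 1) / PAGESIZE;     /* btopr */
	unsigned long nwords = (npages + BT_NBIPUL - 1) / BT_NBIPUL; /* BT_BITOUL */

	printf("%lu pages -> %lu-word lock bitmap (%lu bytes)\n",
	    npages, nwords, nwords * sizeof (unsigned long));
	return (0);
}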

2311                                 }
2312                                 AS_LOCK_EXIT(as, &as->a_lock);
2313                                 return (ENOMEM);
2314                         }
2315                 }
2316                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2317                         ssize = seg->s_base + seg->s_size - raddr;
2318                 else
2319                         ssize = rsize;
2320 
2321                 /*
2322                  * Dispatch on specific function.
2323                  */
2324                 switch (func) {
2325 
2326                 /*
2327                  * Synchronize cached data from mappings with backing
2328                  * objects.
2329                  */
2330                 case MC_SYNC:
2331                         if (error = segop_sync(seg, raddr, ssize,
2332                             attr, (uint_t)arg)) {
2333                                 AS_LOCK_EXIT(as, &as->a_lock);
2334                                 return (error);
2335                         }
2336                         break;
2337 
2338                 /*
2339                  * Lock pages in memory.
2340                  */
2341                 case MC_LOCK:
2342                         if (error = segop_lockop(seg, raddr, ssize,
2343                             attr, func, mlock_map, pos)) {
2344                                 as_unlockerr(as, attr, mlock_map, initraddr,
2345                                     initrsize - rsize + ssize);
2346                                 kmem_free(mlock_map, mlock_size *
2347                                     sizeof (ulong_t));
2348                                 AS_LOCK_EXIT(as, &as->a_lock);
2349                                 goto lockerr;
2350                         }
2351                         break;
2352 
2353                 /*
2354                  * Unlock mapped pages.
2355                  */
2356                 case MC_UNLOCK:
2357                         (void) segop_lockop(seg, raddr, ssize, attr, func,
2358                             (ulong_t *)NULL, (size_t)NULL);
2359                         break;
2360 
2361                 /*
2362                  * Store VM advise for mapped pages in segment layer.
2363                  */
2364                 case MC_ADVISE:
2365                         error = segop_advise(seg, raddr, ssize, (uint_t)arg);
2366 
2367                         /*
2368                          * Check for regular errors and special retry error
2369                          */
2370                         if (error) {
2371                                 if (error == IE_RETRY) {
2372                                         /*
2373                                          * Need to acquire writers lock, so
2374                                          * have to drop readers lock and start
2375                                          * all over again
2376                                          */
2377                                         AS_LOCK_EXIT(as, &as->a_lock);
2378                                         goto retry;
2379                                 } else if (error == IE_REATTACH) {
2380                                         /*
2381                                          * Find segment for current address
2382                                          * because current segment just got
2383                                          * split or concatenated
2384                                          */
2385                                         seg = as_segat(as, raddr);
2386                                         if (seg == NULL) {
2387                                                 AS_LOCK_EXIT(as, &as->a_lock);
2388                                                 return (ENOMEM);
2389                                         }
2390                                 } else {
2391                                         /*
2392                                          * Regular error
2393                                          */
2394                                         AS_LOCK_EXIT(as, &as->a_lock);
2395                                         return (error);
2396                                 }
2397                         }
2398                         break;
2399 
2400                 case MC_INHERIT_ZERO:
2401                         if (seg->s_ops->inherit == NULL) {
2402                                 error = ENOTSUP;
2403                         } else {
2404                                 error = segop_inherit(seg, raddr, ssize,
2405                                     SEGP_INH_ZERO);
2406                         }
2407                         if (error != 0) {
2408                                 AS_LOCK_EXIT(as, &as->a_lock);
2409                                 return (error);
2410                         }
2411                         break;
2412 
2413                 /*
2414                  * Can't happen.
2415                  */
2416                 default:
2417                         panic("as_ctl: bad operation %d", func);
2418                         /*NOTREACHED*/
2419                 }
2420 
2421                 rsize -= ssize;
2422                 raddr += ssize;
2423         }
2424 

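The MC_ADVISE arm above shows the full internal-error contract of the segment drivers: IE_RETRY means re-acquire a_lock as a writer and restart, IE_REATTACH means the segment was split or concatenated so it must be looked up again before continuing, and anything else is a real errno returned to the caller. A standalone simulation of that dispatch; stub_advise() and the IE_* values here are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

#define	IE_RETRY	(-2)
#define	IE_REATTACH	(-3)

static int calls;

static int
stub_advise(void)			/* stand-in for segop_advise() */
{
	return (++calls == 1 ? IE_REATTACH : 0);
}

int
main(void)
{
	int error;

	for (;;) {
		error = stub_advise();
		if (error == IE_RETRY) {
			printf("drop read lock, restart as writer\n");
			break;
		} else if (error == IE_REATTACH) {
			printf("segment changed shape: re-lookup seg\n");
			continue;	/* retry the same range */
		}
		printf("done, errno %d\n", error);
		break;
	}
	return (0);
}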

2499         /*
2500          * Count the number of segments covered by the range we are about to
2501          * lock. The segment count is used to size the shadow list we return
2502          * back to the caller.
2503          */
2504         for (; size != 0; size -= ssize, addr += ssize) {
2505                 if (addr >= seg->s_base + seg->s_size) {
2506 
2507                         seg = AS_SEGNEXT(as, seg);
2508                         if (seg == NULL || addr != seg->s_base) {
2509                                 AS_LOCK_EXIT(as, &as->a_lock);
2510                                 return (EFAULT);
2511                         }
2512                         /*
2513                          * Do a quick check if subsequent segments
2514                          * will most likely support pagelock.
2515                          */
2516                         if (seg->s_ops == &segvn_ops) {
2517                                 vnode_t *vp;
2518 
2519                                 if (segop_getvp(seg, addr, &vp) != 0 ||
2520                                     vp != NULL) {
2521                                         AS_LOCK_EXIT(as, &as->a_lock);
2522                                         goto slow;
2523                                 }
2524                         } else if (seg->s_ops != &segspt_shmops) {
2525                                 AS_LOCK_EXIT(as, &as->a_lock);
2526                                 goto slow;
2527                         }
2528                         segcnt++;
2529                 }
2530                 if (addr + size > seg->s_base + seg->s_size) {
2531                         ssize = seg->s_base + seg->s_size - addr;
2532                 } else {
2533                         ssize = size;
2534                 }
2535         }
2536         ASSERT(segcnt > 1);
2537 
2538         plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2539 
2540         addr = sv_addr;
2541         size = sv_size;
2542         seg = sv_seg;
2543 
2544         for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2545                 if (addr >= seg->s_base + seg->s_size) {
2546                         seg = AS_SEGNEXT(as, seg);
2547                         ASSERT(seg != NULL && addr == seg->s_base);
2548                         cnt++;
2549                         ASSERT(cnt < segcnt);
2550                 }
2551                 if (addr + size > seg->s_base + seg->s_size) {
2552                         ssize = seg->s_base + seg->s_size - addr;
2553                 } else {
2554                         ssize = size;
2555                 }
2556                 pl = &plist[npages + cnt];
2557                 error = segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2558                     L_PAGELOCK, rw);
2559                 if (error) {
2560                         break;
2561                 }
2562                 ASSERT(plist[npages + cnt] != NULL);
2563                 ASSERT(pl_off + btop(ssize) <= npages);
2564                 bcopy(plist[npages + cnt], &plist[pl_off],
2565                     btop(ssize) * sizeof (page_t *));
2566                 pl_off += btop(ssize);
2567         }
2568 
2569         if (size == 0) {
2570                 AS_LOCK_EXIT(as, &as->a_lock);
2571                 ASSERT(cnt == segcnt - 1);
2572                 *ppp = plist;
2573                 return (0);
2574         }
2575 
2576         /*
 2577          * One of the pagelock calls failed. The error type is in the error variable.

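The plist built above is a single allocation with two views: slots [0, npages) form the flat shadow list handed back to the caller, while slots [npages, npages + segcnt) keep each segment's own sublist pointer so the per-segment lists can be handed back to L_PAGEUNLOCK later, as the unwind loop below does. A standalone sketch of that layout, modeling page_t pointers as void *:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	size_t npages = 6, segcnt = 2;
	void **plist = calloc(npages + segcnt, sizeof (void *));
	int pages[6];
	void *seg0_pl[] = { &pages[0], &pages[1], &pages[2] };
	void *seg1_pl[] = { &pages[3], &pages[4], &pages[5] };

	/* per-segment sublists live after the flat page array */
	plist[npages + 0] = seg0_pl;
	plist[npages + 1] = seg1_pl;

	/* flat copy into the front, as the bcopy() in the listing does */
	memcpy(&plist[0], seg0_pl, 3 * sizeof (void *));
	memcpy(&plist[3], seg1_pl, 3 * sizeof (void *));

	for (size_t i = 0; i < npages; i++)
		printf("plist[%zu] = %p\n", i, plist[i]);
	free(plist);
	return (0);
}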

2580          * back to the caller.
2581          */
2582 
2583         eaddr = addr;
2584         seg = sv_seg;
2585 
2586         for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2587                 if (addr >= seg->s_base + seg->s_size) {
2588                         seg = AS_SEGNEXT(as, seg);
2589                         ASSERT(seg != NULL && addr == seg->s_base);
2590                         cnt++;
2591                         ASSERT(cnt < segcnt);
2592                 }
2593                 if (eaddr > seg->s_base + seg->s_size) {
2594                         ssize = seg->s_base + seg->s_size - addr;
2595                 } else {
2596                         ssize = eaddr - addr;
2597                 }
2598                 pl = &plist[npages + cnt];
2599                 ASSERT(*pl != NULL);
2600                 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2601                     L_PAGEUNLOCK, rw);
2602         }
2603 
2604         AS_LOCK_EXIT(as, &as->a_lock);
2605 
2606         kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2607 
2608         if (error != ENOTSUP && error != EFAULT) {
2609                 return (error);
2610         }
2611 
2612 slow:
2613         /*
 2614          * If we are here because pagelock failed due to the need to cow-fault
 2615          * in the pages we want to lock, F_SOFTLOCK will do this job, and in the
 2616          * next as_pagelock() call for this address range pagelock will
 2617          * hopefully succeed.
2618          */
2619         fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2620         if (fault_err != 0) {


2655         seg = as_segat(as, raddr);
2656         if (seg == NULL) {
2657                 AS_LOCK_EXIT(as, &as->a_lock);
2658                 return (EFAULT);
2659         }
2660         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2661         if (raddr + rsize > seg->s_base + seg->s_size) {
2662                 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2663         }
2664         if (raddr + rsize <= raddr) {
2665                 AS_LOCK_EXIT(as, &as->a_lock);
2666                 return (EFAULT);
2667         }
2668 
2669         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2670             "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2671 
2672         /*
2673          * try to lock pages and pass back shadow list
2674          */
2675         err = segop_pagelock(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2676 
2677         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2678 
2679         AS_LOCK_EXIT(as, &as->a_lock);
2680 
2681         if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2682                 return (err);
2683         }
2684 
2685         /*
2686          * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2687          * to no pagelock support for this segment or pages need to be cow
 2686          * Use F_SOFTLOCK to lock the pages because pagelock failed either due
 2687          * to no pagelock support for this segment or pages need to be cow
 2688          * faulted in. If a fault is needed, F_SOFTLOCK will do this job for
 2689          * this as_pagelock() call, and in the next as_pagelock() call for the
 2690          * same address range the pagelock call will hopefully succeed.
2691          */
2692         fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2693         if (fault_err != 0) {
2694                 return (fc_decode(fault_err));
2695         }

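Both the fast and slow paths end in the same caller-visible contract. The fragment below is a hedged sketch of how a driver pairs the calls, in kernel context; do_dma() is a hypothetical stand-in, and a NULL shadow list is the documented slow-path result that as_pageunlock() handles by falling back to as_fault(F_SOFTUNLOCK).

	page_t **pplist;
	int err;

	err = as_pagelock(as, &pplist, uaddr, len, S_WRITE);
	if (err != 0)
		return (err);
	/*
	 * pplist may be NULL: the slow path locked the pages with
	 * F_SOFTLOCK and there is no shadow list, but the range is
	 * locked either way.
	 */
	do_dma(uaddr, len, pplist);		/* hypothetical device I/O */
	as_pageunlock(as, pplist, uaddr, len, S_WRITE);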

2718         ASSERT(seg != NULL);
2719         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2720         ASSERT(addr + size > seg->s_base + seg->s_size);
2721         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2722         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2723         ASSERT(plist != NULL);
2724 
2725         for (cnt = 0; addr < eaddr; addr += ssize) {
2726                 if (addr >= seg->s_base + seg->s_size) {
2727                         seg = AS_SEGNEXT(as, seg);
2728                         ASSERT(seg != NULL && addr == seg->s_base);
2729                         cnt++;
2730                 }
2731                 if (eaddr > seg->s_base + seg->s_size) {
2732                         ssize = seg->s_base + seg->s_size - addr;
2733                 } else {
2734                         ssize = eaddr - addr;
2735                 }
2736                 pl = &plist[npages + cnt];
2737                 ASSERT(*pl != NULL);
2738                 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2739                     L_PAGEUNLOCK, rw);
2740         }
2741         ASSERT(cnt > 0);
2742         AS_LOCK_EXIT(as, &as->a_lock);
2743 
2744         cnt++;
2745         kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2746 }
2747 
2748 /*
2749  * unlock pages in a given address range
2750  */
2751 void
2752 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2753     enum seg_rw rw)
2754 {
2755         struct seg *seg;
2756         size_t rsize;
2757         caddr_t raddr;
2758 


2764          * falling back to as_fault
2765          */
2766         if (pp == NULL) {
2767                 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2768                 return;
2769         }
2770 
2771         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2772         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2773             (size_t)raddr;
2774 
2775         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2776         seg = as_segat(as, raddr);
2777         ASSERT(seg != NULL);
2778 
2779         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2780             "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2781 
2782         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2783         if (raddr + rsize <= seg->s_base + seg->s_size) {
2784                 segop_pagelock(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2785         } else {
2786                 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2787                 return;
2788         }
2789         AS_LOCK_EXIT(as, &as->a_lock);
2790         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2791 }
2792 
2793 int
2794 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2795     boolean_t wait)
2796 {
2797         struct seg *seg;
2798         size_t ssize;
2799         caddr_t raddr;                  /* rounded down addr */
2800         size_t rsize;                   /* rounded up size */
2801         int error = 0;
2802         size_t pgsz = page_get_pagesize(szc);
2803 
2804 setpgsz_top:


2819                 as_setwatch(as);
2820                 AS_LOCK_EXIT(as, &as->a_lock);
2821                 return (ENOMEM);
2822         }
2823 
2824         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2825                 if (raddr >= seg->s_base + seg->s_size) {
2826                         seg = AS_SEGNEXT(as, seg);
2827                         if (seg == NULL || raddr != seg->s_base) {
2828                                 error = ENOMEM;
2829                                 break;
2830                         }
2831                 }
2832                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2833                         ssize = seg->s_base + seg->s_size - raddr;
2834                 } else {
2835                         ssize = rsize;
2836                 }
2837 
2838 retry:
2839                 error = segop_setpagesize(seg, raddr, ssize, szc);
2840 
2841                 if (error == IE_NOMEM) {
2842                         error = EAGAIN;
2843                         break;
2844                 }
2845 
2846                 if (error == IE_RETRY) {
2847                         AS_LOCK_EXIT(as, &as->a_lock);
2848                         goto setpgsz_top;
2849                 }
2850 
2851                 if (error == ENOTSUP) {
2852                         error = EINVAL;
2853                         break;
2854                 }
2855 
2856                 if (wait && (error == EAGAIN)) {
2857                         /*
2858                          * Memory is currently locked.  It must be unlocked
2859                          * before this operation can succeed through a retry.

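The setpagesize loop above translates the segment drivers' internal results into public errnos: IE_NOMEM becomes EAGAIN, IE_RETRY restarts the walk from setpgsz_top, and ENOTSUP becomes EINVAL. A standalone sketch of that translation; the IE_* values here are illustrative stand-ins, not the kernel's definitions.

#include <errno.h>
#include <stdio.h>

#define	IE_NOMEM	(-1)
#define	IE_RETRY	(-2)

static int
xlate_setpagesize_err(int err, int *restart)
{
	*restart = 0;
	switch (err) {
	case IE_NOMEM:		/* transient shortage: caller may retry */
		return (EAGAIN);
	case IE_RETRY:		/* must restart the whole walk */
		*restart = 1;
		return (0);
	case ENOTSUP:		/* driver can't do large pages: bad arg */
		return (EINVAL);
	default:
		return (err);
	}
}

int
main(void)
{
	int restart;

	printf("IE_NOMEM -> %d\n", xlate_setpagesize_err(IE_NOMEM, &restart));
	printf("ENOTSUP  -> %d\n", xlate_setpagesize_err(ENOTSUP, &restart));
	return (0);
}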

2898                                  * number of retries without sleeping should
2899                                  * be very small. See segvn_reclaim() for
2900                                  * more comments.
2901                                  */
2902                                 AS_CLRNOUNMAPWAIT(as);
2903                                 mutex_exit(&as->a_contents);
2904                                 goto retry;
2905                         }
2906                         mutex_exit(&as->a_contents);
2907                         goto setpgsz_top;
2908                 } else if (error != 0) {
2909                         break;
2910                 }
2911         }
2912         as_setwatch(as);
2913         AS_LOCK_EXIT(as, &as->a_lock);
2914         return (error);
2915 }
2916 
2917 /*
2918  * as_iset3_default_lpsize() just calls segop_setpagesize() on all segments
2919  * in its chunk where s_szc is less than the szc we want to set.
2920  */
2921 static int
2922 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
2923     int *retry)
2924 {
2925         struct seg *seg;
2926         size_t ssize;
2927         int error;
2928 
2929         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
2930 
2931         seg = as_segat(as, raddr);
2932         if (seg == NULL) {
2933                 panic("as_iset3_default_lpsize: no seg");
2934         }
2935 
2936         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2937                 if (raddr >= seg->s_base + seg->s_size) {
2938                         seg = AS_SEGNEXT(as, seg);
2939                         if (seg == NULL || raddr != seg->s_base) {
2940                                 panic("as_iset3_default_lpsize: as changed");
2941                         }
2942                 }
2943                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2944                         ssize = seg->s_base + seg->s_size - raddr;
2945                 } else {
2946                         ssize = rsize;
2947                 }
2948 
2949                 if (szc > seg->s_szc) {
2950                         error = segop_setpagesize(seg, raddr, ssize, szc);
2951                         /* Only retry on EINVAL segments that have no vnode. */
2952                         if (error == EINVAL) {
2953                                 vnode_t *vp = NULL;
2954                                 if ((segop_gettype(seg, raddr) & MAP_SHARED) &&
2955                                     (segop_getvp(seg, raddr, &vp) != 0 ||
2956                                     vp == NULL)) {
2957                                         *retry = 1;
2958                                 } else {
2959                                         *retry = 0;
2960                                 }
2961                         }
2962                         if (error) {
2963                                 return (error);
2964                         }
2965                 }
2966         }
2967         return (0);
2968 }
2969 
2970 /*
2971  * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
2972  * pagesize on each segment in its range, but if any fails with EINVAL,
2973  * then it reduces the pagesizes to the next size in the bitmap and
2974  * retries as_iset3_default_lpsize(). The reason why the code retries
2975  * smaller allowed sizes on EINVAL is because (a) the anon offset may not


3178         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3179 again:
3180         error = 0;
3181 
3182         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3183         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3184             (size_t)raddr;
3185 
3186         if (raddr + rsize < raddr) {         /* check for wraparound */
3187                 AS_LOCK_EXIT(as, &as->a_lock);
3188                 return (ENOMEM);
3189         }
3190         as_clearwatchprot(as, raddr, rsize);
3191         seg = as_segat(as, raddr);
3192         if (seg == NULL) {
3193                 as_setwatch(as);
3194                 AS_LOCK_EXIT(as, &as->a_lock);
3195                 return (ENOMEM);
3196         }
3197         if (seg->s_ops == &segvn_ops) {
3198                 rtype = segop_gettype(seg, addr);
3199                 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3200                 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3201                 segvn = 1;
3202         } else {
3203                 segvn = 0;
3204         }
3205         setaddr = raddr;
3206         setsize = 0;
3207 
3208         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3209                 if (raddr >= (seg->s_base + seg->s_size)) {
3210                         seg = AS_SEGNEXT(as, seg);
3211                         if (seg == NULL || raddr != seg->s_base) {
3212                                 error = ENOMEM;
3213                                 break;
3214                         }
3215                         if (seg->s_ops == &segvn_ops) {
3216                                 stype = segop_gettype(seg, raddr);
3217                                 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3218                                 stype &= (MAP_SHARED | MAP_PRIVATE);
3219                                 if (segvn && (rflags != sflags ||
3220                                     rtype != stype)) {
3221                                         /*
3222                                          * The next segment is also segvn but
3223                                          * has different flags and/or type.
3224                                          */
3225                                         ASSERT(setsize != 0);
3226                                         error = as_iset_default_lpsize(as,
3227                                             setaddr, setsize, rflags, rtype);
3228                                         if (error) {
3229                                                 break;
3230                                         }
3231                                         rflags = sflags;
3232                                         rtype = stype;
3233                                         setaddr = raddr;
3234                                         setsize = 0;
3235                                 } else if (!segvn) {
3236                                         rflags = sflags;


3310 as_setwatch(struct as *as)
3311 {
3312         struct watched_page *pwp;
3313         struct seg *seg;
3314         caddr_t vaddr;
3315         uint_t prot;
3316         int  err, retrycnt;
3317 
3318         if (avl_numnodes(&as->a_wpage) == 0)
3319                 return;
3320 
3321         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3322 
3323         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3324             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3325                 retrycnt = 0;
3326         retry:
3327                 vaddr = pwp->wp_vaddr;
3328                 if (pwp->wp_oprot != 0 ||    /* already set up */
3329                     (seg = as_segat(as, vaddr)) == NULL ||
3330                     segop_getprot(seg, vaddr, 0, &prot) != 0)
3331                         continue;
3332 
3333                 pwp->wp_oprot = prot;
3334                 if (pwp->wp_read)
3335                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3336                 if (pwp->wp_write)
3337                         prot &= ~PROT_WRITE;
3338                 if (pwp->wp_exec)
3339                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3340                 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3341                         err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3342                         if (err == IE_RETRY) {
3343                                 pwp->wp_oprot = 0;
3344                                 ASSERT(retrycnt == 0);
3345                                 retrycnt++;
3346                                 goto retry;
3347                         }
3348                 }
3349                 pwp->wp_prot = prot;
3350         }
3351 }
3352 
3353 /*
3354  * Clear all of the watched pages in the address space.
3355  */
3356 void
3357 as_clearwatch(struct as *as)
3358 {
3359         struct watched_page *pwp;
3360         struct seg *seg;
3361         caddr_t vaddr;
3362         uint_t prot;
3363         int err, retrycnt;
3364 
3365         if (avl_numnodes(&as->a_wpage) == 0)
3366                 return;
3367 
3368         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3369 
3370         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3371             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3372                 retrycnt = 0;
3373         retry:
3374                 vaddr = pwp->wp_vaddr;
3375                 if (pwp->wp_oprot == 0 ||    /* not set up */
3376                     (seg = as_segat(as, vaddr)) == NULL)
3377                         continue;
3378 
3379                 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3380                         err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3381                         if (err == IE_RETRY) {
3382                                 ASSERT(retrycnt == 0);
3383                                 retrycnt++;
3384                                 goto retry;
3385                         }
3386                 }
3387                 pwp->wp_oprot = 0;
3388                 pwp->wp_prot = 0;
3389         }
3390 }
3391 
3392 /*
3393  * Force a new setup for all the watched pages in the range.
3394  */
3395 static void
3396 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3397 {
3398         struct watched_page *pwp;
3399         struct watched_page tpw;
3400         caddr_t eaddr = addr + size;

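Both as_setwatch() and as_setwatchprot() derive a watched page's effective protections the same way: start from the real protections and strip whatever access the watchpoint must trap on, with read and exec watches revoking everything and write watches revoking only PROT_WRITE. A standalone illustration of that masking:

#include <stdio.h>
#include <sys/mman.h>	/* PROT_READ, PROT_WRITE, PROT_EXEC */

static unsigned
watch_prot(unsigned oprot, int wp_read, int wp_write, int wp_exec)
{
	unsigned prot = oprot;

	if (wp_read)		/* read watch: revoke all access */
		prot &= ~(PROT_READ | PROT_WRITE | PROT_EXEC);
	if (wp_write)		/* write watch: revoke only write */
		prot &= ~PROT_WRITE;
	if (wp_exec)		/* exec watch: revoke all access */
		prot &= ~(PROT_READ | PROT_WRITE | PROT_EXEC);
	return (prot);
}

int
main(void)
{
	printf("rwx with write watch -> %#x\n",
	    watch_prot(PROT_READ | PROT_WRITE | PROT_EXEC, 0, 1, 0));
	return (0);
}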

3414                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3415 
3416         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3417                 retrycnt = 0;
3418                 vaddr = pwp->wp_vaddr;
3419 
3420                 wprot = prot;
3421                 if (pwp->wp_read)
3422                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3423                 if (pwp->wp_write)
3424                         wprot &= ~PROT_WRITE;
3425                 if (pwp->wp_exec)
3426                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3427                 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3428                 retry:
3429                         seg = as_segat(as, vaddr);
3430                         if (seg == NULL) {
3431                                 panic("as_setwatchprot: no seg");
3432                                 /*NOTREACHED*/
3433                         }
3434                         err = segop_setprot(seg, vaddr, PAGESIZE, wprot);
3435                         if (err == IE_RETRY) {
3436                                 ASSERT(retrycnt == 0);
3437                                 retrycnt++;
3438                                 goto retry;
3439                         }
3440                 }
3441                 pwp->wp_oprot = prot;
3442                 pwp->wp_prot = wprot;
3443 
3444                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3445         }
3446 }
3447 
3448 /*
3449  * Clear all of the watched pages in the range.
3450  */
3451 static void
3452 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3453 {
3454         caddr_t eaddr = addr + size;


3461 
3462         if (avl_numnodes(&as->a_wpage) == 0)
3463                 return;
3464 
3465         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3466         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3467                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3468 
3469         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3470 
3471         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3472 
3473                 if ((prot = pwp->wp_oprot) != 0) {
3474                         retrycnt = 0;
3475 
3476                         if (prot != pwp->wp_prot) {
3477                         retry:
3478                                 seg = as_segat(as, pwp->wp_vaddr);
3479                                 if (seg == NULL)
3480                                         continue;
3481                                 err = segop_setprot(seg, pwp->wp_vaddr,
3482                                     PAGESIZE, prot);
3483                                 if (err == IE_RETRY) {
3484                                         ASSERT(retrycnt == 0);
3485                                         retrycnt++;
3486                                         goto retry;
3487 
3488                                 }
3489                         }
3490                         pwp->wp_oprot = 0;
3491                         pwp->wp_prot = 0;
3492                 }
3493 
3494                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3495         }
3496 }
3497 
3498 void
3499 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3500 {
3501         struct proc *p;


3518 int
3519 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3520 {
3521         struct seg      *seg;
3522         int             sts;
3523 
3524         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3525         seg = as_segat(as, addr);
3526         if (seg == NULL) {
3527                 AS_LOCK_EXIT(as, &as->a_lock);
3528                 return (EFAULT);
3529         }
3530         /*
3531          * catch old drivers which may not support getmemid
3532          */
3533         if (seg->s_ops->getmemid == NULL) {
3534                 AS_LOCK_EXIT(as, &as->a_lock);
3535                 return (ENODEV);
3536         }
3537 
3538         sts = segop_getmemid(seg, addr, memidp);
3539 
3540         AS_LOCK_EXIT(as, &as->a_lock);
3541         return (sts);
3542 }