691 * Invoke ALL callbacks. as_do_callbacks will do one callback
692 * per call, and not return (-1) until the callback has completed.
693 * When as_do_callbacks returns zero, all callbacks have completed.
694 */
695 mutex_enter(&as->a_contents);
696 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
697 ;
698
699 mutex_exit(&as->a_contents);
700 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
701
702 if (!free_started) {
703 free_started = B_TRUE;
704 hat_free_start(hat);
705 }
706 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
707 int err;
708
709 next = AS_SEGNEXT(as, seg);
710 retry:
711 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
712 if (err == EAGAIN) {
713 mutex_enter(&as->a_contents);
714 if (as->a_callbacks) {
715 AS_LOCK_EXIT(as, &as->a_lock);
716 } else if (!AS_ISNOUNMAPWAIT(as)) {
717 /*
718 * Memory is currently locked. Wait for a
719 * cv_signal that it has been unlocked, then
720 * try the operation again.
721 */
722 if (AS_ISUNMAPWAIT(as) == 0)
723 cv_broadcast(&as->a_cv);
724 AS_SETUNMAPWAIT(as);
725 AS_LOCK_EXIT(as, &as->a_lock);
726 while (AS_ISUNMAPWAIT(as))
727 cv_wait(&as->a_cv, &as->a_contents);
728 } else {
729 /*
730 * We may have raced with
731 * segvn_reclaim()/segspt_reclaim(). In this
784
785 AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
786
787 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
788
789 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
790
791 if (seg->s_flags & S_PURGE) {
792 purgesize += seg->s_size;
793 continue;
794 }
795
796 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
797 if (newseg == NULL) {
798 AS_LOCK_EXIT(newas, &newas->a_lock);
799 as_setwatch(as);
800 AS_LOCK_EXIT(as, &as->a_lock);
801 as_free(newas);
802 return (-1);
803 }
804 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
805 /*
806 * We call seg_free() on the new seg
807 * because the segment is not set up
808 * completely; i.e. it has no ops.
809 */
810 as_setwatch(as);
811 AS_LOCK_EXIT(as, &as->a_lock);
812 seg_free(newseg);
813 AS_LOCK_EXIT(newas, &newas->a_lock);
814 as_free(newas);
815 return (error);
816 }
817 newas->a_size += seg->s_size;
818 }
819 newas->a_resvsize = as->a_resvsize - purgesize;
820
821 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
822
823 AS_LOCK_EXIT(newas, &newas->a_lock);
824
923
924 as_lock_held = 1;
925 }
926
927 addrsav = raddr;
928 segsav = seg;
929
930 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
931 if (raddr >= seg->s_base + seg->s_size) {
932 seg = AS_SEGNEXT(as, seg);
933 if (seg == NULL || raddr != seg->s_base) {
934 res = FC_NOMAP;
935 break;
936 }
937 }
938 if (raddr + rsize > seg->s_base + seg->s_size)
939 ssize = seg->s_base + seg->s_size - raddr;
940 else
941 ssize = rsize;
942
943 res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
944
945 /* Restore watchpoints */
946 if (holding_wpage) {
947 as_setwatch(as);
948 holding_wpage = 0;
949 }
950
951 if (res != 0)
952 break;
953 }
954
955 /*
956 * If we were SOFTLOCKing and encountered a failure,
957 * we must SOFTUNLOCK the range we already did. (Maybe we
958 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
959 * right here...)
960 */
961 if (res != 0 && type == F_SOFTLOCK) {
962 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
963 if (addrsav >= seg->s_base + seg->s_size)
964 seg = AS_SEGNEXT(as, seg);
965 ASSERT(seg != NULL);
966 /*
967 * Now call the fault routine again to perform the
968 * unlock using S_OTHER instead of the rw variable
969 * since we never got a chance to touch the pages.
970 */
971 if (raddr > seg->s_base + seg->s_size)
972 ssize = seg->s_base + seg->s_size - addrsav;
973 else
974 ssize = raddr - addrsav;
975 (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
976 F_SOFTUNLOCK, S_OTHER);
977 }
978 }
979 if (as_lock_held)
980 AS_LOCK_EXIT(as, &as->a_lock);
981 if (lwp != NULL)
982 lwp->lwp_nostop--;
983
984 /*
985 * If the lower levels returned EDEADLK for a fault,
986 * It means that we should retry the fault. Let's wait
987 * a bit also to let the deadlock causing condition clear.
988 * This is part of a gross hack to work around a design flaw
989 * in the ufs/sds logging code and should go away when the
990 * logging code is re-designed to fix the problem. See bug
991 * 4125102 for details of the problem.
992 */
993 if (FC_ERRNO(res) == EDEADLK) {
994 delay(deadlk_wait);
995 res = 0;
1025 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1026 (size_t)raddr;
1027
1028 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1029 seg = as_segat(as, raddr);
1030 if (seg == NULL) {
1031 AS_LOCK_EXIT(as, &as->a_lock);
1032 if (lwp != NULL)
1033 lwp->lwp_nostop--;
1034 return (FC_NOMAP);
1035 }
1036
1037 for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1038 if (raddr >= seg->s_base + seg->s_size) {
1039 seg = AS_SEGNEXT(as, seg);
1040 if (seg == NULL || raddr != seg->s_base) {
1041 res = FC_NOMAP;
1042 break;
1043 }
1044 }
1045 res = SEGOP_FAULTA(seg, raddr);
1046 if (res != 0)
1047 break;
1048 }
1049 AS_LOCK_EXIT(as, &as->a_lock);
1050 if (lwp != NULL)
1051 lwp->lwp_nostop--;
1052 /*
1053 * If the lower levels returned EDEADLK for a fault,
1054 * It means that we should retry the fault. Let's wait
1055 * a bit also to let the deadlock causing condition clear.
1056 * This is part of a gross hack to work around a design flaw
1057 * in the ufs/sds logging code and should go away when the
1058 * logging code is re-designed to fix the problem. See bug
1059 * 4125102 for details of the problem.
1060 */
1061 if (FC_ERRNO(res) == EDEADLK) {
1062 delay(deadlk_wait);
1063 res = 0;
1064 goto retry;
1065 }
1115 seg = as_segat(as, raddr);
1116 if (seg == NULL) {
1117 as_setwatch(as);
1118 AS_LOCK_EXIT(as, &as->a_lock);
1119 return (ENOMEM);
1120 }
1121
1122 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1123 if (raddr >= seg->s_base + seg->s_size) {
1124 seg = AS_SEGNEXT(as, seg);
1125 if (seg == NULL || raddr != seg->s_base) {
1126 error = ENOMEM;
1127 break;
1128 }
1129 }
1130 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1131 ssize = seg->s_base + seg->s_size - raddr;
1132 else
1133 ssize = rsize;
1134 retry:
1135 error = SEGOP_SETPROT(seg, raddr, ssize, prot);
1136
1137 if (error == IE_NOMEM) {
1138 error = EAGAIN;
1139 break;
1140 }
1141
1142 if (error == IE_RETRY) {
1143 AS_LOCK_EXIT(as, &as->a_lock);
1144 writer = 1;
1145 goto setprot_top;
1146 }
1147
1148 if (error == EAGAIN) {
1149 /*
1150 * Make sure we have a_lock as writer.
1151 */
1152 if (writer == 0) {
1153 AS_LOCK_EXIT(as, &as->a_lock);
1154 writer = 1;
1155 goto setprot_top;
1266 seg = as_segat(as, raddr);
1267 if (seg == NULL) {
1268 as_setwatch(as);
1269 AS_LOCK_EXIT(as, &as->a_lock);
1270 return (ENOMEM);
1271 }
1272
1273 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1274 if (raddr >= seg->s_base + seg->s_size) {
1275 seg = AS_SEGNEXT(as, seg);
1276 if (seg == NULL || raddr != seg->s_base) {
1277 error = ENOMEM;
1278 break;
1279 }
1280 }
1281 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1282 ssize = seg->s_base + seg->s_size - raddr;
1283 else
1284 ssize = rsize;
1285
1286 error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
1287 if (error != 0)
1288 break;
1289 }
1290 as_setwatch(as);
1291 AS_LOCK_EXIT(as, &as->a_lock);
1292 return (error);
1293 }
1294
1295 int
1296 as_unmap(struct as *as, caddr_t addr, size_t size)
1297 {
1298 struct seg *seg, *seg_next;
1299 struct as_callback *cb;
1300 caddr_t raddr, eaddr;
1301 size_t ssize, rsize = 0;
1302 int err;
1303
1304 top:
1305 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1306 eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1332 else
1333 ssize = eaddr - raddr;
1334
1335 /*
1336 * Save next segment pointer since seg can be
1337 * destroyed during the segment unmap operation.
1338 */
1339 seg_next = AS_SEGNEXT(as, seg);
1340
1341 /*
1342 * We didn't count /dev/null mappings, so ignore them here.
1343 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1344 * we have to do this check here while we have seg.)
1345 */
1346 rsize = 0;
1347 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1348 !SEG_IS_PARTIAL_RESV(seg))
1349 rsize = ssize;
1350
1351 retry:
1352 err = SEGOP_UNMAP(seg, raddr, ssize);
1353 if (err == EAGAIN) {
1354 /*
1355 * Memory is currently locked. It must be unlocked
1356 * before this operation can succeed through a retry.
1357 * The possible reasons for locked memory and
1358 * corresponding strategies for unlocking are:
1359 * (1) Normal I/O
1360 * wait for a signal that the I/O operation
1361 * has completed and the memory is unlocked.
1362 * (2) Asynchronous I/O
1363 * The aio subsystem does not unlock pages when
1364 * the I/O is completed. Those pages are unlocked
1365 * when the application calls aiowait/aioerror.
1366 * So, to prevent blocking forever, cv_broadcast()
1367 * is done to wake up aio_cleanup_thread.
1368 * Subsequently, segvn_reclaim will be called, and
1369 * that will do AS_CLRUNMAPWAIT() and wake us up.
1370 * (3) Long term page locking:
1371 * Drivers intending to have pages locked for a
1372 * period considerably longer than for normal I/O
1770 */
1771 void
1772 as_purge(struct as *as)
1773 {
1774 struct seg *seg;
1775 struct seg *next_seg;
1776
1777 /*
1778 * the setting of NEEDSPURGE is protect by as_rangelock(), so
1779 * no need to grab a_contents mutex for this check
1780 */
1781 if ((as->a_flags & AS_NEEDSPURGE) == 0)
1782 return;
1783
1784 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1785 next_seg = NULL;
1786 seg = AS_SEGFIRST(as);
1787 while (seg != NULL) {
1788 next_seg = AS_SEGNEXT(as, seg);
1789 if (seg->s_flags & S_PURGE)
1790 SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1791 seg = next_seg;
1792 }
1793 AS_LOCK_EXIT(as, &as->a_lock);
1794
1795 mutex_enter(&as->a_contents);
1796 as->a_flags &= ~AS_NEEDSPURGE;
1797 mutex_exit(&as->a_contents);
1798 }
1799
1800 /*
1801 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1802 * range of addresses at least "minlen" long, where the base of the range is
1803 * at "off" phase from an "align" boundary and there is space for a
1804  * "redzone"-sized redzone on either side of the range. Thus,
1805 * if align was 4M and off was 16k, the user wants a hole which will start
1806 * 16k into a 4M page.
1807 *
1808 * If flags specifies AH_HI, the hole will have the highest possible address
1809 * in the range. We use the as->a_lastgap field to figure out where to
1810 * start looking for a gap.
2082
2083 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2084 seg = as_segat(as, raddr);
2085 if (seg == NULL) {
2086 AS_LOCK_EXIT(as, &as->a_lock);
2087 return (-1);
2088 }
2089
2090 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2091 if (raddr >= seg->s_base + seg->s_size) {
2092 seg = AS_SEGNEXT(as, seg);
2093 if (seg == NULL || raddr != seg->s_base) {
2094 error = -1;
2095 break;
2096 }
2097 }
2098 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2099 ssize = seg->s_base + seg->s_size - raddr;
2100 else
2101 ssize = rsize;
2102 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
2103 if (isize != ssize) {
2104 error = -1;
2105 break;
2106 }
2107 vec += btopr(ssize);
2108 }
2109 AS_LOCK_EXIT(as, &as->a_lock);
2110 return (error);
2111 }
2112
2113 static void
2114 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2115 ulong_t *bitmap, size_t position, size_t npages)
2116 {
2117 caddr_t range_start;
2118 size_t pos1 = position;
2119 size_t pos2;
2120 size_t size;
2121 size_t end_pos = npages + position;
2122
2123 while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2124 size = ptob((pos2 - pos1));
2125 range_start = (caddr_t)((uintptr_t)addr +
2126 ptob(pos1 - position));
2127
2128 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
2129 (ulong_t *)NULL, (size_t)NULL);
2130 pos1 = pos2;
2131 }
2132 }
2133
2134 static void
2135 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2136 caddr_t raddr, size_t rsize)
2137 {
2138 struct seg *seg = as_segat(as, raddr);
2139 size_t ssize;
2140
2141 while (rsize != 0) {
2142 if (raddr >= seg->s_base + seg->s_size)
2143 seg = AS_SEGNEXT(as, seg);
2144
2145 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2146 ssize = seg->s_base + seg->s_size - raddr;
2147 else
2148 ssize = rsize;
2204 if (seg == NULL) {
2205 AS_LOCK_EXIT(as, &as->a_lock);
2206 return (0);
2207 }
2208
2209 do {
2210 raddr = (caddr_t)((uintptr_t)seg->s_base &
2211 (uintptr_t)PAGEMASK);
2212 rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2213 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2214 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2215
2216 mlock_size = BT_BITOUL(btopr(rlen));
2217 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2218 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2219 AS_LOCK_EXIT(as, &as->a_lock);
2220 return (EAGAIN);
2221 }
2222
2223 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2224 error = SEGOP_LOCKOP(seg, seg->s_base,
2225 seg->s_size, attr, MC_LOCK, mlock_map, pos);
2226 if (error != 0)
2227 break;
2228 pos += seg_pages(seg);
2229 }
2230
2231 if (error) {
2232 for (seg = AS_SEGFIRST(as); seg != NULL;
2233 seg = AS_SEGNEXT(as, seg)) {
2234
2235 raddr = (caddr_t)((uintptr_t)seg->s_base &
2236 (uintptr_t)PAGEMASK);
2237 npages = seg_pages(seg);
2238 as_segunlock(seg, raddr, attr, mlock_map,
2239 idx, npages);
2240 idx += npages;
2241 }
2242 }
2243
2244 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2245 AS_LOCK_EXIT(as, &as->a_lock);
2246 goto lockerr;
2247 } else if (func == MC_UNLOCKAS) {
2248 mutex_enter(&as->a_contents);
2249 AS_CLRPGLCK(as);
2250 mutex_exit(&as->a_contents);
2251
2252 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2253 error = SEGOP_LOCKOP(seg, seg->s_base,
2254 seg->s_size, attr, MC_UNLOCK, NULL, 0);
2255 if (error != 0)
2256 break;
2257 }
2258
2259 AS_LOCK_EXIT(as, &as->a_lock);
2260 goto lockerr;
2261 }
2262
2263 /*
2264 * Normalize addresses and sizes.
2265 */
2266 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2267 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2268 (size_t)raddr;
2269
2270 if (raddr + rsize < raddr) { /* check for wraparound */
2271 AS_LOCK_EXIT(as, &as->a_lock);
2272 return (ENOMEM);
2273 }
2311 }
2312 AS_LOCK_EXIT(as, &as->a_lock);
2313 return (ENOMEM);
2314 }
2315 }
2316 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2317 ssize = seg->s_base + seg->s_size - raddr;
2318 else
2319 ssize = rsize;
2320
2321 /*
2322 * Dispatch on specific function.
2323 */
2324 switch (func) {
2325
2326 /*
2327 * Synchronize cached data from mappings with backing
2328 * objects.
2329 */
2330 case MC_SYNC:
2331 if (error = SEGOP_SYNC(seg, raddr, ssize,
2332 attr, (uint_t)arg)) {
2333 AS_LOCK_EXIT(as, &as->a_lock);
2334 return (error);
2335 }
2336 break;
2337
2338 /*
2339 * Lock pages in memory.
2340 */
2341 case MC_LOCK:
2342 if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2343 attr, func, mlock_map, pos)) {
2344 as_unlockerr(as, attr, mlock_map, initraddr,
2345 initrsize - rsize + ssize);
2346 kmem_free(mlock_map, mlock_size *
2347 sizeof (ulong_t));
2348 AS_LOCK_EXIT(as, &as->a_lock);
2349 goto lockerr;
2350 }
2351 break;
2352
2353 /*
2354 * Unlock mapped pages.
2355 */
2356 case MC_UNLOCK:
2357 (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2358 (ulong_t *)NULL, (size_t)NULL);
2359 break;
2360
2361 /*
2362 * Store VM advise for mapped pages in segment layer.
2363 */
2364 case MC_ADVISE:
2365 error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2366
2367 /*
2368 * Check for regular errors and special retry error
2369 */
2370 if (error) {
2371 if (error == IE_RETRY) {
2372 /*
2373 * Need to acquire writers lock, so
2374 * have to drop readers lock and start
2375 * all over again
2376 */
2377 AS_LOCK_EXIT(as, &as->a_lock);
2378 goto retry;
2379 } else if (error == IE_REATTACH) {
2380 /*
2381 * Find segment for current address
2382 * because current segment just got
2383 * split or concatenated
2384 */
2385 seg = as_segat(as, raddr);
2386 if (seg == NULL) {
2387 AS_LOCK_EXIT(as, &as->a_lock);
2388 return (ENOMEM);
2389 }
2390 } else {
2391 /*
2392 * Regular error
2393 */
2394 AS_LOCK_EXIT(as, &as->a_lock);
2395 return (error);
2396 }
2397 }
2398 break;
2399
2400 case MC_INHERIT_ZERO:
2401 if (seg->s_ops->inherit == NULL) {
2402 error = ENOTSUP;
2403 } else {
2404 error = SEGOP_INHERIT(seg, raddr, ssize,
2405 SEGP_INH_ZERO);
2406 }
2407 if (error != 0) {
2408 AS_LOCK_EXIT(as, &as->a_lock);
2409 return (error);
2410 }
2411 break;
2412
2413 /*
2414 * Can't happen.
2415 */
2416 default:
2417 panic("as_ctl: bad operation %d", func);
2418 /*NOTREACHED*/
2419 }
2420
2421 rsize -= ssize;
2422 raddr += ssize;
2423 }
2424
2499 /*
2500 * Count the number of segments covered by the range we are about to
2501 * lock. The segment count is used to size the shadow list we return
2502 * back to the caller.
2503 */
2504 for (; size != 0; size -= ssize, addr += ssize) {
2505 if (addr >= seg->s_base + seg->s_size) {
2506
2507 seg = AS_SEGNEXT(as, seg);
2508 if (seg == NULL || addr != seg->s_base) {
2509 AS_LOCK_EXIT(as, &as->a_lock);
2510 return (EFAULT);
2511 }
2512 /*
2513 * Do a quick check if subsequent segments
2514 * will most likely support pagelock.
2515 */
2516 if (seg->s_ops == &segvn_ops) {
2517 vnode_t *vp;
2518
2519 if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
2520 vp != NULL) {
2521 AS_LOCK_EXIT(as, &as->a_lock);
2522 goto slow;
2523 }
2524 } else if (seg->s_ops != &segspt_shmops) {
2525 AS_LOCK_EXIT(as, &as->a_lock);
2526 goto slow;
2527 }
2528 segcnt++;
2529 }
2530 if (addr + size > seg->s_base + seg->s_size) {
2531 ssize = seg->s_base + seg->s_size - addr;
2532 } else {
2533 ssize = size;
2534 }
2535 }
2536 ASSERT(segcnt > 1);
2537
2538 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2539
2540 addr = sv_addr;
2541 size = sv_size;
2542 seg = sv_seg;
2543
2544 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2545 if (addr >= seg->s_base + seg->s_size) {
2546 seg = AS_SEGNEXT(as, seg);
2547 ASSERT(seg != NULL && addr == seg->s_base);
2548 cnt++;
2549 ASSERT(cnt < segcnt);
2550 }
2551 if (addr + size > seg->s_base + seg->s_size) {
2552 ssize = seg->s_base + seg->s_size - addr;
2553 } else {
2554 ssize = size;
2555 }
2556 pl = &plist[npages + cnt];
2557 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2558 L_PAGELOCK, rw);
2559 if (error) {
2560 break;
2561 }
2562 ASSERT(plist[npages + cnt] != NULL);
2563 ASSERT(pl_off + btop(ssize) <= npages);
2564 bcopy(plist[npages + cnt], &plist[pl_off],
2565 btop(ssize) * sizeof (page_t *));
2566 pl_off += btop(ssize);
2567 }
2568
2569 if (size == 0) {
2570 AS_LOCK_EXIT(as, &as->a_lock);
2571 ASSERT(cnt == segcnt - 1);
2572 *ppp = plist;
2573 return (0);
2574 }
2575
2576 /*
2577 * one of pagelock calls failed. The error type is in error variable.
2580 * back to the caller.
2581 */
2582
2583 eaddr = addr;
2584 seg = sv_seg;
2585
2586 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2587 if (addr >= seg->s_base + seg->s_size) {
2588 seg = AS_SEGNEXT(as, seg);
2589 ASSERT(seg != NULL && addr == seg->s_base);
2590 cnt++;
2591 ASSERT(cnt < segcnt);
2592 }
2593 if (eaddr > seg->s_base + seg->s_size) {
2594 ssize = seg->s_base + seg->s_size - addr;
2595 } else {
2596 ssize = eaddr - addr;
2597 }
2598 pl = &plist[npages + cnt];
2599 ASSERT(*pl != NULL);
2600 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2601 L_PAGEUNLOCK, rw);
2602 }
2603
2604 AS_LOCK_EXIT(as, &as->a_lock);
2605
2606 kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2607
2608 if (error != ENOTSUP && error != EFAULT) {
2609 return (error);
2610 }
2611
2612 slow:
2613 /*
2614 * If we are here because pagelock failed due to the need to cow fault
2615 * in the pages we want to lock F_SOFTLOCK will do this job and in
2616 * next as_pagelock() call for this address range pagelock will
2617 * hopefully succeed.
2618 */
2619 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2620 if (fault_err != 0) {
2655 seg = as_segat(as, raddr);
2656 if (seg == NULL) {
2657 AS_LOCK_EXIT(as, &as->a_lock);
2658 return (EFAULT);
2659 }
2660 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2661 if (raddr + rsize > seg->s_base + seg->s_size) {
2662 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2663 }
2664 if (raddr + rsize <= raddr) {
2665 AS_LOCK_EXIT(as, &as->a_lock);
2666 return (EFAULT);
2667 }
2668
2669 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2670 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2671
2672 /*
2673 * try to lock pages and pass back shadow list
2674 */
2675 err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2676
2677 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2678
2679 AS_LOCK_EXIT(as, &as->a_lock);
2680
2681 if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2682 return (err);
2683 }
2684
2685 /*
2686 * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2687 * to no pagelock support for this segment or pages need to be cow
2688 * faulted in. If fault is needed F_SOFTLOCK will do this job for
2689 * this as_pagelock() call and in the next as_pagelock() call for the
2690  * same address range pagelock call will hopefully succeed.
2691 */
2692 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2693 if (fault_err != 0) {
2694 return (fc_decode(fault_err));
2695 }
2718 ASSERT(seg != NULL);
2719 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2720 ASSERT(addr + size > seg->s_base + seg->s_size);
2721 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2722 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2723 ASSERT(plist != NULL);
2724
2725 for (cnt = 0; addr < eaddr; addr += ssize) {
2726 if (addr >= seg->s_base + seg->s_size) {
2727 seg = AS_SEGNEXT(as, seg);
2728 ASSERT(seg != NULL && addr == seg->s_base);
2729 cnt++;
2730 }
2731 if (eaddr > seg->s_base + seg->s_size) {
2732 ssize = seg->s_base + seg->s_size - addr;
2733 } else {
2734 ssize = eaddr - addr;
2735 }
2736 pl = &plist[npages + cnt];
2737 ASSERT(*pl != NULL);
2738 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2739 L_PAGEUNLOCK, rw);
2740 }
2741 ASSERT(cnt > 0);
2742 AS_LOCK_EXIT(as, &as->a_lock);
2743
2744 cnt++;
2745 kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2746 }
2747
2748 /*
2749 * unlock pages in a given address range
2750 */
2751 void
2752 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2753 enum seg_rw rw)
2754 {
2755 struct seg *seg;
2756 size_t rsize;
2757 caddr_t raddr;
2758
2764 * falling back to as_fault
2765 */
2766 if (pp == NULL) {
2767 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2768 return;
2769 }
2770
2771 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2772 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2773 (size_t)raddr;
2774
2775 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2776 seg = as_segat(as, raddr);
2777 ASSERT(seg != NULL);
2778
2779 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2780 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2781
2782 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2783 if (raddr + rsize <= seg->s_base + seg->s_size) {
2784 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2785 } else {
2786 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2787 return;
2788 }
2789 AS_LOCK_EXIT(as, &as->a_lock);
2790 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2791 }
2792
2793 int
2794 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2795 boolean_t wait)
2796 {
2797 struct seg *seg;
2798 size_t ssize;
2799 caddr_t raddr; /* rounded down addr */
2800 size_t rsize; /* rounded up size */
2801 int error = 0;
2802 size_t pgsz = page_get_pagesize(szc);
2803
2804 setpgsz_top:
2819 as_setwatch(as);
2820 AS_LOCK_EXIT(as, &as->a_lock);
2821 return (ENOMEM);
2822 }
2823
2824 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2825 if (raddr >= seg->s_base + seg->s_size) {
2826 seg = AS_SEGNEXT(as, seg);
2827 if (seg == NULL || raddr != seg->s_base) {
2828 error = ENOMEM;
2829 break;
2830 }
2831 }
2832 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2833 ssize = seg->s_base + seg->s_size - raddr;
2834 } else {
2835 ssize = rsize;
2836 }
2837
2838 retry:
2839 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2840
2841 if (error == IE_NOMEM) {
2842 error = EAGAIN;
2843 break;
2844 }
2845
2846 if (error == IE_RETRY) {
2847 AS_LOCK_EXIT(as, &as->a_lock);
2848 goto setpgsz_top;
2849 }
2850
2851 if (error == ENOTSUP) {
2852 error = EINVAL;
2853 break;
2854 }
2855
2856 if (wait && (error == EAGAIN)) {
2857 /*
2858 * Memory is currently locked. It must be unlocked
2859 * before this operation can succeed through a retry.
2898 * number of retries without sleeping should
2899 * be very small. See segvn_reclaim() for
2900 * more comments.
2901 */
2902 AS_CLRNOUNMAPWAIT(as);
2903 mutex_exit(&as->a_contents);
2904 goto retry;
2905 }
2906 mutex_exit(&as->a_contents);
2907 goto setpgsz_top;
2908 } else if (error != 0) {
2909 break;
2910 }
2911 }
2912 as_setwatch(as);
2913 AS_LOCK_EXIT(as, &as->a_lock);
2914 return (error);
2915 }
2916
2917 /*
2918 * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
2919 * in its chunk where s_szc is less than the szc we want to set.
2920 */
2921 static int
2922 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
2923 int *retry)
2924 {
2925 struct seg *seg;
2926 size_t ssize;
2927 int error;
2928
2929 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
2930
2931 seg = as_segat(as, raddr);
2932 if (seg == NULL) {
2933 panic("as_iset3_default_lpsize: no seg");
2934 }
2935
2936 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2937 if (raddr >= seg->s_base + seg->s_size) {
2938 seg = AS_SEGNEXT(as, seg);
2939 if (seg == NULL || raddr != seg->s_base) {
2940 panic("as_iset3_default_lpsize: as changed");
2941 }
2942 }
2943 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2944 ssize = seg->s_base + seg->s_size - raddr;
2945 } else {
2946 ssize = rsize;
2947 }
2948
2949 if (szc > seg->s_szc) {
2950 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2951 /* Only retry on EINVAL segments that have no vnode. */
2952 if (error == EINVAL) {
2953 vnode_t *vp = NULL;
2954 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) &&
2955 (SEGOP_GETVP(seg, raddr, &vp) != 0 ||
2956 vp == NULL)) {
2957 *retry = 1;
2958 } else {
2959 *retry = 0;
2960 }
2961 }
2962 if (error) {
2963 return (error);
2964 }
2965 }
2966 }
2967 return (0);
2968 }
2969
2970 /*
2971 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
2972 * pagesize on each segment in its range, but if any fails with EINVAL,
2973 * then it reduces the pagesizes to the next size in the bitmap and
2974 * retries as_iset3_default_lpsize(). The reason why the code retries
2975 * smaller allowed sizes on EINVAL is because (a) the anon offset may not
3178 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3179 again:
3180 error = 0;
3181
3182 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3183 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3184 (size_t)raddr;
3185
3186 if (raddr + rsize < raddr) { /* check for wraparound */
3187 AS_LOCK_EXIT(as, &as->a_lock);
3188 return (ENOMEM);
3189 }
3190 as_clearwatchprot(as, raddr, rsize);
3191 seg = as_segat(as, raddr);
3192 if (seg == NULL) {
3193 as_setwatch(as);
3194 AS_LOCK_EXIT(as, &as->a_lock);
3195 return (ENOMEM);
3196 }
3197 if (seg->s_ops == &segvn_ops) {
3198 rtype = SEGOP_GETTYPE(seg, addr);
3199 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3200 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3201 segvn = 1;
3202 } else {
3203 segvn = 0;
3204 }
3205 setaddr = raddr;
3206 setsize = 0;
3207
3208 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3209 if (raddr >= (seg->s_base + seg->s_size)) {
3210 seg = AS_SEGNEXT(as, seg);
3211 if (seg == NULL || raddr != seg->s_base) {
3212 error = ENOMEM;
3213 break;
3214 }
3215 if (seg->s_ops == &segvn_ops) {
3216 stype = SEGOP_GETTYPE(seg, raddr);
3217 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3218 stype &= (MAP_SHARED | MAP_PRIVATE);
3219 if (segvn && (rflags != sflags ||
3220 rtype != stype)) {
3221 /*
3222 * The next segment is also segvn but
3223 * has different flags and/or type.
3224 */
3225 ASSERT(setsize != 0);
3226 error = as_iset_default_lpsize(as,
3227 setaddr, setsize, rflags, rtype);
3228 if (error) {
3229 break;
3230 }
3231 rflags = sflags;
3232 rtype = stype;
3233 setaddr = raddr;
3234 setsize = 0;
3235 } else if (!segvn) {
3236 rflags = sflags;
as_setwatch(struct as *as)
{
	struct watched_page *pwp;
	struct seg *seg;
	caddr_t vaddr;
	uint_t prot;
	int err, retrycnt;

	/* Fast path: no watchpoints registered on this address space. */
	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	for (pwp = avl_first(&as->a_wpage); pwp != NULL;
	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
		retrycnt = 0;
	retry:
		vaddr = pwp->wp_vaddr;
		if (pwp->wp_oprot != 0 ||	/* already set up */
		    (seg = as_segat(as, vaddr)) == NULL ||
		    SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
			continue;

		/*
		 * Save the segment's current protections in wp_oprot,
		 * then strip the protection bits corresponding to the
		 * watched accesses.
		 */
		pwp->wp_oprot = prot;
		if (pwp->wp_read)
			prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (pwp->wp_write)
			prot &= ~PROT_WRITE;
		if (pwp->wp_exec)
			prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		/*
		 * Only change the segment protections when the page is
		 * actually being watched (WP_NOWATCH clear) and the
		 * computed protections differ from the originals.
		 */
		if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
			if (err == IE_RETRY) {
				/*
				 * The segment driver wants the operation
				 * redone; clear wp_oprot so the page is
				 * treated as not yet set up, and go around
				 * again.  This is expected to happen at
				 * most once per page.
				 */
				pwp->wp_oprot = 0;
				ASSERT(retrycnt == 0);
				retrycnt++;
				goto retry;
			}
		}
		pwp->wp_prot = prot;
	}
}
3352
3353 /*
3354 * Clear all of the watched pages in the address space.
3355 */
3356 void
3357 as_clearwatch(struct as *as)
3358 {
3359 struct watched_page *pwp;
3360 struct seg *seg;
3361 caddr_t vaddr;
3362 uint_t prot;
3363 int err, retrycnt;
3364
3365 if (avl_numnodes(&as->a_wpage) == 0)
3366 return;
3367
3368 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3369
3370 for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3371 pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3372 retrycnt = 0;
3373 retry:
3374 vaddr = pwp->wp_vaddr;
3375 if (pwp->wp_oprot == 0 || /* not set up */
3376 (seg = as_segat(as, vaddr)) == NULL)
3377 continue;
3378
3379 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3380 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3381 if (err == IE_RETRY) {
3382 ASSERT(retrycnt == 0);
3383 retrycnt++;
3384 goto retry;
3385 }
3386 }
3387 pwp->wp_oprot = 0;
3388 pwp->wp_prot = 0;
3389 }
3390 }
3391
3392 /*
3393 * Force a new setup for all the watched pages in the range.
3394 */
3395 static void
3396 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3397 {
3398 struct watched_page *pwp;
3399 struct watched_page tpw;
3400 caddr_t eaddr = addr + size;
3414 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3415
3416 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3417 retrycnt = 0;
3418 vaddr = pwp->wp_vaddr;
3419
3420 wprot = prot;
3421 if (pwp->wp_read)
3422 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3423 if (pwp->wp_write)
3424 wprot &= ~PROT_WRITE;
3425 if (pwp->wp_exec)
3426 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3427 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3428 retry:
3429 seg = as_segat(as, vaddr);
3430 if (seg == NULL) {
3431 panic("as_setwatchprot: no seg");
3432 /*NOTREACHED*/
3433 }
3434 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot);
3435 if (err == IE_RETRY) {
3436 ASSERT(retrycnt == 0);
3437 retrycnt++;
3438 goto retry;
3439 }
3440 }
3441 pwp->wp_oprot = prot;
3442 pwp->wp_prot = wprot;
3443
3444 pwp = AVL_NEXT(&as->a_wpage, pwp);
3445 }
3446 }
3447
3448 /*
3449 * Clear all of the watched pages in the range.
3450 */
3451 static void
3452 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3453 {
3454 caddr_t eaddr = addr + size;
3461
3462 if (avl_numnodes(&as->a_wpage) == 0)
3463 return;
3464
3465 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3466 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3467 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3468
3469 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3470
3471 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3472
3473 if ((prot = pwp->wp_oprot) != 0) {
3474 retrycnt = 0;
3475
3476 if (prot != pwp->wp_prot) {
3477 retry:
3478 seg = as_segat(as, pwp->wp_vaddr);
3479 if (seg == NULL)
3480 continue;
3481 err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
3482 PAGESIZE, prot);
3483 if (err == IE_RETRY) {
3484 ASSERT(retrycnt == 0);
3485 retrycnt++;
3486 goto retry;
3487
3488 }
3489 }
3490 pwp->wp_oprot = 0;
3491 pwp->wp_prot = 0;
3492 }
3493
3494 pwp = AVL_NEXT(&as->a_wpage, pwp);
3495 }
3496 }
3497
3498 void
3499 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3500 {
3501 struct proc *p;
3518 int
3519 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3520 {
3521 struct seg *seg;
3522 int sts;
3523
3524 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3525 seg = as_segat(as, addr);
3526 if (seg == NULL) {
3527 AS_LOCK_EXIT(as, &as->a_lock);
3528 return (EFAULT);
3529 }
3530 /*
3531 * catch old drivers which may not support getmemid
3532 */
3533 if (seg->s_ops->getmemid == NULL) {
3534 AS_LOCK_EXIT(as, &as->a_lock);
3535 return (ENODEV);
3536 }
3537
3538 sts = SEGOP_GETMEMID(seg, addr, memidp);
3539
3540 AS_LOCK_EXIT(as, &as->a_lock);
3541 return (sts);
3542 }
|
691 * Invoke ALL callbacks. as_do_callbacks will do one callback
692 * per call, and not return (-1) until the callback has completed.
693 * When as_do_callbacks returns zero, all callbacks have completed.
694 */
695 mutex_enter(&as->a_contents);
696 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
697 ;
698
699 mutex_exit(&as->a_contents);
700 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
701
702 if (!free_started) {
703 free_started = B_TRUE;
704 hat_free_start(hat);
705 }
706 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
707 int err;
708
709 next = AS_SEGNEXT(as, seg);
710 retry:
711 err = segop_unmap(seg, seg->s_base, seg->s_size);
712 if (err == EAGAIN) {
713 mutex_enter(&as->a_contents);
714 if (as->a_callbacks) {
715 AS_LOCK_EXIT(as, &as->a_lock);
716 } else if (!AS_ISNOUNMAPWAIT(as)) {
717 /*
718 * Memory is currently locked. Wait for a
719 * cv_signal that it has been unlocked, then
720 * try the operation again.
721 */
722 if (AS_ISUNMAPWAIT(as) == 0)
723 cv_broadcast(&as->a_cv);
724 AS_SETUNMAPWAIT(as);
725 AS_LOCK_EXIT(as, &as->a_lock);
726 while (AS_ISUNMAPWAIT(as))
727 cv_wait(&as->a_cv, &as->a_contents);
728 } else {
729 /*
730 * We may have raced with
731 * segvn_reclaim()/segspt_reclaim(). In this
784
785 AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
786
787 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
788
789 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
790
791 if (seg->s_flags & S_PURGE) {
792 purgesize += seg->s_size;
793 continue;
794 }
795
796 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
797 if (newseg == NULL) {
798 AS_LOCK_EXIT(newas, &newas->a_lock);
799 as_setwatch(as);
800 AS_LOCK_EXIT(as, &as->a_lock);
801 as_free(newas);
802 return (-1);
803 }
804 if ((error = segop_dup(seg, newseg)) != 0) {
805 /*
806 * We call seg_free() on the new seg
807 * because the segment is not set up
808 * completely; i.e. it has no ops.
809 */
810 as_setwatch(as);
811 AS_LOCK_EXIT(as, &as->a_lock);
812 seg_free(newseg);
813 AS_LOCK_EXIT(newas, &newas->a_lock);
814 as_free(newas);
815 return (error);
816 }
817 newas->a_size += seg->s_size;
818 }
819 newas->a_resvsize = as->a_resvsize - purgesize;
820
821 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
822
823 AS_LOCK_EXIT(newas, &newas->a_lock);
824
923
924 as_lock_held = 1;
925 }
926
927 addrsav = raddr;
928 segsav = seg;
929
930 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
931 if (raddr >= seg->s_base + seg->s_size) {
932 seg = AS_SEGNEXT(as, seg);
933 if (seg == NULL || raddr != seg->s_base) {
934 res = FC_NOMAP;
935 break;
936 }
937 }
938 if (raddr + rsize > seg->s_base + seg->s_size)
939 ssize = seg->s_base + seg->s_size - raddr;
940 else
941 ssize = rsize;
942
943 res = segop_fault(hat, seg, raddr, ssize, type, rw);
944
945 /* Restore watchpoints */
946 if (holding_wpage) {
947 as_setwatch(as);
948 holding_wpage = 0;
949 }
950
951 if (res != 0)
952 break;
953 }
954
955 /*
956 * If we were SOFTLOCKing and encountered a failure,
957 * we must SOFTUNLOCK the range we already did. (Maybe we
958 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
959 * right here...)
960 */
961 if (res != 0 && type == F_SOFTLOCK) {
962 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
963 if (addrsav >= seg->s_base + seg->s_size)
964 seg = AS_SEGNEXT(as, seg);
965 ASSERT(seg != NULL);
966 /*
967 * Now call the fault routine again to perform the
968 * unlock using S_OTHER instead of the rw variable
969 * since we never got a chance to touch the pages.
970 */
971 if (raddr > seg->s_base + seg->s_size)
972 ssize = seg->s_base + seg->s_size - addrsav;
973 else
974 ssize = raddr - addrsav;
975 (void) segop_fault(hat, seg, addrsav, ssize,
976 F_SOFTUNLOCK, S_OTHER);
977 }
978 }
979 if (as_lock_held)
980 AS_LOCK_EXIT(as, &as->a_lock);
981 if (lwp != NULL)
982 lwp->lwp_nostop--;
983
984 /*
985 * If the lower levels returned EDEADLK for a fault,
986 * It means that we should retry the fault. Let's wait
987 * a bit also to let the deadlock causing condition clear.
988 * This is part of a gross hack to work around a design flaw
989 * in the ufs/sds logging code and should go away when the
990 * logging code is re-designed to fix the problem. See bug
991 * 4125102 for details of the problem.
992 */
993 if (FC_ERRNO(res) == EDEADLK) {
994 delay(deadlk_wait);
995 res = 0;
1025 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1026 (size_t)raddr;
1027
1028 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1029 seg = as_segat(as, raddr);
1030 if (seg == NULL) {
1031 AS_LOCK_EXIT(as, &as->a_lock);
1032 if (lwp != NULL)
1033 lwp->lwp_nostop--;
1034 return (FC_NOMAP);
1035 }
1036
1037 for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1038 if (raddr >= seg->s_base + seg->s_size) {
1039 seg = AS_SEGNEXT(as, seg);
1040 if (seg == NULL || raddr != seg->s_base) {
1041 res = FC_NOMAP;
1042 break;
1043 }
1044 }
1045 res = segop_faulta(seg, raddr);
1046 if (res != 0)
1047 break;
1048 }
1049 AS_LOCK_EXIT(as, &as->a_lock);
1050 if (lwp != NULL)
1051 lwp->lwp_nostop--;
1052 /*
1053 * If the lower levels returned EDEADLK for a fault,
1054 * It means that we should retry the fault. Let's wait
1055 * a bit also to let the deadlock causing condition clear.
1056 * This is part of a gross hack to work around a design flaw
1057 * in the ufs/sds logging code and should go away when the
1058 * logging code is re-designed to fix the problem. See bug
1059 * 4125102 for details of the problem.
1060 */
1061 if (FC_ERRNO(res) == EDEADLK) {
1062 delay(deadlk_wait);
1063 res = 0;
1064 goto retry;
1065 }
1115 seg = as_segat(as, raddr);
1116 if (seg == NULL) {
1117 as_setwatch(as);
1118 AS_LOCK_EXIT(as, &as->a_lock);
1119 return (ENOMEM);
1120 }
1121
1122 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1123 if (raddr >= seg->s_base + seg->s_size) {
1124 seg = AS_SEGNEXT(as, seg);
1125 if (seg == NULL || raddr != seg->s_base) {
1126 error = ENOMEM;
1127 break;
1128 }
1129 }
1130 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1131 ssize = seg->s_base + seg->s_size - raddr;
1132 else
1133 ssize = rsize;
1134 retry:
1135 error = segop_setprot(seg, raddr, ssize, prot);
1136
1137 if (error == IE_NOMEM) {
1138 error = EAGAIN;
1139 break;
1140 }
1141
1142 if (error == IE_RETRY) {
1143 AS_LOCK_EXIT(as, &as->a_lock);
1144 writer = 1;
1145 goto setprot_top;
1146 }
1147
1148 if (error == EAGAIN) {
1149 /*
1150 * Make sure we have a_lock as writer.
1151 */
1152 if (writer == 0) {
1153 AS_LOCK_EXIT(as, &as->a_lock);
1154 writer = 1;
1155 goto setprot_top;
1266 seg = as_segat(as, raddr);
1267 if (seg == NULL) {
1268 as_setwatch(as);
1269 AS_LOCK_EXIT(as, &as->a_lock);
1270 return (ENOMEM);
1271 }
1272
1273 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1274 if (raddr >= seg->s_base + seg->s_size) {
1275 seg = AS_SEGNEXT(as, seg);
1276 if (seg == NULL || raddr != seg->s_base) {
1277 error = ENOMEM;
1278 break;
1279 }
1280 }
1281 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1282 ssize = seg->s_base + seg->s_size - raddr;
1283 else
1284 ssize = rsize;
1285
1286 error = segop_checkprot(seg, raddr, ssize, prot);
1287 if (error != 0)
1288 break;
1289 }
1290 as_setwatch(as);
1291 AS_LOCK_EXIT(as, &as->a_lock);
1292 return (error);
1293 }
1294
1295 int
1296 as_unmap(struct as *as, caddr_t addr, size_t size)
1297 {
1298 struct seg *seg, *seg_next;
1299 struct as_callback *cb;
1300 caddr_t raddr, eaddr;
1301 size_t ssize, rsize = 0;
1302 int err;
1303
1304 top:
1305 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1306 eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1332 else
1333 ssize = eaddr - raddr;
1334
1335 /*
1336 * Save next segment pointer since seg can be
1337 * destroyed during the segment unmap operation.
1338 */
1339 seg_next = AS_SEGNEXT(as, seg);
1340
1341 /*
1342 * We didn't count /dev/null mappings, so ignore them here.
1343 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1344 * we have to do this check here while we have seg.)
1345 */
1346 rsize = 0;
1347 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1348 !SEG_IS_PARTIAL_RESV(seg))
1349 rsize = ssize;
1350
1351 retry:
1352 err = segop_unmap(seg, raddr, ssize);
1353 if (err == EAGAIN) {
1354 /*
1355 * Memory is currently locked. It must be unlocked
1356 * before this operation can succeed through a retry.
1357 * The possible reasons for locked memory and
1358 * corresponding strategies for unlocking are:
1359 * (1) Normal I/O
1360 * wait for a signal that the I/O operation
1361 * has completed and the memory is unlocked.
1362 * (2) Asynchronous I/O
1363 * The aio subsystem does not unlock pages when
1364 * the I/O is completed. Those pages are unlocked
1365 * when the application calls aiowait/aioerror.
1366 * So, to prevent blocking forever, cv_broadcast()
1367 * is done to wake up aio_cleanup_thread.
1368 * Subsequently, segvn_reclaim will be called, and
1369 * that will do AS_CLRUNMAPWAIT() and wake us up.
1370 * (3) Long term page locking:
1371 * Drivers intending to have pages locked for a
1372 * period considerably longer than for normal I/O
1770 */
1771 void
1772 as_purge(struct as *as)
1773 {
1774 struct seg *seg;
1775 struct seg *next_seg;
1776
1777 /*
1778 * the setting of NEEDSPURGE is protect by as_rangelock(), so
1779 * no need to grab a_contents mutex for this check
1780 */
1781 if ((as->a_flags & AS_NEEDSPURGE) == 0)
1782 return;
1783
1784 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1785 next_seg = NULL;
1786 seg = AS_SEGFIRST(as);
1787 while (seg != NULL) {
1788 next_seg = AS_SEGNEXT(as, seg);
1789 if (seg->s_flags & S_PURGE)
1790 segop_unmap(seg, seg->s_base, seg->s_size);
1791 seg = next_seg;
1792 }
1793 AS_LOCK_EXIT(as, &as->a_lock);
1794
1795 mutex_enter(&as->a_contents);
1796 as->a_flags &= ~AS_NEEDSPURGE;
1797 mutex_exit(&as->a_contents);
1798 }
1799
1800 /*
1801 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1802 * range of addresses at least "minlen" long, where the base of the range is
1803 * at "off" phase from an "align" boundary and there is space for a
1804  * "redzone"-sized redzone on either side of the range. Thus,
1805 * if align was 4M and off was 16k, the user wants a hole which will start
1806 * 16k into a 4M page.
1807 *
1808 * If flags specifies AH_HI, the hole will have the highest possible address
1809 * in the range. We use the as->a_lastgap field to figure out where to
1810 * start looking for a gap.
2082
2083 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2084 seg = as_segat(as, raddr);
2085 if (seg == NULL) {
2086 AS_LOCK_EXIT(as, &as->a_lock);
2087 return (-1);
2088 }
2089
2090 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2091 if (raddr >= seg->s_base + seg->s_size) {
2092 seg = AS_SEGNEXT(as, seg);
2093 if (seg == NULL || raddr != seg->s_base) {
2094 error = -1;
2095 break;
2096 }
2097 }
2098 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2099 ssize = seg->s_base + seg->s_size - raddr;
2100 else
2101 ssize = rsize;
2102 *sizep += isize = segop_incore(seg, raddr, ssize, vec);
2103 if (isize != ssize) {
2104 error = -1;
2105 break;
2106 }
2107 vec += btopr(ssize);
2108 }
2109 AS_LOCK_EXIT(as, &as->a_lock);
2110 return (error);
2111 }
2112
2113 static void
2114 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2115 ulong_t *bitmap, size_t position, size_t npages)
2116 {
2117 caddr_t range_start;
2118 size_t pos1 = position;
2119 size_t pos2;
2120 size_t size;
2121 size_t end_pos = npages + position;
2122
2123 while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2124 size = ptob((pos2 - pos1));
2125 range_start = (caddr_t)((uintptr_t)addr +
2126 ptob(pos1 - position));
2127
2128 (void) segop_lockop(seg, range_start, size, attr, MC_UNLOCK,
2129 (ulong_t *)NULL, (size_t)NULL);
2130 pos1 = pos2;
2131 }
2132 }
2133
2134 static void
2135 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2136 caddr_t raddr, size_t rsize)
2137 {
2138 struct seg *seg = as_segat(as, raddr);
2139 size_t ssize;
2140
2141 while (rsize != 0) {
2142 if (raddr >= seg->s_base + seg->s_size)
2143 seg = AS_SEGNEXT(as, seg);
2144
2145 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2146 ssize = seg->s_base + seg->s_size - raddr;
2147 else
2148 ssize = rsize;
2204 if (seg == NULL) {
2205 AS_LOCK_EXIT(as, &as->a_lock);
2206 return (0);
2207 }
2208
2209 do {
2210 raddr = (caddr_t)((uintptr_t)seg->s_base &
2211 (uintptr_t)PAGEMASK);
2212 rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2213 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2214 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2215
2216 mlock_size = BT_BITOUL(btopr(rlen));
2217 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2218 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2219 AS_LOCK_EXIT(as, &as->a_lock);
2220 return (EAGAIN);
2221 }
2222
2223 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2224 error = segop_lockop(seg, seg->s_base,
2225 seg->s_size, attr, MC_LOCK, mlock_map, pos);
2226 if (error != 0)
2227 break;
2228 pos += seg_pages(seg);
2229 }
2230
2231 if (error) {
2232 for (seg = AS_SEGFIRST(as); seg != NULL;
2233 seg = AS_SEGNEXT(as, seg)) {
2234
2235 raddr = (caddr_t)((uintptr_t)seg->s_base &
2236 (uintptr_t)PAGEMASK);
2237 npages = seg_pages(seg);
2238 as_segunlock(seg, raddr, attr, mlock_map,
2239 idx, npages);
2240 idx += npages;
2241 }
2242 }
2243
2244 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2245 AS_LOCK_EXIT(as, &as->a_lock);
2246 goto lockerr;
2247 } else if (func == MC_UNLOCKAS) {
2248 mutex_enter(&as->a_contents);
2249 AS_CLRPGLCK(as);
2250 mutex_exit(&as->a_contents);
2251
2252 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2253 error = segop_lockop(seg, seg->s_base,
2254 seg->s_size, attr, MC_UNLOCK, NULL, 0);
2255 if (error != 0)
2256 break;
2257 }
2258
2259 AS_LOCK_EXIT(as, &as->a_lock);
2260 goto lockerr;
2261 }
2262
2263 /*
2264 * Normalize addresses and sizes.
2265 */
2266 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2267 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2268 (size_t)raddr;
2269
2270 if (raddr + rsize < raddr) { /* check for wraparound */
2271 AS_LOCK_EXIT(as, &as->a_lock);
2272 return (ENOMEM);
2273 }
2311 }
2312 AS_LOCK_EXIT(as, &as->a_lock);
2313 return (ENOMEM);
2314 }
2315 }
2316 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2317 ssize = seg->s_base + seg->s_size - raddr;
2318 else
2319 ssize = rsize;
2320
2321 /*
2322 * Dispatch on specific function.
2323 */
2324 switch (func) {
2325
2326 /*
2327 * Synchronize cached data from mappings with backing
2328 * objects.
2329 */
2330 case MC_SYNC:
2331 if (error = segop_sync(seg, raddr, ssize,
2332 attr, (uint_t)arg)) {
2333 AS_LOCK_EXIT(as, &as->a_lock);
2334 return (error);
2335 }
2336 break;
2337
2338 /*
2339 * Lock pages in memory.
2340 */
2341 case MC_LOCK:
2342 if (error = segop_lockop(seg, raddr, ssize,
2343 attr, func, mlock_map, pos)) {
2344 as_unlockerr(as, attr, mlock_map, initraddr,
2345 initrsize - rsize + ssize);
2346 kmem_free(mlock_map, mlock_size *
2347 sizeof (ulong_t));
2348 AS_LOCK_EXIT(as, &as->a_lock);
2349 goto lockerr;
2350 }
2351 break;
2352
2353 /*
2354 * Unlock mapped pages.
2355 */
2356 case MC_UNLOCK:
2357 (void) segop_lockop(seg, raddr, ssize, attr, func,
2358 (ulong_t *)NULL, (size_t)NULL);
2359 break;
2360
2361 /*
2362 * Store VM advise for mapped pages in segment layer.
2363 */
2364 case MC_ADVISE:
2365 error = segop_advise(seg, raddr, ssize, (uint_t)arg);
2366
2367 /*
2368 * Check for regular errors and special retry error
2369 */
2370 if (error) {
2371 if (error == IE_RETRY) {
2372 /*
2373 * Need to acquire writers lock, so
2374 * have to drop readers lock and start
2375 * all over again
2376 */
2377 AS_LOCK_EXIT(as, &as->a_lock);
2378 goto retry;
2379 } else if (error == IE_REATTACH) {
2380 /*
2381 * Find segment for current address
2382 * because current segment just got
2383 * split or concatenated
2384 */
2385 seg = as_segat(as, raddr);
2386 if (seg == NULL) {
2387 AS_LOCK_EXIT(as, &as->a_lock);
2388 return (ENOMEM);
2389 }
2390 } else {
2391 /*
2392 * Regular error
2393 */
2394 AS_LOCK_EXIT(as, &as->a_lock);
2395 return (error);
2396 }
2397 }
2398 break;
2399
2400 case MC_INHERIT_ZERO:
2401 if (seg->s_ops->inherit == NULL) {
2402 error = ENOTSUP;
2403 } else {
2404 error = segop_inherit(seg, raddr, ssize,
2405 SEGP_INH_ZERO);
2406 }
2407 if (error != 0) {
2408 AS_LOCK_EXIT(as, &as->a_lock);
2409 return (error);
2410 }
2411 break;
2412
2413 /*
2414 * Can't happen.
2415 */
2416 default:
2417 panic("as_ctl: bad operation %d", func);
2418 /*NOTREACHED*/
2419 }
2420
2421 rsize -= ssize;
2422 raddr += ssize;
2423 }
2424
2499 /*
2500 * Count the number of segments covered by the range we are about to
2501 * lock. The segment count is used to size the shadow list we return
2502 * back to the caller.
2503 */
2504 for (; size != 0; size -= ssize, addr += ssize) {
2505 if (addr >= seg->s_base + seg->s_size) {
2506
2507 seg = AS_SEGNEXT(as, seg);
2508 if (seg == NULL || addr != seg->s_base) {
2509 AS_LOCK_EXIT(as, &as->a_lock);
2510 return (EFAULT);
2511 }
2512 /*
2513 * Do a quick check if subsequent segments
2514 * will most likely support pagelock.
2515 */
2516 if (seg->s_ops == &segvn_ops) {
2517 vnode_t *vp;
2518
2519 if (segop_getvp(seg, addr, &vp) != 0 ||
2520 vp != NULL) {
2521 AS_LOCK_EXIT(as, &as->a_lock);
2522 goto slow;
2523 }
2524 } else if (seg->s_ops != &segspt_shmops) {
2525 AS_LOCK_EXIT(as, &as->a_lock);
2526 goto slow;
2527 }
2528 segcnt++;
2529 }
2530 if (addr + size > seg->s_base + seg->s_size) {
2531 ssize = seg->s_base + seg->s_size - addr;
2532 } else {
2533 ssize = size;
2534 }
2535 }
2536 ASSERT(segcnt > 1);
2537
2538 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2539
2540 addr = sv_addr;
2541 size = sv_size;
2542 seg = sv_seg;
2543
2544 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2545 if (addr >= seg->s_base + seg->s_size) {
2546 seg = AS_SEGNEXT(as, seg);
2547 ASSERT(seg != NULL && addr == seg->s_base);
2548 cnt++;
2549 ASSERT(cnt < segcnt);
2550 }
2551 if (addr + size > seg->s_base + seg->s_size) {
2552 ssize = seg->s_base + seg->s_size - addr;
2553 } else {
2554 ssize = size;
2555 }
2556 pl = &plist[npages + cnt];
2557 error = segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2558 L_PAGELOCK, rw);
2559 if (error) {
2560 break;
2561 }
2562 ASSERT(plist[npages + cnt] != NULL);
2563 ASSERT(pl_off + btop(ssize) <= npages);
2564 bcopy(plist[npages + cnt], &plist[pl_off],
2565 btop(ssize) * sizeof (page_t *));
2566 pl_off += btop(ssize);
2567 }
2568
2569 if (size == 0) {
2570 AS_LOCK_EXIT(as, &as->a_lock);
2571 ASSERT(cnt == segcnt - 1);
2572 *ppp = plist;
2573 return (0);
2574 }
2575
2576 /*
2577 * one of pagelock calls failed. The error type is in error variable.
2580 * back to the caller.
2581 */
2582
2583 eaddr = addr;
2584 seg = sv_seg;
2585
2586 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2587 if (addr >= seg->s_base + seg->s_size) {
2588 seg = AS_SEGNEXT(as, seg);
2589 ASSERT(seg != NULL && addr == seg->s_base);
2590 cnt++;
2591 ASSERT(cnt < segcnt);
2592 }
2593 if (eaddr > seg->s_base + seg->s_size) {
2594 ssize = seg->s_base + seg->s_size - addr;
2595 } else {
2596 ssize = eaddr - addr;
2597 }
2598 pl = &plist[npages + cnt];
2599 ASSERT(*pl != NULL);
2600 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2601 L_PAGEUNLOCK, rw);
2602 }
2603
2604 AS_LOCK_EXIT(as, &as->a_lock);
2605
2606 kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2607
2608 if (error != ENOTSUP && error != EFAULT) {
2609 return (error);
2610 }
2611
2612 slow:
2613 /*
2614 * If we are here because pagelock failed due to the need to cow fault
2615 * in the pages we want to lock F_SOFTLOCK will do this job and in
2616 * next as_pagelock() call for this address range pagelock will
2617 * hopefully succeed.
2618 */
2619 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2620 if (fault_err != 0) {
2655 seg = as_segat(as, raddr);
2656 if (seg == NULL) {
2657 AS_LOCK_EXIT(as, &as->a_lock);
2658 return (EFAULT);
2659 }
2660 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2661 if (raddr + rsize > seg->s_base + seg->s_size) {
2662 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2663 }
2664 if (raddr + rsize <= raddr) {
2665 AS_LOCK_EXIT(as, &as->a_lock);
2666 return (EFAULT);
2667 }
2668
2669 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2670 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2671
2672 /*
2673 * try to lock pages and pass back shadow list
2674 */
2675 err = segop_pagelock(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2676
2677 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2678
2679 AS_LOCK_EXIT(as, &as->a_lock);
2680
2681 if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2682 return (err);
2683 }
2684
2685 /*
2686 * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2687 * to no pagelock support for this segment or pages need to be cow
2688 * faulted in. If fault is needed F_SOFTLOCK will do this job for
2689 * this as_pagelock() call and in the next as_pagelock() call for the
2690  * same address range pagelock call will hopefully succeed.
2691 */
2692 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2693 if (fault_err != 0) {
2694 return (fc_decode(fault_err));
2695 }
2718 ASSERT(seg != NULL);
2719 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2720 ASSERT(addr + size > seg->s_base + seg->s_size);
2721 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2722 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2723 ASSERT(plist != NULL);
2724
2725 for (cnt = 0; addr < eaddr; addr += ssize) {
2726 if (addr >= seg->s_base + seg->s_size) {
2727 seg = AS_SEGNEXT(as, seg);
2728 ASSERT(seg != NULL && addr == seg->s_base);
2729 cnt++;
2730 }
2731 if (eaddr > seg->s_base + seg->s_size) {
2732 ssize = seg->s_base + seg->s_size - addr;
2733 } else {
2734 ssize = eaddr - addr;
2735 }
2736 pl = &plist[npages + cnt];
2737 ASSERT(*pl != NULL);
2738 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2739 L_PAGEUNLOCK, rw);
2740 }
2741 ASSERT(cnt > 0);
2742 AS_LOCK_EXIT(as, &as->a_lock);
2743
2744 cnt++;
2745 kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2746 }
2747
2748 /*
2749 * unlock pages in a given address range
2750 */
2751 void
2752 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2753 enum seg_rw rw)
2754 {
2755 struct seg *seg;
2756 size_t rsize;
2757 caddr_t raddr;
2758
2764 * falling back to as_fault
2765 */
2766 if (pp == NULL) {
2767 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2768 return;
2769 }
2770
2771 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2772 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2773 (size_t)raddr;
2774
2775 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2776 seg = as_segat(as, raddr);
2777 ASSERT(seg != NULL);
2778
2779 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2780 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2781
2782 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2783 if (raddr + rsize <= seg->s_base + seg->s_size) {
2784 segop_pagelock(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2785 } else {
2786 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2787 return;
2788 }
2789 AS_LOCK_EXIT(as, &as->a_lock);
2790 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2791 }
2792
2793 int
2794 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2795 boolean_t wait)
2796 {
2797 struct seg *seg;
2798 size_t ssize;
2799 caddr_t raddr; /* rounded down addr */
2800 size_t rsize; /* rounded up size */
2801 int error = 0;
2802 size_t pgsz = page_get_pagesize(szc);
2803
2804 setpgsz_top:
2819 as_setwatch(as);
2820 AS_LOCK_EXIT(as, &as->a_lock);
2821 return (ENOMEM);
2822 }
2823
2824 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2825 if (raddr >= seg->s_base + seg->s_size) {
2826 seg = AS_SEGNEXT(as, seg);
2827 if (seg == NULL || raddr != seg->s_base) {
2828 error = ENOMEM;
2829 break;
2830 }
2831 }
2832 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2833 ssize = seg->s_base + seg->s_size - raddr;
2834 } else {
2835 ssize = rsize;
2836 }
2837
2838 retry:
2839 error = segop_setpagesize(seg, raddr, ssize, szc);
2840
2841 if (error == IE_NOMEM) {
2842 error = EAGAIN;
2843 break;
2844 }
2845
2846 if (error == IE_RETRY) {
2847 AS_LOCK_EXIT(as, &as->a_lock);
2848 goto setpgsz_top;
2849 }
2850
2851 if (error == ENOTSUP) {
2852 error = EINVAL;
2853 break;
2854 }
2855
2856 if (wait && (error == EAGAIN)) {
2857 /*
2858 * Memory is currently locked. It must be unlocked
2859 * before this operation can succeed through a retry.
2898 * number of retries without sleeping should
2899 * be very small. See segvn_reclaim() for
2900 * more comments.
2901 */
2902 AS_CLRNOUNMAPWAIT(as);
2903 mutex_exit(&as->a_contents);
2904 goto retry;
2905 }
2906 mutex_exit(&as->a_contents);
2907 goto setpgsz_top;
2908 } else if (error != 0) {
2909 break;
2910 }
2911 }
2912 as_setwatch(as);
2913 AS_LOCK_EXIT(as, &as->a_lock);
2914 return (error);
2915 }
2916
2917 /*
2918 * as_iset3_default_lpsize() just calls segop_setpagesize() on all segments
2919 * in its chunk where s_szc is less than the szc we want to set.
2920 */
2921 static int
2922 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
2923 int *retry)
2924 {
2925 struct seg *seg;
2926 size_t ssize;
2927 int error;
2928
2929 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
2930
2931 seg = as_segat(as, raddr);
2932 if (seg == NULL) {
2933 panic("as_iset3_default_lpsize: no seg");
2934 }
2935
2936 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2937 if (raddr >= seg->s_base + seg->s_size) {
2938 seg = AS_SEGNEXT(as, seg);
2939 if (seg == NULL || raddr != seg->s_base) {
2940 panic("as_iset3_default_lpsize: as changed");
2941 }
2942 }
2943 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2944 ssize = seg->s_base + seg->s_size - raddr;
2945 } else {
2946 ssize = rsize;
2947 }
2948
2949 if (szc > seg->s_szc) {
2950 error = segop_setpagesize(seg, raddr, ssize, szc);
2951 /* Only retry on EINVAL segments that have no vnode. */
2952 if (error == EINVAL) {
2953 vnode_t *vp = NULL;
2954 if ((segop_gettype(seg, raddr) & MAP_SHARED) &&
2955 (segop_getvp(seg, raddr, &vp) != 0 ||
2956 vp == NULL)) {
2957 *retry = 1;
2958 } else {
2959 *retry = 0;
2960 }
2961 }
2962 if (error) {
2963 return (error);
2964 }
2965 }
2966 }
2967 return (0);
2968 }
2969
2970 /*
2971 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
2972 * pagesize on each segment in its range, but if any fails with EINVAL,
2973 * then it reduces the pagesizes to the next size in the bitmap and
2974 * retries as_iset3_default_lpsize(). The reason why the code retries
2975 * smaller allowed sizes on EINVAL is because (a) the anon offset may not
3178 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3179 again:
3180 error = 0;
3181
3182 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3183 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3184 (size_t)raddr;
3185
3186 if (raddr + rsize < raddr) { /* check for wraparound */
3187 AS_LOCK_EXIT(as, &as->a_lock);
3188 return (ENOMEM);
3189 }
3190 as_clearwatchprot(as, raddr, rsize);
3191 seg = as_segat(as, raddr);
3192 if (seg == NULL) {
3193 as_setwatch(as);
3194 AS_LOCK_EXIT(as, &as->a_lock);
3195 return (ENOMEM);
3196 }
3197 if (seg->s_ops == &segvn_ops) {
3198 rtype = segop_gettype(seg, addr);
3199 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3200 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3201 segvn = 1;
3202 } else {
3203 segvn = 0;
3204 }
3205 setaddr = raddr;
3206 setsize = 0;
3207
3208 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3209 if (raddr >= (seg->s_base + seg->s_size)) {
3210 seg = AS_SEGNEXT(as, seg);
3211 if (seg == NULL || raddr != seg->s_base) {
3212 error = ENOMEM;
3213 break;
3214 }
3215 if (seg->s_ops == &segvn_ops) {
3216 stype = segop_gettype(seg, raddr);
3217 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3218 stype &= (MAP_SHARED | MAP_PRIVATE);
3219 if (segvn && (rflags != sflags ||
3220 rtype != stype)) {
3221 /*
3222 * The next segment is also segvn but
3223 * has different flags and/or type.
3224 */
3225 ASSERT(setsize != 0);
3226 error = as_iset_default_lpsize(as,
3227 setaddr, setsize, rflags, rtype);
3228 if (error) {
3229 break;
3230 }
3231 rflags = sflags;
3232 rtype = stype;
3233 setaddr = raddr;
3234 setsize = 0;
3235 } else if (!segvn) {
3236 rflags = sflags;
as_setwatch(struct as *as)
{
	struct watched_page *pwp;
	struct seg *seg;
	caddr_t vaddr;
	uint_t prot;
	int err, retrycnt;

	/*
	 * Establish watchpoint protections on every watched page in the
	 * address space: remember each page's original protections in
	 * wp_oprot, then drop the permissions implied by the watch flags
	 * via segop_setprot().  Caller must hold a_lock as writer.
	 */
	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	for (pwp = avl_first(&as->a_wpage); pwp != NULL;
	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
		retrycnt = 0;
	retry:
		vaddr = pwp->wp_vaddr;
		/*
		 * Skip pages that are already set up (wp_oprot != 0), are
		 * not currently mapped, or whose protections can't be read.
		 */
		if (pwp->wp_oprot != 0 ||	/* already set up */
		    (seg = as_segat(as, vaddr)) == NULL ||
		    segop_getprot(seg, vaddr, 0, &prot) != 0)
			continue;

		/* Save the original protections before reducing them. */
		pwp->wp_oprot = prot;
		if (pwp->wp_read)
			prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (pwp->wp_write)
			prot &= ~PROT_WRITE;
		if (pwp->wp_exec)
			prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
			err = segop_setprot(seg, vaddr, PAGESIZE, prot);
			if (err == IE_RETRY) {
				/*
				 * The segment driver asked us to retry;
				 * undo the wp_oprot save so the retry path
				 * sees a clean slate.  At most one retry is
				 * expected.
				 */
				pwp->wp_oprot = 0;
				ASSERT(retrycnt == 0);
				retrycnt++;
				goto retry;
			}
		}
		pwp->wp_prot = prot;
	}
}
3352
3353 /*
3354 * Clear all of the watched pages in the address space.
3355 */
void
as_clearwatch(struct as *as)
{
	struct watched_page *pwp;
	struct seg *seg;
	caddr_t vaddr;
	uint_t prot;
	int err, retrycnt;

	/*
	 * Undo as_setwatch(): restore each watched page's original
	 * protections (wp_oprot) and mark the watchpoint as torn down by
	 * zeroing wp_oprot/wp_prot.  Caller must hold a_lock as writer.
	 */
	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	for (pwp = avl_first(&as->a_wpage); pwp != NULL;
	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
		retrycnt = 0;
	retry:
		vaddr = pwp->wp_vaddr;
		/*
		 * Skip pages that were never set up or are no longer
		 * mapped; there is nothing to restore for them.
		 */
		if (pwp->wp_oprot == 0 ||	/* not set up */
		    (seg = as_segat(as, vaddr)) == NULL)
			continue;

		/* Restore only if the watchpoint actually changed them. */
		if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
			err = segop_setprot(seg, vaddr, PAGESIZE, prot);
			if (err == IE_RETRY) {
				/* At most one retry is expected. */
				ASSERT(retrycnt == 0);
				retrycnt++;
				goto retry;
			}
		}
		/* Zeroed wp_oprot marks this watchpoint as not set up. */
		pwp->wp_oprot = 0;
		pwp->wp_prot = 0;
	}
}
3391
/*
 * Force a new setup for all the watched pages in the range.
 * Recompute the watch protections for every watched page in
 * [addr, addr + size) starting from the new base protection "prot",
 * apply them to the underlying segment, and record both the original
 * (wp_oprot) and the effective watch (wp_prot) protections.
 */
static void
as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct watched_page *pwp;
	struct watched_page tpw;
	caddr_t eaddr = addr + size;
	pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		retrycnt = 0;
		vaddr = pwp->wp_vaddr;

		/*
		 * Starting from the new base protection, strip the
		 * permissions that must fault for this watchpoint.
		 */
		wprot = prot;
		if (pwp->wp_read)
			wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (pwp->wp_write)
			wprot &= ~PROT_WRITE;
		if (pwp->wp_exec)
			wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
		retry:
			seg = as_segat(as, vaddr);
			if (seg == NULL) {
				/* A watched page must lie within a segment. */
				panic("as_setwatchprot: no seg");
				/*NOTREACHED*/
			}
			err = segop_setprot(seg, vaddr, PAGESIZE, wprot);
			if (err == IE_RETRY) {
				/* Retry the setprot at most once. */
				ASSERT(retrycnt == 0);
				retrycnt++;
				goto retry;
			}
		}
		pwp->wp_oprot = prot;
		pwp->wp_prot = wprot;

		pwp = AVL_NEXT(&as->a_wpage, pwp);
	}
}
3447
/*
 * Clear all of the watched pages in the range.
 * For each watched page in [addr, addr + size) that was set up
 * (wp_oprot != 0), restore the segment's saved protections and mark
 * the page as no longer set up.
 */
static void
as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
{
	caddr_t eaddr = addr + size;

	/* Nothing to do if no pages are being watched. */
	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	/* Find the first watched page at or after the start of the range. */
	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {

		if ((prot = pwp->wp_oprot) != 0) {
			retrycnt = 0;

			if (prot != pwp->wp_prot) {
			retry:
				seg = as_segat(as, pwp->wp_vaddr);
				/*
				 * NOTE(review): this "continue" re-tests
				 * the while condition without advancing
				 * pwp, so if seg could ever be NULL here
				 * the loop would spin forever.  Presumably
				 * a set-up watched page always lies within
				 * a segment while the write lock is held
				 * (as_setwatchprot panics in this case) --
				 * confirm.
				 */
				if (seg == NULL)
					continue;
				err = segop_setprot(seg, pwp->wp_vaddr,
				    PAGESIZE, prot);
				if (err == IE_RETRY) {
					/* Retry the setprot at most once. */
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;

				}
			}
			pwp->wp_oprot = 0;
			pwp->wp_prot = 0;
		}

		pwp = AVL_NEXT(&as->a_wpage, pwp);
	}
}
3497
3498 void
3499 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3500 {
3501 struct proc *p;
3518 int
3519 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3520 {
3521 struct seg *seg;
3522 int sts;
3523
3524 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3525 seg = as_segat(as, addr);
3526 if (seg == NULL) {
3527 AS_LOCK_EXIT(as, &as->a_lock);
3528 return (EFAULT);
3529 }
3530 /*
3531 * catch old drivers which may not support getmemid
3532 */
3533 if (seg->s_ops->getmemid == NULL) {
3534 AS_LOCK_EXIT(as, &as->a_lock);
3535 return (ENODEV);
3536 }
3537
3538 sts = segop_getmemid(seg, addr, memidp);
3539
3540 AS_LOCK_EXIT(as, &as->a_lock);
3541 return (sts);
3542 }
|