/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015, Joyent, Inc.  All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - address spaces.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/sysinfo.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/vtrace.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_spt.h>
#include <vm/page.h>

clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */

static struct kmem_cache *as_cache;

static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
static void as_clearwatchprot(struct as *, caddr_t, size_t);
int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);


/*
 * Verifying the segment lists is very time-consuming; it may not be
 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
 */
#ifdef DEBUG
#define	VERIFY_SEGLIST
int do_as_verify = 0;
#endif

/*
 * Allocate a new callback data structure entry and fill in the events of
 * interest, the address range of interest, and the callback argument.
 * Link the entry on the as->a_callbacks list.  A callback entry for the
 * entire address space may be specified with vaddr = 0 and size = -1.
 *
 * CALLER'S RESPONSIBILITY: If not calling from within the process context
 * for the specified as, the caller must guarantee persistence of the
 * specified as for the duration of this function (e.g. pages being locked
 * within the as will guarantee persistence).
 */
int
as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
    caddr_t vaddr, size_t size, int sleepflag)
{
	struct as_callback *current_head, *cb;
	caddr_t saddr;
	size_t rsize;

	/* callback function and an event are mandatory */
	if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
		return (EINVAL);

	/* Adding a callback after as_free has been called is not allowed */
	if (as == &kas)
		return (ENOMEM);

	/*
	 * vaddr = 0 and size = -1 is used to indicate that the callback range
	 * is the entire address space so no rounding is done in that case.
	 */
	if (size != -1) {
		saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
		rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
		    (size_t)saddr;
		/* check for wraparound */
		if (saddr + rsize < saddr)
			return (ENOMEM);
	} else {
		if (vaddr != 0)
			return (EINVAL);
		saddr = vaddr;
		rsize = size;
	}

	/* Allocate and initialize a callback entry */
	cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
	if (cb == NULL)
		return (EAGAIN);

	cb->ascb_func = cb_func;
	cb->ascb_arg = arg;
	cb->ascb_events = events;
	cb->ascb_saddr = saddr;
	cb->ascb_len = rsize;

	/* Add the entry to the list */
	mutex_enter(&as->a_contents);
	current_head = as->a_callbacks;
	as->a_callbacks = cb;
	cb->ascb_next = current_head;

	/*
	 * The call to this function may lose in a race with
	 * a pertinent event - e.g. a thread does long term memory locking
	 * but before the callback is added another thread executes as_unmap.
	 * A broadcast here resolves that.
	 */
	if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
		AS_CLRUNMAPWAIT(as);
		cv_broadcast(&as->a_cv);
	}

	mutex_exit(&as->a_contents);
	return (0);
}

/*
 * Search the callback list for an entry which pertains to arg.
 *
 * This is called from within the client upon completion of the callback.
 * RETURN VALUES:
 *	AS_CALLBACK_DELETED  (callback entry found and deleted)
 *	AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
 *	AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
 *			entry will be made in as_do_callbacks)
 *
 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
 * set, it indicates that as_do_callbacks is processing this entry.  The
 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
 * to unblock as_do_callbacks, in case it is blocked.
 *
 * CALLER'S RESPONSIBILITY: If not calling from within the process context
 * for the specified as, the caller must guarantee persistence of the
 * specified as for the duration of this function (e.g. pages being locked
 * within the as will guarantee persistence).
 */
uint_t
as_delete_callback(struct as *as, void *arg)
{
	struct as_callback **prevcb = &as->a_callbacks;
	struct as_callback *cb;
	uint_t rc = AS_CALLBACK_NOTFOUND;

	mutex_enter(&as->a_contents);
	for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
		if (cb->ascb_arg != arg)
			continue;

		/*
		 * If the events indicate AS_CALLBACK_CALLED, just clear
		 * AS_ALL_EVENT in the events field and wakeup the thread
		 * that may be waiting in as_do_callbacks.  as_do_callbacks
		 * will take care of removing this entry from the list.  In
		 * that case, return AS_CALLBACK_DELETE_DEFERRED.  Otherwise
		 * (AS_CALLBACK_CALLED not set), just remove it from the
		 * list, return the memory and return AS_CALLBACK_DELETED.
		 */
		if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
			/* leave AS_CALLBACK_CALLED */
			cb->ascb_events &= ~AS_ALL_EVENT;
			rc = AS_CALLBACK_DELETE_DEFERRED;
			cv_broadcast(&as->a_cv);
		} else {
			*prevcb = cb->ascb_next;
			kmem_free(cb, sizeof (struct as_callback));
			rc = AS_CALLBACK_DELETED;
		}
		break;
	}
	mutex_exit(&as->a_contents);
	return (rc);
}
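/*
 * Illustrative usage sketch (hypothetical caller, not part of this
 * file): a driver that holds pages locked long-term registers a
 * callback so it can be asked to release them, and deletes the entry
 * once its work is done.  my_unlock_func and my_state are assumed
 * driver-private names:
 *
 *	if (as_add_callback(as, my_unlock_func, my_state,
 *	    AS_UNMAP_EVENT | AS_FREE_EVENT, vaddr, len, KM_SLEEP) != 0)
 *		return (EAGAIN);
 *	...
 *	(void) as_delete_callback(as, my_state);
 */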
/*
 * Searches the as callback list for a matching entry.
 * Returns a pointer to the first matching callback, or NULL if
 * nothing is found.
 * This function never sleeps so it is ok to call it with locks held
 * beyond the (required) a_contents mutex.
 *
 * See also comment on as_do_callbacks below.
 */
static struct as_callback *
as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	ASSERT(MUTEX_HELD(&as->a_contents));
	for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
		/*
		 * If the callback has not already been called, then
		 * check if events or address range pertains.  An event_len
		 * of zero means do an unconditional callback.
		 */
		if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
		    ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
		    (event_addr + event_len < cb->ascb_saddr) ||
		    (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
			continue;
		}
		break;
	}
	return (cb);
}

/*
 * Executes a given callback and removes it from the callback list for
 * this address space.
 * This function may sleep so the caller must drop all locks except
 * a_contents before calling this func.
 *
 * See also comments on as_do_callbacks below.
 */
static void
as_execute_callback(struct as *as, struct as_callback *cb,
    uint_t events)
{
	struct as_callback **prevcb;
	void *cb_arg;

	ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
	cb->ascb_events |= AS_CALLBACK_CALLED;
	mutex_exit(&as->a_contents);
	(*cb->ascb_func)(as, cb->ascb_arg, events);
	mutex_enter(&as->a_contents);
	/*
	 * The callback function is required to delete the callback
	 * when the callback function determines it is OK for
	 * this thread to continue.  as_delete_callback will clear
	 * the AS_ALL_EVENT bits in the events field when it is deleted.
	 * If the callback function called as_delete_callback,
	 * events will already be cleared and there will be no blocking.
	 */
	while ((cb->ascb_events & events) != 0) {
		cv_wait(&as->a_cv, &as->a_contents);
	}
	/*
	 * This entry needs to be taken off the list.  Normally, the
	 * callback func itself does that, but unfortunately the list
	 * may have changed while the callback was running because the
	 * a_contents mutex was dropped and someone else other than the
	 * callback func itself could have called as_delete_callback,
	 * so we have to search to find this entry again.  The entry
	 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
	 */
	cb_arg = cb->ascb_arg;
	prevcb = &as->a_callbacks;
	for (cb = as->a_callbacks; cb != NULL;
	    prevcb = &cb->ascb_next, cb = *prevcb) {
		if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
		    (cb_arg != cb->ascb_arg)) {
			continue;
		}
		*prevcb = cb->ascb_next;
		kmem_free(cb, sizeof (struct as_callback));
		break;
	}
}

/*
 * Check the callback list for a matching event and intersection of
 * address range.  If there is a match invoke the callback.  Skip an entry if:
 *    - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
 *    - the event is not of interest
 *    - the address range is not of interest
 *
 * An event_len of zero indicates a request for an unconditional callback
 * (regardless of event), only the AS_CALLBACK_CALLED is checked.  The
 * a_contents lock must be dropped before a callback, so only one callback
 * can be done before returning.  Return -1 (true) if a callback was
 * executed and removed from the list, else return 0 (false).
 *
 * The logically separate parts, i.e. finding a matching callback and
 * executing a given callback have been separated into two functions
 * so that they can be called with different sets of locks held beyond
 * the always-required a_contents.  as_find_callback does not sleep so
 * it is ok to call it if more locks than a_contents (i.e. the a_lock
 * rwlock) are held.  as_execute_callback on the other hand may sleep
 * so all locks beyond a_contents must be dropped by the caller if one
 * does not want to end up comatose.
 */
static int
as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	if ((cb = as_find_callback(as, events, event_addr, event_len))) {
		as_execute_callback(as, cb, events);
		return (-1);
	}
	return (0);
}
/*
 * Search for the segment containing addr. If a segment containing addr
 * exists, that segment is returned.  If no such segment exists, and
 * the list spans addresses greater than addr, then the first segment
 * whose base is greater than addr is returned; otherwise, NULL is
 * returned unless tail is true, in which case the last element of the
 * list is returned.
 *
 * a_seglast is used to cache the last found segment for repeated
 * searches to the same addr (which happens frequently).
 */
struct seg *
as_findseg(struct as *as, caddr_t addr, int tail)
{
	struct seg *seg = as->a_seglast;
	avl_index_t where;

	ASSERT(AS_LOCK_HELD(as, &as->a_lock));

	if (seg != NULL &&
	    seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, &where);
	if (seg != NULL)
		return (as->a_seglast = seg);

	seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
	if (seg == NULL && tail)
		seg = avl_last(&as->a_segtree);
	return (as->a_seglast = seg);
}

#ifdef VERIFY_SEGLIST
/*
 * verify that the linked list is coherent
 */
static void
as_verify(struct as *as)
{
	struct seg *seg, *seglast, *p, *n;
	uint_t nsegs = 0;

	if (do_as_verify == 0)
		return;

	seglast = as->a_seglast;

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		ASSERT(seg->s_as == as);
		p = AS_SEGPREV(as, seg);
		n = AS_SEGNEXT(as, seg);
		ASSERT(p == NULL || p->s_as == as);
		ASSERT(p == NULL || p->s_base < seg->s_base);
		ASSERT(n == NULL || n->s_base > seg->s_base);
		ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
		if (seg == seglast)
			seglast = NULL;
		nsegs++;
	}
	ASSERT(seglast == NULL);
	ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
}
#endif /* VERIFY_SEGLIST */
/*
 * Add a new segment to the address space.  The avl_find()
 * may be expensive so we attempt to use the last segment accessed
 * in as_gap() as an insertion point.
 */
int
as_addseg(struct as *as, struct seg *newseg)
{
	struct seg *seg;
	caddr_t addr;
	caddr_t eaddr;
	avl_index_t where;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as->a_lastgaphl != NULL) {
		struct seg *hseg = NULL;
		struct seg *lseg = NULL;

		if (as->a_lastgaphl->s_base > newseg->s_base) {
			hseg = as->a_lastgaphl;
			lseg = AVL_PREV(&as->a_segtree, hseg);
		} else {
			lseg = as->a_lastgaphl;
			hseg = AVL_NEXT(&as->a_segtree, lseg);
		}

		if (hseg && lseg && lseg->s_base < newseg->s_base &&
		    hseg->s_base > newseg->s_base) {
			avl_insert_here(&as->a_segtree, newseg, lseg,
			    AVL_AFTER);
			as->a_lastgaphl = NULL;
			as->a_seglast = newseg;
			return (0);
		}
		as->a_lastgaphl = NULL;
	}

	addr = newseg->s_base;
	eaddr = addr + newseg->s_size;
again:

	seg = avl_find(&as->a_segtree, &addr, &where);

	if (seg == NULL)
		seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);

	if (seg == NULL)
		seg = avl_last(&as->a_segtree);

	if (seg != NULL) {
		caddr_t base = seg->s_base;

		/*
		 * If top of seg is below the requested address, then
		 * the insertion point is at the end of the linked list,
		 * and seg points to the tail of the list.  Otherwise,
		 * the insertion point is immediately before seg.
		 */
		if (base + seg->s_size > addr) {
			if (addr >= base || eaddr > base) {
#ifdef __sparc
				extern struct seg_ops segnf_ops;

				/*
				 * no-fault segs must disappear if overlaid.
				 * XXX need new segment type so
				 * we don't have to check s_ops
				 */
				if (seg->s_ops == &segnf_ops) {
					seg_unmap(seg);
					goto again;
				}
#endif
				return (-1);	/* overlapping segment */
			}
		}
	}
	as->a_seglast = newseg;
	avl_insert(&as->a_segtree, newseg, where);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (0);
}

struct seg *
as_removeseg(struct as *as, struct seg *seg)
{
	avl_tree_t *t;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (seg == NULL)
		return (NULL);

	t = &as->a_segtree;
	if (as->a_seglast == seg)
		as->a_seglast = NULL;
	as->a_lastgaphl = NULL;

	/*
	 * if this segment is at an address higher than
	 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
	 */
	if (as->a_lastgap &&
	    (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
		as->a_lastgap = AVL_NEXT(t, seg);

	/*
	 * remove the segment from the seg tree
	 */
	avl_remove(t, seg);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (seg);
}
/*
 * Find a segment containing addr.
 */
struct seg *
as_segat(struct as *as, caddr_t addr)
{
	struct seg *seg = as->a_seglast;

	ASSERT(AS_LOCK_HELD(as, &as->a_lock));

	if (seg != NULL && seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, NULL);
	return (seg);
}

/*
 * Serialize all searches for holes in an address space to
 * prevent two or more threads from allocating the same virtual
 * address range.  The address space must not be "read/write"
 * locked by the caller since we may block.
 */
void
as_rangelock(struct as *as)
{
	mutex_enter(&as->a_contents);
	while (AS_ISCLAIMGAP(as))
		cv_wait(&as->a_cv, &as->a_contents);
	AS_SETCLAIMGAP(as);
	mutex_exit(&as->a_contents);
}

/*
 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
 */
void
as_rangeunlock(struct as *as)
{
	mutex_enter(&as->a_contents);
	AS_CLRCLAIMGAP(as);
	cv_signal(&as->a_cv);
	mutex_exit(&as->a_contents);
}
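/*
 * Sketch of the intended usage pattern (hypothetical caller, not part
 * of this file): the range lock is held across the hole search and the
 * subsequent mapping so no other thread can claim the same hole:
 *
 *	as_rangelock(as);
 *	... choose an address, e.g. via as_gap() ...
 *	... create the mapping, e.g. via as_map() ...
 *	as_rangeunlock(as);
 */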
/*
 * Compare segments (or just an address) by segment address range.
 */
static int
as_segcompar(const void *x, const void *y)
{
	struct seg *a = (struct seg *)x;
	struct seg *b = (struct seg *)y;

	if (a->s_base < b->s_base)
		return (-1);
	if (a->s_base >= b->s_base + b->s_size)
		return (1);
	return (0);
}


void
as_avlinit(struct as *as)
{
	avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
	    offsetof(struct seg, s_tree));
	avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
	    offsetof(struct watched_page, wp_link));
}

/*ARGSUSED*/
static int
as_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct as *as = buf;

	mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
	as_avlinit(as);
	return (0);
}

/*ARGSUSED1*/
static void
as_destructor(void *buf, void *cdrarg)
{
	struct as *as = buf;

	avl_destroy(&as->a_segtree);
	mutex_destroy(&as->a_contents);
	cv_destroy(&as->a_cv);
	rw_destroy(&as->a_lock);
}

void
as_init(void)
{
	as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
	    as_constructor, as_destructor, NULL, NULL, NULL, 0);
}

/*
 * Allocate and initialize an address space data structure.
 * We call hat_alloc to allow any machine dependent
 * information in the hat structure to be initialized.
 */
struct as *
as_alloc(void)
{
	struct as *as;

	as = kmem_cache_alloc(as_cache, KM_SLEEP);

	as->a_flags = 0;
	as->a_vbits = 0;
	as->a_hrm = NULL;
	as->a_seglast = NULL;
	as->a_size = 0;
	as->a_resvsize = 0;
	as->a_updatedir = 0;
	gethrestime(&as->a_updatetime);
	as->a_objectdir = NULL;
	as->a_sizedir = 0;
	as->a_userlimit = (caddr_t)USERLIMIT;
	as->a_lastgap = NULL;
	as->a_lastgaphl = NULL;
	as->a_callbacks = NULL;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	as->a_hat = hat_alloc(as);	/* create hat for default system mmu */
	AS_LOCK_EXIT(as, &as->a_lock);

	return (as);
}
/*
 * Free an address space data structure.
 * Need to free the hat first and then
 * all the segments on this as and finally
 * the space for the as struct itself.
 */
void
as_free(struct as *as)
{
	struct hat *hat = as->a_hat;
	struct seg *seg, *next;
	boolean_t free_started = B_FALSE;

top:
	/*
	 * Invoke ALL callbacks. as_do_callbacks will do one callback
	 * per call, and not return (-1) until the callback has completed.
	 * When as_do_callbacks returns zero, all callbacks have completed.
	 */
	mutex_enter(&as->a_contents);
	while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
		;

	mutex_exit(&as->a_contents);
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if (!free_started) {
		free_started = B_TRUE;
		hat_free_start(hat);
	}
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
		int err;

		next = AS_SEGNEXT(as, seg);
retry:
		err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		if (err == EAGAIN) {
			mutex_enter(&as->a_contents);
			if (as->a_callbacks) {
				AS_LOCK_EXIT(as, &as->a_lock);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				/*
				 * Memory is currently locked. Wait for a
				 * cv_signal that it has been unlocked, then
				 * try the operation again.
				 */
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else {
			/*
			 * We do not expect any other error return at this
			 * time. This is similar to an ASSERT in seg_unmap()
			 */
			ASSERT(err == 0);
		}
	}
	hat_free_end(hat);
	AS_LOCK_EXIT(as, &as->a_lock);

	/* /proc stuff */
	ASSERT(avl_numnodes(&as->a_wpage) == 0);
	if (as->a_objectdir) {
		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
		as->a_objectdir = NULL;
		as->a_sizedir = 0;
	}

	/*
	 * Free the struct as back to kmem.  Assert it has no segments.
	 */
	ASSERT(avl_numnodes(&as->a_segtree) == 0);
	kmem_cache_free(as_cache, as);
}
int
as_dup(struct as *as, struct proc *forkedproc)
{
	struct as *newas;
	struct seg *seg, *newseg;
	size_t purgesize = 0;
	int error;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	newas->a_proc = forkedproc;

	AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);

	(void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE) {
			purgesize += seg->s_size;
			continue;
		}

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			seg_free(newseg);
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}
	newas->a_resvsize = as->a_resvsize - purgesize;

	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);

	AS_LOCK_EXIT(newas, &newas->a_lock);

	as_setwatch(as);
	AS_LOCK_EXIT(as, &as->a_lock);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	forkedproc->p_as = newas;
	return (0);
}
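/*
 * Sketch (hypothetical caller, not part of this file): the fork path
 * duplicates the parent's address space into the child proc before the
 * child is made runnable:
 *
 *	if (as_dup(p->p_as, cp) != 0) {
 *		... tear the child down and fail the fork ...
 *	}
 */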
/*
 * Handle a ``fault'' at addr for size bytes.
 */
faultcode_t
as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
    enum fault_type type, enum seg_rw rw)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	size_t ssize;
	faultcode_t res = 0;
	caddr_t addrsav;
	struct seg *segsav;
	int as_lock_held;
	klwp_t *lwp = ttolwp(curthread);
	int holding_wpage = 0;


retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting for a
	 * pagefault.  This is to avoid deadlock while debugging a process
	 * via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	/*
	 * The same length must be used when we softlock and softunlock.  We
	 * don't support softunlocking lengths less than the original length
	 * when there is largepage support.  See seg_dev.c for more
	 * comments.
	 */
	switch (type) {

	case F_SOFTLOCK:
		CPU_STATS_ADD_K(vm, softlock, 1);
		break;

	case F_SOFTUNLOCK:
		break;

	case F_PROT:
		CPU_STATS_ADD_K(vm, prot_fault, 1);
		break;

	case F_INVAL:
		CPU_STATS_ENTER_K();
		CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
		if (as == &kas)
			CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
		CPU_STATS_EXIT_K();
		break;
	}

	/* Kernel probe */
	TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
	    tnf_opaque, address, addr,
	    tnf_fault_type, fault_type, type,
	    tnf_seg_access, access, rw);

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * XXX -- Don't grab the as lock for segkmap. We should grab it for
	 * correctness, but then we could be stuck holding this lock for
	 * a LONG time if the fault needs to be resolved on a slow
	 * filesystem, and then no-one will be able to exec new commands,
	 * as exec'ing requires the write lock on the as.
	 */
	if (as == &kas && segkmap && segkmap->s_base <= raddr &&
	    raddr + size < segkmap->s_base + segkmap->s_size) {
		seg = segkmap;
		as_lock_held = 0;
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

		seg = as_segat(as, raddr);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if (lwp != NULL)
				lwp->lwp_nostop--;
			return (FC_NOMAP);
		}

		as_lock_held = 1;
	}

	addrsav = raddr;
	segsav = seg;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		if (raddr + rsize > seg->s_base + seg->s_size)
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);

		/* Restore watchpoints */
		if (holding_wpage) {
			as_setwatch(as);
			holding_wpage = 0;
		}

		if (res != 0)
			break;
	}

	/*
	 * If we were SOFTLOCKing and encountered a failure,
	 * we must SOFTUNLOCK the range we already did. (Maybe we
	 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
	 * right here...)
	 */
	if (res != 0 && type == F_SOFTLOCK) {
		for (seg = segsav; addrsav < raddr; addrsav += ssize) {
			if (addrsav >= seg->s_base + seg->s_size)
				seg = AS_SEGNEXT(as, seg);
			ASSERT(seg != NULL);
			/*
			 * Now call the fault routine again to perform the
			 * unlock using S_OTHER instead of the rw variable
			 * since we never got a chance to touch the pages.
			 */
			if (raddr > seg->s_base + seg->s_size)
				ssize = seg->s_base + seg->s_size - addrsav;
			else
				ssize = raddr - addrsav;
			(void) SEGOP_FAULT(hat, seg, addrsav, ssize,
			    F_SOFTUNLOCK, S_OTHER);
		}
	}
	if (as_lock_held)
		AS_LOCK_EXIT(as, &as->a_lock);
	if (lwp != NULL)
		lwp->lwp_nostop--;

	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Also wait
	 * a bit to let the deadlock-causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}
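/*
 * Sketch of the softlock pairing (hypothetical physio-style caller,
 * not part of this file): lock the user pages down, do the transfer,
 * then unlock with the same address and length:
 *
 *	if (as_fault(as->a_hat, as, addr, len, F_SOFTLOCK, S_WRITE) != 0)
 *		return (EFAULT);
 *	... perform I/O on the locked pages ...
 *	(void) as_fault(as->a_hat, as, addr, len, F_SOFTUNLOCK, S_WRITE);
 */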
/*
 * Asynchronous ``fault'' at addr for size bytes.
 */
faultcode_t
as_faulta(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	faultcode_t res = 0;
	klwp_t *lwp = ttolwp(curthread);

retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting
	 * for a pagefault.  This is to avoid deadlock while debugging
	 * a process via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		if (lwp != NULL)
			lwp->lwp_nostop--;
		return (FC_NOMAP);
	}

	for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		res = SEGOP_FAULTA(seg, raddr);
		if (res != 0)
			break;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	if (lwp != NULL)
		lwp->lwp_nostop--;
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Also wait
	 * a bit to let the deadlock-causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}
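/*
 * Sketch (hypothetical caller, not part of this file): a read-ahead
 * hint, e.g. an madvise(MADV_WILLNEED)-style path, simply starts the
 * asynchronous faults and ignores the result:
 *
 *	(void) as_faulta(as, addr, len);
 */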
/*
 * Set the virtual mapping for the interval from [addr : addr + size)
 * in address space `as' to have the specified protection.
 * It is ok for the range to cross over several segments,
 * as long as they are contiguous.
 */
int
as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	struct as_callback *cb;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0, writer = 0;
	caddr_t saveraddr;
	size_t saversize;

setprot_top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	saveraddr = raddr;
	saversize = rsize;

	/*
	 * Normally we only lock the as as a reader. But
	 * if due to setprot the segment driver needs to split
	 * a segment it will return IE_RETRY. Therefore we re-acquire
	 * the as lock as a writer so the segment driver can change
	 * the seg list. Also the segment driver will return IE_RETRY
	 * after it has changed the segment list so we therefore keep
	 * locking as a writer. Since these operations should be rare,
	 * we only want to lock as a writer when necessary.
	 */
	if (writer || avl_numnodes(&as->a_wpage) != 0) {
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	}

	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;
retry:
		error = SEGOP_SETPROT(seg, raddr, ssize, prot);

		if (error == IE_NOMEM) {
			error = EAGAIN;
			break;
		}

		if (error == IE_RETRY) {
			AS_LOCK_EXIT(as, &as->a_lock);
			writer = 1;
			goto setprot_top;
		}

		if (error == EAGAIN) {
			/*
			 * Make sure we have a_lock as writer.
			 */
			if (writer == 0) {
				AS_LOCK_EXIT(as, &as->a_lock);
				writer = 1;
				goto setprot_top;
			}

			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range
			 *	pertains, the callback is invoked (unless it
			 *	already is in progress). The a_contents lock
			 *	must be dropped before the callback, so only
			 *	one callback can be done at a time. Go to the
			 *	top and do more until zero is returned. If
			 *	zero is returned, either there were no
			 *	callbacks for this event or they were already
			 *	in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_SETPROT_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_SETPROT_EVENT);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto setprot_top;
		} else if (error != 0)
			break;
	}
	if (error != 0) {
		as_setwatch(as);
	} else {
		as_setwatchprot(as, saveraddr, saversize, prot);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}
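/*
 * Sketch (hypothetical caller, not part of this file): the mprotect(2)
 * path rounds the user range and applies the new protections, passing
 * any error straight back out:
 *
 *	error = as_setprot(p->p_as, addr, len, uprot);
 *	if (error != 0)
 *		return (set_errno(error));
 */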
/*
 * Check to make sure that the interval [addr, addr + size)
 * in address space `as' has at least the specified protection.
 * It is ok for the range to cross over several segments, as long
 * as they are contiguous.
 */
int
as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	/*
	 * This is ugly as sin...
	 * Normally, we only acquire the address space readers lock.
	 * However, if the address space has watchpoints present,
	 * we must acquire the writer lock on the address space for
	 * the benefit of as_clearwatchprot() and as_setwatchprot().
	 */
	if (avl_numnodes(&as->a_wpage) != 0)
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	else
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
		if (error != 0)
			break;
	}
	as_setwatch(as);
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}
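/*
 * Sketch (hypothetical caller, not part of this file): a driver can
 * verify that a user buffer is at least readable before committing to
 * a transfer, mapping any failure onto EFAULT:
 *
 *	if (as_checkprot(as, uaddr, ulen, PROT_READ) != 0)
 *		return (EFAULT);
 */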
int
as_unmap(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg, *seg_next;
	struct as_callback *cb;
	caddr_t raddr, eaddr;
	size_t ssize, rsize = 0;
	int err;

top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
	    (uintptr_t)PAGEMASK);

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	/*
	 * Use as_findseg to find the first segment in the range, then
	 * step through the segments in order, following s_next.
	 */
	as_clearwatchprot(as, raddr, eaddr - raddr);

	for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
		if (eaddr <= seg->s_base)
			break;		/* eaddr was in a gap; all done */

		/* this is implied by the test above */
		ASSERT(raddr < eaddr);

		if (raddr < seg->s_base)
			raddr = seg->s_base;	/* raddr was in a gap */

		if (eaddr > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = eaddr - raddr;

		/*
		 * Save next segment pointer since seg can be
		 * destroyed during the segment unmap operation.
		 */
		seg_next = AS_SEGNEXT(as, seg);

		/*
		 * We didn't count /dev/null mappings, so ignore them here.
		 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
		 * we have to do this check here while we have seg.)
		 */
		rsize = 0;
		if (!SEG_IS_DEVNULL_MAPPING(seg) &&
		    !SEG_IS_PARTIAL_RESV(seg))
			rsize = ssize;

retry:
		err = SEGOP_UNMAP(seg, raddr, ssize);
		if (err == EAGAIN) {
			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range
			 *	pertains, the callback is invoked (unless it
			 *	already is in progress). The a_contents lock
			 *	must be dropped before the callback, so only
			 *	one callback can be done at a time. Go to the
			 *	top and do more until zero is returned. If
			 *	zero is returned, either there were no
			 *	callbacks for this event or they were already
			 *	in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_UNMAP_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_UNMAP_EVENT);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else if (err == IE_RETRY) {
			AS_LOCK_EXIT(as, &as->a_lock);
			goto top;
		} else if (err) {
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (-1);
		}

		as->a_size -= ssize;
		if (rsize)
			as->a_resvsize -= rsize;
		raddr += ssize;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (0);
}
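/*
 * Sketch (hypothetical caller, not part of this file): munmap(2)
 * reduces to a single call, with the range lock held so a racing
 * thread cannot map the hole while it is being emptied:
 *
 *	as_rangelock(as);
 *	(void) as_unmap(as, addr, len);
 *	as_rangeunlock(as);
 */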
static int
as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szc;
	uint_t nszc;
	int error;
	caddr_t a;
	caddr_t eaddr;
	size_t segsize;
	struct seg *seg;
	size_t pgsz;
	int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
	uint_t save_szcvec;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
	if (!do_off) {
		vn_a->offset = 0;
	}

	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
			as->a_resvsize += size;
		}
		return (error);
	}

	eaddr = addr + size;
	save_szcvec = szcvec;
	szcvec >>= 1;
	szc = 0;
	nszc = 0;
	while (szcvec) {
		if ((szcvec & 0x1) == 0) {
			nszc++;
			szcvec >>= 1;
			continue;
		}
		nszc++;
		pgsz = page_get_pagesize(nszc);
		a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (a != addr) {
			ASSERT(a < eaddr);
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			as->a_resvsize += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szc = nszc;
		szcvec >>= 1;
	}

	ASSERT(addr < eaddr);
	szcvec = save_szcvec | 1; /* add 8K pages */
	while (szcvec) {
		a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		ASSERT(a >= addr);
		if (a != addr) {
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			as->a_resvsize += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szcvec &= ~(1 << szc);
		if (szcvec) {
			szc = highbit(szcvec) - 1;
			pgsz = page_get_pagesize(szc);
		}
	}
	ASSERT(addr == eaddr);

	return (0);
}
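/*
 * Illustration of the szcvec walk above (assumed example numbers, not
 * from this file): if szcvec is 0x5, page size codes 0 and 2 are
 * usable.  The first loop lays down a size-code-0 segment from addr up
 * to the first boundary aligned for size code 2; the second loop then
 * creates the large-page segment(s) and trims the unaligned tail back
 * down through the smaller sizes until addr == eaddr.
 */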
static int
as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
	int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
	uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
	    type, 0);
	int error;
	struct seg *seg;
	struct vattr va;
	u_offset_t eoff;
	size_t save_size = 0;
	extern size_t textrepl_size_thresh;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp != NULL);
	ASSERT(vn_a->amp == NULL);

again:
	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
			as->a_resvsize += size;
		}
		return (error);
	}

	va.va_mask = AT_SIZE;
	if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
		szcvec = 0;
		goto again;
	}
	eoff = vn_a->offset & PAGEMASK;
	if (eoff >= va.va_size) {
		szcvec = 0;
		goto again;
	}
	eoff += size;
	if (btopr(va.va_size) < btopr(eoff)) {
		save_size = size;
		size = va.va_size - (vn_a->offset & PAGEMASK);
		size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
		szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
		    type, 0);
		if (szcvec <= 1) {
			size = save_size;
			goto again;
		}
	}

	if (size > textrepl_size_thresh) {
		vn_a->flags |= _MAP_TEXTREPL;
	}
	error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
	    segcreated);
	if (error != 0) {
		return (error);
	}
	if (save_size) {
		addr += size;
		size = save_size - size;
		szcvec = 0;
		goto again;
	}
	return (0);
}
/*
 * as_map_ansegs: shared or private anonymous memory.  Note that the flags
 * passed to map_pgszcvec cannot be MAP_INITDATA, for anon.
 */
static int
as_map_ansegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szcvec;
	uchar_t type;

	ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
	if (vn_a->type == MAP_SHARED) {
		type = MAPPGSZC_SHM;
	} else if (vn_a->type == MAP_PRIVATE) {
		if (vn_a->szc == AS_MAP_HEAP) {
			type = MAPPGSZC_HEAP;
		} else if (vn_a->szc == AS_MAP_STACK) {
			type = MAPPGSZC_STACK;
		} else {
			type = MAPPGSZC_PRIVM;
		}
	}
	szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
	    (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
	    (vn_a->flags & MAP_TEXT), type, 0);
	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL);

	return (as_map_segvn_segs(as, addr, size, szcvec,
	    crfp, vn_a, segcreated));
}

int
as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
{
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	return (as_map_locked(as, addr, size, crfp, argsp));
}
int
as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
    void *argsp)
{
	struct seg *seg = NULL;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error;
	int unmap = 0;
	struct proc *p = curproc;
	struct segvn_crargs crargs;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * check for wrap around
	 */
	if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
		AS_LOCK_EXIT(as, &as->a_lock);

		(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENOMEM);
		}

		error = (*crfp)(seg, argsp);
		if (error != 0) {
			seg_free(seg);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (error);
		}
		/*
		 * Add size now so as_unmap will work if as_ctl fails.
		 */
		as->a_size += rsize;
		as->a_resvsize += rsize;
	}

	as_setwatch(as);

	/*
	 * If the address space is locked,
	 * establish memory locks for the new segment.
	 */
	mutex_enter(&as->a_contents);
	if (AS_ISPGLCK(as)) {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
		error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
		if (error != 0)
			(void) as_unmap(as, addr, size);
	} else {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
	}
	return (error);
}
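/*
 * Sketch of the mmap(2) flow (hypothetical caller, not part of this
 * file): choose an address under the range lock, then hand a
 * segvn_crargs template to as_map() with segvn_create as the segment
 * constructor:
 *
 *	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(uprot, PROT_ALL);
 *
 *	as_rangelock(as);
 *	map_addr(&addr, len, off, 1, flags);
 *	if (addr == NULL) {
 *		as_rangeunlock(as);
 *		return (ENOMEM);
 *	}
 *	error = as_map(as, addr, len, segvn_create, &crargs);
 *	as_rangeunlock(as);
 */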
/*
 * Delete all segments in the address space marked with S_PURGE.
 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
 * These segments are deleted as a first step before calls to as_gap(), so
 * that they don't affect mmap() or shmat().
 */
void
as_purge(struct as *as)
{
	struct seg *seg;
	struct seg *next_seg;

	/*
	 * the setting of NEEDSPURGE is protected by as_rangelock(), so
	 * no need to grab a_contents mutex for this check
	 */
	if ((as->a_flags & AS_NEEDSPURGE) == 0)
		return;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	next_seg = NULL;
	seg = AS_SEGFIRST(as);
	while (seg != NULL) {
		next_seg = AS_SEGNEXT(as, seg);
		if (seg->s_flags & S_PURGE)
			SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		seg = next_seg;
	}
	AS_LOCK_EXIT(as, &as->a_lock);

	mutex_enter(&as->a_contents);
	as->a_flags &= ~AS_NEEDSPURGE;
	mutex_exit(&as->a_contents);
}

/*
 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
 * range of addresses at least "minlen" long, where the base of the range is
 * at "off" phase from an "align" boundary and there is space for a
 * "redzone"-sized redzone on either side of the range.  Thus,
 * if align was 4M and off was 16k, the user wants a hole which will start
 * 16k into a 4M page.
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, *basep and *lenp are set to reflect the
 * part of the hole that is within range, and 0 is returned.  On failure,
 * -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
    uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
{
	caddr_t lobound = *basep;
	caddr_t hibound = lobound + *lenp;
	struct seg *lseg, *hseg;
	caddr_t lo, hi;
	int forward;
	caddr_t save_base;
	size_t save_len;
	size_t save_minlen;
	size_t save_redzone;
	int fast_path = 1;

	save_base = *basep;
	save_len = *lenp;
	save_minlen = minlen;
	save_redzone = redzone;

	/*
	 * For the first pass/fast_path, just add align and redzone into
	 * minlen since if we get an allocation, we can guarantee that it
	 * will fit the alignment and redzone requested.
	 * This increases the chance that hibound will be adjusted to
	 * a_lastgap->s_base which will likely allow us to find an
	 * acceptable hole in the address space more quickly.
	 * If we can't find a hole with this fast_path, then we look for
	 * smaller holes in which the alignment and offset may allow
	 * the allocation to fit.
	 */
	minlen += align;
	minlen += 2 * redzone;
	redzone = 0;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	if (AS_SEGFIRST(as) == NULL) {
		if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
		    align, redzone, off)) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		} else {
			AS_LOCK_EXIT(as, &as->a_lock);
			*basep = save_base;
			*lenp = save_len;
			return (-1);
		}
	}

retry:
	/*
	 * Set up to iterate over all the inter-segment holes in the given
	 * direction.  lseg is NULL for the lowest-addressed hole and hseg is
	 * NULL for the highest-addressed hole.  If moving backwards, we reset
	 * hseg to denote the highest-addressed segment.
	 */
	forward = (flags & AH_DIR) == AH_LO;
	if (forward) {
		hseg = as_findseg(as, lobound, 1);
		lseg = AS_SEGPREV(as, hseg);
	} else {

		/*
		 * If allocating at least as much as the last allocation,
		 * use a_lastgap's base as a better estimate of hibound.
		 */
		if (as->a_lastgap &&
		    minlen >= as->a_lastgap->s_size &&
		    hibound >= as->a_lastgap->s_base)
			hibound = as->a_lastgap->s_base;

		hseg = as_findseg(as, hibound, 1);
		if (hseg->s_base + hseg->s_size < hibound) {
			lseg = hseg;
			hseg = NULL;
		} else {
			lseg = AS_SEGPREV(as, hseg);
		}
	}

	for (;;) {
		/*
		 * Set lo and hi to the hole's boundaries.  (We should really
		 * use MAXADDR in place of hibound in the expression below,
		 * but can't express it easily; using hibound in its place is
		 * harmless.)
		 */
		lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
		hi = (hseg == NULL) ? hibound : hseg->s_base;
		/*
		 * If the iteration has moved past the interval from lobound
		 * to hibound it's pointless to continue.
		 */
		if ((forward && lo > hibound) || (!forward && hi < lobound))
			break;
		else if (lo > hibound || hi < lobound)
			goto cont;
		/*
		 * Candidate hole lies at least partially within the allowable
		 * range.  Restrict it to fall completely within that range,
		 * i.e., to [max(lo, lobound), min(hi, hibound)].
		 */
		if (lo < lobound)
			lo = lobound;
		if (hi > hibound)
			hi = hibound;
		/*
		 * Verify that the candidate hole is big enough and meets
		 * hardware constraints.  If the hole is too small, no need
		 * to do the further checks since they will fail.
		 */
		*basep = lo;
		*lenp = hi - lo;
		if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
		    minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
		    ((flags & AH_CONTAIN) == 0 ||
		    (*basep <= addr && *basep + *lenp > addr))) {
			if (!forward)
				as->a_lastgap = hseg;
			if (hseg != NULL)
				as->a_lastgaphl = hseg;
			else
				as->a_lastgaphl = lseg;
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		}
	cont:
		/*
		 * Move to the next hole.
		 */
		if (forward) {
			lseg = hseg;
			if (lseg == NULL)
				break;
			hseg = AS_SEGNEXT(as, hseg);
		} else {
			hseg = lseg;
			if (hseg == NULL)
				break;
			lseg = AS_SEGPREV(as, lseg);
		}
	}
	if (fast_path && (align != 0 || save_redzone != 0)) {
		fast_path = 0;
		minlen = save_minlen;
		redzone = save_redzone;
		goto retry;
	}
	*basep = save_base;
	*lenp = save_len;
	AS_LOCK_EXIT(as, &as->a_lock);
	return (-1);
}

/*
 * Find a hole of at least size minlen within [*basep, *basep + *lenp).
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, base and len are set to reflect the part of
 * the hole that is within range, and 0 is returned, otherwise,
 * -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
    caddr_t addr)
{

	return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
}
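/*
 * Sketch (hypothetical caller, not part of this file): search a user
 * address range for the lowest hole of at least minlen bytes:
 *
 *	base = lower_bound;
 *	len = upper_bound - lower_bound;
 *	if (as_gap(as, minlen, &base, &len, AH_LO, NULL) == 0)
 *		... [base, base + len) is a suitable hole ...
 */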
1991 */ 1992 int 1993 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags, 1994 caddr_t addr) 1995 { 1996 1997 return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0)); 1998 } 1999 2000 /* 2001 * Return the next range within [base, base + len) that is backed 2002 * with "real memory". Skip holes and non-seg_vn segments. 2003 * We're lazy and only return one segment at a time. 2004 */ 2005 int 2006 as_memory(struct as *as, caddr_t *basep, size_t *lenp) 2007 { 2008 extern struct seg_ops segspt_shmops; /* needs a header file */ 2009 struct seg *seg; 2010 caddr_t addr, eaddr; 2011 caddr_t segend; 2012 2013 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2014 2015 addr = *basep; 2016 eaddr = addr + *lenp; 2017 2018 seg = as_findseg(as, addr, 0); 2019 if (seg != NULL) 2020 addr = MAX(seg->s_base, addr); 2021 2022 for (;;) { 2023 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) { 2024 AS_LOCK_EXIT(as, &as->a_lock); 2025 return (EINVAL); 2026 } 2027 2028 if (seg->s_ops == &segvn_ops) { 2029 segend = seg->s_base + seg->s_size; 2030 break; 2031 } 2032 2033 /* 2034 * We do ISM by looking into the private data 2035 * to determine the real size of the segment. 2036 */ 2037 if (seg->s_ops == &segspt_shmops) { 2038 segend = seg->s_base + spt_realsize(seg); 2039 if (addr < segend) 2040 break; 2041 } 2042 2043 seg = AS_SEGNEXT(as, seg); 2044 2045 if (seg != NULL) 2046 addr = seg->s_base; 2047 } 2048 2049 *basep = addr; 2050 2051 if (segend > eaddr) 2052 *lenp = eaddr - addr; 2053 else 2054 *lenp = segend - addr; 2055 2056 AS_LOCK_EXIT(as, &as->a_lock); 2057 return (0); 2058 } 2059 2060 /* 2061 * Swap the pages associated with the address space as out to 2062 * secondary storage, returning the number of bytes actually 2063 * swapped. 2064 * 2065 * The value returned is intended to correlate well with the process's 2066 * memory requirements. Its usefulness for this purpose depends on 2067 * how well the segment-level routines do at returning accurate 2068 * information. 2069 */ 2070 size_t 2071 as_swapout(struct as *as) 2072 { 2073 struct seg *seg; 2074 size_t swpcnt = 0; 2075 2076 /* 2077 * Kernel-only processes have given up their address 2078 * spaces. Of course, we shouldn't be attempting to 2079 * swap out such processes in the first place... 2080 */ 2081 if (as == NULL) 2082 return (0); 2083 2084 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2085 2086 /* 2087 * Free all mapping resources associated with the address 2088 * space. The segment-level swapout routines capitalize 2089 * on this unmapping by scavenging pages that have become 2090 * unmapped here. 2091 */ 2092 hat_swapout(as->a_hat); 2093 2094 /* 2095 * Call the swapout routines of all segments in the address 2096 * space to do the actual work, accumulating the amount of 2097 * space reclaimed. 2098 */ 2099 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 2100 struct seg_ops *ov = seg->s_ops; 2101 2102 /* 2103 * We have to check to see if the seg has 2104 * an ops vector because the seg may have 2105 * been in the middle of being set up when 2106 * the process was picked for swapout. 2107 */ 2108 if ((ov != NULL) && (ov->swapout != NULL)) 2109 swpcnt += SEGOP_SWAPOUT(seg); 2110 } 2111 AS_LOCK_EXIT(as, &as->a_lock); 2112 return (swpcnt); 2113 } 2114 2115 /* 2116 * Determine whether data from the mappings in interval [addr, addr + size) 2117 * are in the primary memory (core) cache.
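 *
 * An illustrative sketch (hypothetical caller; 'uaddr' is assumed): vec
 * gets one byte per page, and *sizep reports how many bytes were checked:
 *
 *	char vec[4];
 *	size_t amt;
 *
 *	if (as_incore(as, uaddr, 4 * PAGESIZE, vec, &amt) == 0) {
 *		(each nonzero vec[i] marks a resident page)
 *	}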
2118 */ 2119 int 2120 as_incore(struct as *as, caddr_t addr, 2121 size_t size, char *vec, size_t *sizep) 2122 { 2123 struct seg *seg; 2124 size_t ssize; 2125 caddr_t raddr; /* rounded down addr */ 2126 size_t rsize; /* rounded up size */ 2127 size_t isize; /* iteration size */ 2128 int error = 0; /* result, assume success */ 2129 2130 *sizep = 0; 2131 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2132 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) - 2133 (size_t)raddr; 2134 2135 if (raddr + rsize < raddr) /* check for wraparound */ 2136 return (ENOMEM); 2137 2138 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2139 seg = as_segat(as, raddr); 2140 if (seg == NULL) { 2141 AS_LOCK_EXIT(as, &as->a_lock); 2142 return (-1); 2143 } 2144 2145 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2146 if (raddr >= seg->s_base + seg->s_size) { 2147 seg = AS_SEGNEXT(as, seg); 2148 if (seg == NULL || raddr != seg->s_base) { 2149 error = -1; 2150 break; 2151 } 2152 } 2153 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2154 ssize = seg->s_base + seg->s_size - raddr; 2155 else 2156 ssize = rsize; 2157 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec); 2158 if (isize != ssize) { 2159 error = -1; 2160 break; 2161 } 2162 vec += btopr(ssize); 2163 } 2164 AS_LOCK_EXIT(as, &as->a_lock); 2165 return (error); 2166 } 2167 2168 static void 2169 as_segunlock(struct seg *seg, caddr_t addr, int attr, 2170 ulong_t *bitmap, size_t position, size_t npages) 2171 { 2172 caddr_t range_start; 2173 size_t pos1 = position; 2174 size_t pos2; 2175 size_t size; 2176 size_t end_pos = npages + position; 2177 2178 while (bt_range(bitmap, &pos1, &pos2, end_pos)) { 2179 size = ptob((pos2 - pos1)); 2180 range_start = (caddr_t)((uintptr_t)addr + 2181 ptob(pos1 - position)); 2182 2183 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK, 2184 (ulong_t *)NULL, (size_t)NULL); 2185 pos1 = pos2; 2186 } 2187 } 2188 2189 static void 2190 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map, 2191 caddr_t raddr, size_t rsize) 2192 { 2193 struct seg *seg = as_segat(as, raddr); 2194 size_t ssize; 2195 2196 while (rsize != 0) { 2197 if (raddr >= seg->s_base + seg->s_size) 2198 seg = AS_SEGNEXT(as, seg); 2199 2200 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2201 ssize = seg->s_base + seg->s_size - raddr; 2202 else 2203 ssize = rsize; 2204 2205 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize)); 2206 2207 rsize -= ssize; 2208 raddr += ssize; 2209 } 2210 } 2211 2212 /* 2213 * Cache control operations over the interval [addr, addr + size) in 2214 * address space "as". 2215 */ 2216 /*ARGSUSED*/ 2217 int 2218 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr, 2219 uintptr_t arg, ulong_t *lock_map, size_t pos) 2220 { 2221 struct seg *seg; /* working segment */ 2222 caddr_t raddr; /* rounded down addr */ 2223 caddr_t initraddr; /* saved initial rounded down addr */ 2224 size_t rsize; /* rounded up size */ 2225 size_t initrsize; /* saved initial rounded up size */ 2226 size_t ssize; /* size of seg */ 2227 int error = 0; /* result */ 2228 size_t mlock_size; /* size of bitmap */ 2229 ulong_t *mlock_map; /* pointer to bitmap used */ 2230 /* to represent the locked */ 2231 /* pages. */ 2232 retry: 2233 if (error == IE_RETRY) 2234 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2235 else 2236 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2237 2238 /* 2239 * If these are address space lock/unlock operations, loop over 2240 * all segments in the address space, as appropriate. 
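 *
 * As an illustrative, hypothetical example (argument values assumed, not
 * taken from this file), an mlockall(MCL_CURRENT)-style request would
 * reach this code as something like:
 *
 *	error = as_ctl(as, NULL, 0, MC_LOCKAS, 0, MCL_CURRENT, NULL, 0);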
2241 */ 2242 if (func == MC_LOCKAS) { 2243 size_t npages, idx; 2244 size_t rlen = 0; /* rounded as length */ 2245 2246 idx = pos; 2247 2248 if (arg & MCL_FUTURE) { 2249 mutex_enter(&as->a_contents); 2250 AS_SETPGLCK(as); 2251 mutex_exit(&as->a_contents); 2252 } 2253 if ((arg & MCL_CURRENT) == 0) { 2254 AS_LOCK_EXIT(as, &as->a_lock); 2255 return (0); 2256 } 2257 2258 seg = AS_SEGFIRST(as); 2259 if (seg == NULL) { 2260 AS_LOCK_EXIT(as, &as->a_lock); 2261 return (0); 2262 } 2263 2264 do { 2265 raddr = (caddr_t)((uintptr_t)seg->s_base & 2266 (uintptr_t)PAGEMASK); 2267 rlen += (((uintptr_t)(seg->s_base + seg->s_size) + 2268 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr; 2269 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2270 2271 mlock_size = BT_BITOUL(btopr(rlen)); 2272 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2273 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2274 AS_LOCK_EXIT(as, &as->a_lock); 2275 return (EAGAIN); 2276 } 2277 2278 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2279 error = SEGOP_LOCKOP(seg, seg->s_base, 2280 seg->s_size, attr, MC_LOCK, mlock_map, pos); 2281 if (error != 0) 2282 break; 2283 pos += seg_pages(seg); 2284 } 2285 2286 if (error) { 2287 for (seg = AS_SEGFIRST(as); seg != NULL; 2288 seg = AS_SEGNEXT(as, seg)) { 2289 2290 raddr = (caddr_t)((uintptr_t)seg->s_base & 2291 (uintptr_t)PAGEMASK); 2292 npages = seg_pages(seg); 2293 as_segunlock(seg, raddr, attr, mlock_map, 2294 idx, npages); 2295 idx += npages; 2296 } 2297 } 2298 2299 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2300 AS_LOCK_EXIT(as, &as->a_lock); 2301 goto lockerr; 2302 } else if (func == MC_UNLOCKAS) { 2303 mutex_enter(&as->a_contents); 2304 AS_CLRPGLCK(as); 2305 mutex_exit(&as->a_contents); 2306 2307 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2308 error = SEGOP_LOCKOP(seg, seg->s_base, 2309 seg->s_size, attr, MC_UNLOCK, NULL, 0); 2310 if (error != 0) 2311 break; 2312 } 2313 2314 AS_LOCK_EXIT(as, &as->a_lock); 2315 goto lockerr; 2316 } 2317 2318 /* 2319 * Normalize addresses and sizes. 2320 */ 2321 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2322 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2323 (size_t)raddr; 2324 2325 if (raddr + rsize < raddr) { /* check for wraparound */ 2326 AS_LOCK_EXIT(as, &as->a_lock); 2327 return (ENOMEM); 2328 } 2329 2330 /* 2331 * Get initial segment. 2332 */ 2333 if ((seg = as_segat(as, raddr)) == NULL) { 2334 AS_LOCK_EXIT(as, &as->a_lock); 2335 return (ENOMEM); 2336 } 2337 2338 if (func == MC_LOCK) { 2339 mlock_size = BT_BITOUL(btopr(rsize)); 2340 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2341 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2342 AS_LOCK_EXIT(as, &as->a_lock); 2343 return (EAGAIN); 2344 } 2345 } 2346 2347 /* 2348 * Loop over all segments. If a hole in the address range is 2349 * discovered, then fail. For each segment, perform the appropriate 2350 * control operation. 2351 */ 2352 while (rsize != 0) { 2353 2354 /* 2355 * Make sure there's no hole, calculate the portion 2356 * of the next segment to be operated over. 
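 *
 * For example (illustrative): if raddr lies 8K into a 64K segment and
 * rsize is 1M, ssize for this iteration is 56K, the remainder of the
 * segment; the next iteration continues at the following segment.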
2357 */ 2358 if (raddr >= seg->s_base + seg->s_size) { 2359 seg = AS_SEGNEXT(as, seg); 2360 if (seg == NULL || raddr != seg->s_base) { 2361 if (func == MC_LOCK) { 2362 as_unlockerr(as, attr, mlock_map, 2363 initraddr, initrsize - rsize); 2364 kmem_free(mlock_map, 2365 mlock_size * sizeof (ulong_t)); 2366 } 2367 AS_LOCK_EXIT(as, &as->a_lock); 2368 return (ENOMEM); 2369 } 2370 } 2371 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2372 ssize = seg->s_base + seg->s_size - raddr; 2373 else 2374 ssize = rsize; 2375 2376 /* 2377 * Dispatch on specific function. 2378 */ 2379 switch (func) { 2380 2381 /* 2382 * Synchronize cached data from mappings with backing 2383 * objects. 2384 */ 2385 case MC_SYNC: 2386 if (error = SEGOP_SYNC(seg, raddr, ssize, 2387 attr, (uint_t)arg)) { 2388 AS_LOCK_EXIT(as, &as->a_lock); 2389 return (error); 2390 } 2391 break; 2392 2393 /* 2394 * Lock pages in memory. 2395 */ 2396 case MC_LOCK: 2397 if (error = SEGOP_LOCKOP(seg, raddr, ssize, 2398 attr, func, mlock_map, pos)) { 2399 as_unlockerr(as, attr, mlock_map, initraddr, 2400 initrsize - rsize + ssize); 2401 kmem_free(mlock_map, mlock_size * 2402 sizeof (ulong_t)); 2403 AS_LOCK_EXIT(as, &as->a_lock); 2404 goto lockerr; 2405 } 2406 break; 2407 2408 /* 2409 * Unlock mapped pages. 2410 */ 2411 case MC_UNLOCK: 2412 (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func, 2413 (ulong_t *)NULL, (size_t)NULL); 2414 break; 2415 2416 /* 2417 * Store VM advise for mapped pages in segment layer. 2418 */ 2419 case MC_ADVISE: 2420 error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg); 2421 2422 /* 2423 * Check for regular errors and special retry error 2424 */ 2425 if (error) { 2426 if (error == IE_RETRY) { 2427 /* 2428 * Need to acquire writers lock, so 2429 * have to drop readers lock and start 2430 * all over again 2431 */ 2432 AS_LOCK_EXIT(as, &as->a_lock); 2433 goto retry; 2434 } else if (error == IE_REATTACH) { 2435 /* 2436 * Find segment for current address 2437 * because current segment just got 2438 * split or concatenated 2439 */ 2440 seg = as_segat(as, raddr); 2441 if (seg == NULL) { 2442 AS_LOCK_EXIT(as, &as->a_lock); 2443 return (ENOMEM); 2444 } 2445 } else { 2446 /* 2447 * Regular error 2448 */ 2449 AS_LOCK_EXIT(as, &as->a_lock); 2450 return (error); 2451 } 2452 } 2453 break; 2454 2455 case MC_INHERIT_ZERO: 2456 if (seg->s_ops->inherit == NULL) { 2457 error = ENOTSUP; 2458 } else { 2459 error = SEGOP_INHERIT(seg, raddr, ssize, 2460 SEGP_INH_ZERO); 2461 } 2462 if (error != 0) { 2463 AS_LOCK_EXIT(as, &as->a_lock); 2464 return (error); 2465 } 2466 break; 2467 2468 /* 2469 * Can't happen. 2470 */ 2471 default: 2472 panic("as_ctl: bad operation %d", func); 2473 /*NOTREACHED*/ 2474 } 2475 2476 rsize -= ssize; 2477 raddr += ssize; 2478 } 2479 2480 if (func == MC_LOCK) 2481 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2482 AS_LOCK_EXIT(as, &as->a_lock); 2483 return (0); 2484 lockerr: 2485 2486 /* 2487 * If the lower levels returned EDEADLK for a segment lockop, 2488 * it means that we should retry the operation. Let's wait 2489 * a bit also to let the deadlock causing condition clear. 2490 * This is part of a gross hack to work around a design flaw 2491 * in the ufs/sds logging code and should go away when the 2492 * logging code is re-designed to fix the problem. See bug 2493 * 4125102 for details of the problem. 
2494 */ 2495 if (error == EDEADLK) { 2496 delay(deadlk_wait); 2497 error = 0; 2498 goto retry; 2499 } 2500 return (error); 2501 } 2502 2503 int 2504 fc_decode(faultcode_t fault_err) 2505 { 2506 int error = 0; 2507 2508 switch (FC_CODE(fault_err)) { 2509 case FC_OBJERR: 2510 error = FC_ERRNO(fault_err); 2511 break; 2512 case FC_PROT: 2513 error = EACCES; 2514 break; 2515 default: 2516 error = EFAULT; 2517 break; 2518 } 2519 return (error); 2520 } 2521 2522 /* 2523 * Pagelock pages from a range that spans more than 1 segment. Obtain shadow 2524 * lists from each segment and copy them to one contiguous shadow list (plist) 2525 * as expected by the caller. Save pointers to per segment shadow lists at 2526 * the tail of plist so that they can be used during as_pageunlock(). 2527 */ 2528 static int 2529 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp, 2530 caddr_t addr, size_t size, enum seg_rw rw) 2531 { 2532 caddr_t sv_addr = addr; 2533 size_t sv_size = size; 2534 struct seg *sv_seg = seg; 2535 ulong_t segcnt = 1; 2536 ulong_t cnt; 2537 size_t ssize; 2538 pgcnt_t npages = btop(size); 2539 page_t **plist; 2540 page_t **pl; 2541 int error; 2542 caddr_t eaddr; 2543 faultcode_t fault_err = 0; 2544 pgcnt_t pl_off; 2545 extern struct seg_ops segspt_shmops; 2546 2547 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 2548 ASSERT(seg != NULL); 2549 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size); 2550 ASSERT(addr + size > seg->s_base + seg->s_size); 2551 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 2552 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 2553 2554 /* 2555 * Count the number of segments covered by the range we are about to 2556 * lock. The segment count is used to size the shadow list we return 2557 * back to the caller. 2558 */ 2559 for (; size != 0; size -= ssize, addr += ssize) { 2560 if (addr >= seg->s_base + seg->s_size) { 2561 2562 seg = AS_SEGNEXT(as, seg); 2563 if (seg == NULL || addr != seg->s_base) { 2564 AS_LOCK_EXIT(as, &as->a_lock); 2565 return (EFAULT); 2566 } 2567 /* 2568 * Do a quick check if subsequent segments 2569 * will most likely support pagelock. 
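 * Only anonymous segvn segments (those with no vnode) and SPT shared
 * memory segments are likely to; any other segment type sends us to
 * the slow path below.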
2570 */ 2571 if (seg->s_ops == &segvn_ops) { 2572 vnode_t *vp; 2573 2574 if (SEGOP_GETVP(seg, addr, &vp) != 0 || 2575 vp != NULL) { 2576 AS_LOCK_EXIT(as, &as->a_lock); 2577 goto slow; 2578 } 2579 } else if (seg->s_ops != &segspt_shmops) { 2580 AS_LOCK_EXIT(as, &as->a_lock); 2581 goto slow; 2582 } 2583 segcnt++; 2584 } 2585 if (addr + size > seg->s_base + seg->s_size) { 2586 ssize = seg->s_base + seg->s_size - addr; 2587 } else { 2588 ssize = size; 2589 } 2590 } 2591 ASSERT(segcnt > 1); 2592 2593 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP); 2594 2595 addr = sv_addr; 2596 size = sv_size; 2597 seg = sv_seg; 2598 2599 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) { 2600 if (addr >= seg->s_base + seg->s_size) { 2601 seg = AS_SEGNEXT(as, seg); 2602 ASSERT(seg != NULL && addr == seg->s_base); 2603 cnt++; 2604 ASSERT(cnt < segcnt); 2605 } 2606 if (addr + size > seg->s_base + seg->s_size) { 2607 ssize = seg->s_base + seg->s_size - addr; 2608 } else { 2609 ssize = size; 2610 } 2611 pl = &plist[npages + cnt]; 2612 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2613 L_PAGELOCK, rw); 2614 if (error) { 2615 break; 2616 } 2617 ASSERT(plist[npages + cnt] != NULL); 2618 ASSERT(pl_off + btop(ssize) <= npages); 2619 bcopy(plist[npages + cnt], &plist[pl_off], 2620 btop(ssize) * sizeof (page_t *)); 2621 pl_off += btop(ssize); 2622 } 2623 2624 if (size == 0) { 2625 AS_LOCK_EXIT(as, &as->a_lock); 2626 ASSERT(cnt == segcnt - 1); 2627 *ppp = plist; 2628 return (0); 2629 } 2630 2631 /* 2632 * One of the pagelock calls failed; the error code is in the error 2633 * variable. Unlock what we've locked so far, and retry with F_SOFTLOCK 2634 * if the error is either EFAULT or ENOTSUP. Otherwise just return the 2635 * error back to the caller. 2636 */ 2637 2638 eaddr = addr; 2639 seg = sv_seg; 2640 2641 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) { 2642 if (addr >= seg->s_base + seg->s_size) { 2643 seg = AS_SEGNEXT(as, seg); 2644 ASSERT(seg != NULL && addr == seg->s_base); 2645 cnt++; 2646 ASSERT(cnt < segcnt); 2647 } 2648 if (eaddr > seg->s_base + seg->s_size) { 2649 ssize = seg->s_base + seg->s_size - addr; 2650 } else { 2651 ssize = eaddr - addr; 2652 } 2653 pl = &plist[npages + cnt]; 2654 ASSERT(*pl != NULL); 2655 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2656 L_PAGEUNLOCK, rw); 2657 } 2658 2659 AS_LOCK_EXIT(as, &as->a_lock); 2660 2661 kmem_free(plist, (npages + segcnt) * sizeof (page_t *)); 2662 2663 if (error != ENOTSUP && error != EFAULT) { 2664 return (error); 2665 } 2666 2667 slow: 2668 /* 2669 * If we are here, pagelock failed because the pages we want to lock 2670 * need to be cow faulted in. F_SOFTLOCK will do this job, and in the 2671 * next as_pagelock() call for this address range pagelock will 2672 * hopefully succeed. 2673 */ 2674 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw); 2675 if (fault_err != 0) { 2676 return (fc_decode(fault_err)); 2677 } 2678 *ppp = NULL; 2679 2680 return (0); 2681 } 2682 2683 /* 2684 * lock pages in a given address space. Return the shadow list. If 2685 * the list is NULL, the MMU mapping is also locked.
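 *
 * An illustrative sketch of the usual pairing (hypothetical driver-style
 * caller; 'uaddr' and 'len' are assumed):
 *
 *	page_t **pl;
 *
 *	if (as_pagelock(as, &pl, uaddr, len, S_WRITE) == 0) {
 *		(perform the I/O; pl may be NULL if the fault path was used)
 *		as_pageunlock(as, pl, uaddr, len, S_WRITE);
 *	}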
2686 */ 2687 int 2688 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr, 2689 size_t size, enum seg_rw rw) 2690 { 2691 size_t rsize; 2692 caddr_t raddr; 2693 faultcode_t fault_err; 2694 struct seg *seg; 2695 int err; 2696 2697 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START, 2698 "as_pagelock_start: addr %p size %ld", addr, size); 2699 2700 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2701 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2702 (size_t)raddr; 2703 2704 /* 2705 * if the request crosses more than one segment, 2706 * let as_pagelock_segs() handle it. 2707 */ 2708 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2709 2710 seg = as_segat(as, raddr); 2711 if (seg == NULL) { 2712 AS_LOCK_EXIT(as, &as->a_lock); 2713 return (EFAULT); 2714 } 2715 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size); 2716 if (raddr + rsize > seg->s_base + seg->s_size) { 2717 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw)); 2718 } 2719 if (raddr + rsize <= raddr) { 2720 AS_LOCK_EXIT(as, &as->a_lock); 2721 return (EFAULT); 2722 } 2723 2724 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START, 2725 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize); 2726 2727 /* 2728 * try to lock pages and pass back shadow list 2729 */ 2730 err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw); 2731 2732 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end"); 2733 2734 AS_LOCK_EXIT(as, &as->a_lock); 2735 2736 if (err == 0 || (err != ENOTSUP && err != EFAULT)) { 2737 return (err); 2738 } 2739 2740 /* 2741 * Use F_SOFTLOCK to lock the pages because pagelock failed, either due 2742 * to no pagelock support for this segment or because the pages need to 2743 * be cow faulted in. If a fault is needed, F_SOFTLOCK will do this job 2744 * for this as_pagelock() call, and in the next as_pagelock() call for 2745 * the same address range the pagelock call will hopefully succeed. 2746 */ 2747 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw); 2748 if (fault_err != 0) { 2749 return (fc_decode(fault_err)); 2750 } 2751 *ppp = NULL; 2752 2753 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end"); 2754 return (0); 2755 } 2756 2757 /* 2758 * unlock pages locked by as_pagelock_segs(). Retrieve per-segment shadow 2759 * lists from the end of plist and call the pageunlock interface for each 2760 * segment. Drop the as lock and free plist.
2761 */ 2762 static void 2763 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size, 2764 struct page **plist, enum seg_rw rw) 2765 { 2766 ulong_t cnt; 2767 caddr_t eaddr = addr + size; 2768 pgcnt_t npages = btop(size); 2769 size_t ssize; 2770 page_t **pl; 2771 2772 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 2773 ASSERT(seg != NULL); 2774 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size); 2775 ASSERT(addr + size > seg->s_base + seg->s_size); 2776 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 2777 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 2778 ASSERT(plist != NULL); 2779 2780 for (cnt = 0; addr < eaddr; addr += ssize) { 2781 if (addr >= seg->s_base + seg->s_size) { 2782 seg = AS_SEGNEXT(as, seg); 2783 ASSERT(seg != NULL && addr == seg->s_base); 2784 cnt++; 2785 } 2786 if (eaddr > seg->s_base + seg->s_size) { 2787 ssize = seg->s_base + seg->s_size - addr; 2788 } else { 2789 ssize = eaddr - addr; 2790 } 2791 pl = &plist[npages + cnt]; 2792 ASSERT(*pl != NULL); 2793 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2794 L_PAGEUNLOCK, rw); 2795 } 2796 ASSERT(cnt > 0); 2797 AS_LOCK_EXIT(as, &as->a_lock); 2798 2799 cnt++; 2800 kmem_free(plist, (npages + cnt) * sizeof (page_t *)); 2801 } 2802 2803 /* 2804 * unlock pages in a given address range 2805 */ 2806 void 2807 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size, 2808 enum seg_rw rw) 2809 { 2810 struct seg *seg; 2811 size_t rsize; 2812 caddr_t raddr; 2813 2814 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START, 2815 "as_pageunlock_start: addr %p size %ld", addr, size); 2816 2817 /* 2818 * if the shadow list is NULL, as_pagelock was 2819 * falling back to as_fault 2820 */ 2821 if (pp == NULL) { 2822 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw); 2823 return; 2824 } 2825 2826 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2827 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2828 (size_t)raddr; 2829 2830 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2831 seg = as_segat(as, raddr); 2832 ASSERT(seg != NULL); 2833 2834 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START, 2835 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize); 2836 2837 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size); 2838 if (raddr + rsize <= seg->s_base + seg->s_size) { 2839 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw); 2840 } else { 2841 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw); 2842 return; 2843 } 2844 AS_LOCK_EXIT(as, &as->a_lock); 2845 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end"); 2846 } 2847 2848 int 2849 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc, 2850 boolean_t wait) 2851 { 2852 struct seg *seg; 2853 size_t ssize; 2854 caddr_t raddr; /* rounded down addr */ 2855 size_t rsize; /* rounded up size */ 2856 int error = 0; 2857 size_t pgsz = page_get_pagesize(szc); 2858 2859 setpgsz_top: 2860 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) { 2861 return (EINVAL); 2862 } 2863 2864 raddr = addr; 2865 rsize = size; 2866 2867 if (raddr + rsize < raddr) /* check for wraparound */ 2868 return (ENOMEM); 2869 2870 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2871 as_clearwatchprot(as, raddr, rsize); 2872 seg = as_segat(as, raddr); 2873 if (seg == NULL) { 2874 as_setwatch(as); 2875 AS_LOCK_EXIT(as, &as->a_lock); 2876 return (ENOMEM); 2877 } 2878 2879 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2880 if (raddr >= seg->s_base + seg->s_size) { 2881 seg = AS_SEGNEXT(as, seg); 2882 if (seg == NULL || raddr 
!= seg->s_base) { 2883 error = ENOMEM; 2884 break; 2885 } 2886 } 2887 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 2888 ssize = seg->s_base + seg->s_size - raddr; 2889 } else { 2890 ssize = rsize; 2891 } 2892 2893 retry: 2894 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 2895 2896 if (error == IE_NOMEM) { 2897 error = EAGAIN; 2898 break; 2899 } 2900 2901 if (error == IE_RETRY) { 2902 AS_LOCK_EXIT(as, &as->a_lock); 2903 goto setpgsz_top; 2904 } 2905 2906 if (error == ENOTSUP) { 2907 error = EINVAL; 2908 break; 2909 } 2910 2911 if (wait && (error == EAGAIN)) { 2912 /* 2913 * Memory is currently locked. It must be unlocked 2914 * before this operation can succeed through a retry. 2915 * The possible reasons for locked memory and 2916 * corresponding strategies for unlocking are: 2917 * (1) Normal I/O 2918 * wait for a signal that the I/O operation 2919 * has completed and the memory is unlocked. 2920 * (2) Asynchronous I/O 2921 * The aio subsystem does not unlock pages when 2922 * the I/O is completed. Those pages are unlocked 2923 * when the application calls aiowait/aioerror. 2924 * So, to prevent blocking forever, cv_broadcast() 2925 * is done to wake up aio_cleanup_thread. 2926 * Subsequently, segvn_reclaim will be called, and 2927 * that will do AS_CLRUNMAPWAIT() and wake us up. 2928 * (3) Long term page locking: 2929 * This is not relevant for as_setpagesize() 2930 * because we cannot change the page size for 2931 * driver memory. The attempt to do so will 2932 * fail with a different error than EAGAIN so 2933 * there's no need to trigger as callbacks like 2934 * as_unmap, as_setprot or as_free would do. 2935 */ 2936 mutex_enter(&as->a_contents); 2937 if (!AS_ISNOUNMAPWAIT(as)) { 2938 if (AS_ISUNMAPWAIT(as) == 0) { 2939 cv_broadcast(&as->a_cv); 2940 } 2941 AS_SETUNMAPWAIT(as); 2942 AS_LOCK_EXIT(as, &as->a_lock); 2943 while (AS_ISUNMAPWAIT(as)) { 2944 cv_wait(&as->a_cv, &as->a_contents); 2945 } 2946 } else { 2947 /* 2948 * We may have raced with 2949 * segvn_reclaim()/segspt_reclaim(). In this 2950 * case clean nounmapwait flag and retry since 2951 * softlockcnt in this segment may be already 2952 * 0. We don't drop as writer lock so our 2953 * number of retries without sleeping should 2954 * be very small. See segvn_reclaim() for 2955 * more comments. 2956 */ 2957 AS_CLRNOUNMAPWAIT(as); 2958 mutex_exit(&as->a_contents); 2959 goto retry; 2960 } 2961 mutex_exit(&as->a_contents); 2962 goto setpgsz_top; 2963 } else if (error != 0) { 2964 break; 2965 } 2966 } 2967 as_setwatch(as); 2968 AS_LOCK_EXIT(as, &as->a_lock); 2969 return (error); 2970 } 2971 2972 /* 2973 * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments 2974 * in its chunk where s_szc is less than the szc we want to set. 
2975 */ 2976 static int 2977 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 2978 int *retry) 2979 { 2980 struct seg *seg; 2981 size_t ssize; 2982 int error; 2983 2984 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 2985 2986 seg = as_segat(as, raddr); 2987 if (seg == NULL) { 2988 panic("as_iset3_default_lpsize: no seg"); 2989 } 2990 2991 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2992 if (raddr >= seg->s_base + seg->s_size) { 2993 seg = AS_SEGNEXT(as, seg); 2994 if (seg == NULL || raddr != seg->s_base) { 2995 panic("as_iset3_default_lpsize: as changed"); 2996 } 2997 } 2998 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 2999 ssize = seg->s_base + seg->s_size - raddr; 3000 } else { 3001 ssize = rsize; 3002 } 3003 3004 if (szc > seg->s_szc) { 3005 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 3006 /* Only retry on EINVAL segments that have no vnode. */ 3007 if (error == EINVAL) { 3008 vnode_t *vp = NULL; 3009 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) && 3010 (SEGOP_GETVP(seg, raddr, &vp) != 0 || 3011 vp == NULL)) { 3012 *retry = 1; 3013 } else { 3014 *retry = 0; 3015 } 3016 } 3017 if (error) { 3018 return (error); 3019 } 3020 } 3021 } 3022 return (0); 3023 } 3024 3025 /* 3026 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the 3027 * pagesize on each segment in its range, but if any fails with EINVAL, 3028 * then it reduces the pagesizes to the next size in the bitmap and 3029 * retries as_iset3_default_lpsize(). The code retries smaller allowed 3030 * sizes on EINVAL because (a) the anon offset may not 3031 * match the bigger sizes, and (b) it's hard to get this offset (to begin 3032 * with) to pass to map_pgszcvec(). 3033 */ 3034 static int 3035 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc, 3036 uint_t szcvec) 3037 { 3038 int error; 3039 int retry; 3040 3041 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3042 3043 for (;;) { 3044 error = as_iset3_default_lpsize(as, addr, size, szc, &retry); 3045 if (error == EINVAL && retry) { 3046 szcvec &= ~(1 << szc); 3047 if (szcvec <= 1) { 3048 return (EINVAL); 3049 } 3050 szc = highbit(szcvec) - 1; 3051 } else { 3052 return (error); 3053 } 3054 } 3055 } 3056 3057 /* 3058 * as_iset1_default_lpsize() breaks its chunk into areas where existing 3059 * segments have a smaller szc than we want to set.
For each such area, 3060 * it calls as_iset2_default_lpsize() 3061 */ 3062 static int 3063 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 3064 uint_t szcvec) 3065 { 3066 struct seg *seg; 3067 size_t ssize; 3068 caddr_t setaddr = raddr; 3069 size_t setsize = 0; 3070 int set; 3071 int error; 3072 3073 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3074 3075 seg = as_segat(as, raddr); 3076 if (seg == NULL) { 3077 panic("as_iset1_default_lpsize: no seg"); 3078 } 3079 if (seg->s_szc < szc) { 3080 set = 1; 3081 } else { 3082 set = 0; 3083 } 3084 3085 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { 3086 if (raddr >= seg->s_base + seg->s_size) { 3087 seg = AS_SEGNEXT(as, seg); 3088 if (seg == NULL || raddr != seg->s_base) { 3089 panic("as_iset1_default_lpsize: as changed"); 3090 } 3091 if (seg->s_szc >= szc && set) { 3092 ASSERT(setsize != 0); 3093 error = as_iset2_default_lpsize(as, 3094 setaddr, setsize, szc, szcvec); 3095 if (error) { 3096 return (error); 3097 } 3098 set = 0; 3099 } else if (seg->s_szc < szc && !set) { 3100 setaddr = raddr; 3101 setsize = 0; 3102 set = 1; 3103 } 3104 } 3105 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3106 ssize = seg->s_base + seg->s_size - raddr; 3107 } else { 3108 ssize = rsize; 3109 } 3110 } 3111 error = 0; 3112 if (set) { 3113 ASSERT(setsize != 0); 3114 error = as_iset2_default_lpsize(as, setaddr, setsize, 3115 szc, szcvec); 3116 } 3117 return (error); 3118 } 3119 3120 /* 3121 * as_iset_default_lpsize() breaks its chunk according to the size code bitmap 3122 * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each 3123 * chunk to as_iset1_default_lpsize(). 3124 */ 3125 static int 3126 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags, 3127 int type) 3128 { 3129 int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM; 3130 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, 3131 flags, rtype, 1); 3132 uint_t szc; 3133 uint_t nszc; 3134 int error; 3135 caddr_t a; 3136 caddr_t eaddr; 3137 size_t segsize; 3138 size_t pgsz; 3139 uint_t save_szcvec; 3140 3141 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3142 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 3143 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 3144 3145 szcvec &= ~1; 3146 if (szcvec <= 1) { /* skip if base page size */ 3147 return (0); 3148 } 3149 3150 /* Get the pagesize of the first larger page size. 
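 * (For example, if szcvec is 0x6 after clearing bit zero, then
 * lowbit(szcvec) - 1 yields szc 1, the smallest large page size in the
 * bitmap.)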
*/ 3151 szc = lowbit(szcvec) - 1; 3152 pgsz = page_get_pagesize(szc); 3153 eaddr = addr + size; 3154 addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 3155 eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); 3156 3157 save_szcvec = szcvec; 3158 szcvec >>= (szc + 1); 3159 nszc = szc; 3160 while (szcvec) { 3161 if ((szcvec & 0x1) == 0) { 3162 nszc++; 3163 szcvec >>= 1; 3164 continue; 3165 } 3166 nszc++; 3167 pgsz = page_get_pagesize(nszc); 3168 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 3169 if (a != addr) { 3170 ASSERT(szc > 0); 3171 ASSERT(a < eaddr); 3172 segsize = a - addr; 3173 error = as_iset1_default_lpsize(as, addr, segsize, szc, 3174 save_szcvec); 3175 if (error) { 3176 return (error); 3177 } 3178 addr = a; 3179 } 3180 szc = nszc; 3181 szcvec >>= 1; 3182 } 3183 3184 ASSERT(addr < eaddr); 3185 szcvec = save_szcvec; 3186 while (szcvec) { 3187 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); 3188 ASSERT(a >= addr); 3189 if (a != addr) { 3190 ASSERT(szc > 0); 3191 segsize = a - addr; 3192 error = as_iset1_default_lpsize(as, addr, segsize, szc, 3193 save_szcvec); 3194 if (error) { 3195 return (error); 3196 } 3197 addr = a; 3198 } 3199 szcvec &= ~(1 << szc); 3200 if (szcvec) { 3201 szc = highbit(szcvec) - 1; 3202 pgsz = page_get_pagesize(szc); 3203 } 3204 } 3205 ASSERT(addr == eaddr); 3206 3207 return (0); 3208 } 3209 3210 /* 3211 * Set the default large page size for the range. Called via memcntl with 3212 * page size set to 0. as_set_default_lpsize breaks the range down into 3213 * chunks with the same type/flags, ignores non-segvn segments, and passes 3214 * each chunk to as_iset_default_lpsize(). 3215 */ 3216 int 3217 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size) 3218 { 3219 struct seg *seg; 3220 caddr_t raddr; 3221 size_t rsize; 3222 size_t ssize; 3223 int rtype, rflags; 3224 int stype, sflags; 3225 int error; 3226 caddr_t setaddr; 3227 size_t setsize; 3228 int segvn; 3229 3230 if (size == 0) 3231 return (0); 3232 3233 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3234 again: 3235 error = 0; 3236 3237 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3238 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 3239 (size_t)raddr; 3240 3241 if (raddr + rsize < raddr) { /* check for wraparound */ 3242 AS_LOCK_EXIT(as, &as->a_lock); 3243 return (ENOMEM); 3244 } 3245 as_clearwatchprot(as, raddr, rsize); 3246 seg = as_segat(as, raddr); 3247 if (seg == NULL) { 3248 as_setwatch(as); 3249 AS_LOCK_EXIT(as, &as->a_lock); 3250 return (ENOMEM); 3251 } 3252 if (seg->s_ops == &segvn_ops) { 3253 rtype = SEGOP_GETTYPE(seg, addr); 3254 rflags = rtype & (MAP_TEXT | MAP_INITDATA); 3255 rtype = rtype & (MAP_SHARED | MAP_PRIVATE); 3256 segvn = 1; 3257 } else { 3258 segvn = 0; 3259 } 3260 setaddr = raddr; 3261 setsize = 0; 3262 3263 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { 3264 if (raddr >= (seg->s_base + seg->s_size)) { 3265 seg = AS_SEGNEXT(as, seg); 3266 if (seg == NULL || raddr != seg->s_base) { 3267 error = ENOMEM; 3268 break; 3269 } 3270 if (seg->s_ops == &segvn_ops) { 3271 stype = SEGOP_GETTYPE(seg, raddr); 3272 sflags = stype & (MAP_TEXT | MAP_INITDATA); 3273 stype &= (MAP_SHARED | MAP_PRIVATE); 3274 if (segvn && (rflags != sflags || 3275 rtype != stype)) { 3276 /* 3277 * The next segment is also segvn but 3278 * has different flags and/or type.
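 * Flush the chunk accumulated so far before starting a new one
 * at raddr.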
3279 */ 3280 ASSERT(setsize != 0); 3281 error = as_iset_default_lpsize(as, 3282 setaddr, setsize, rflags, rtype); 3283 if (error) { 3284 break; 3285 } 3286 rflags = sflags; 3287 rtype = stype; 3288 setaddr = raddr; 3289 setsize = 0; 3290 } else if (!segvn) { 3291 rflags = sflags; 3292 rtype = stype; 3293 setaddr = raddr; 3294 setsize = 0; 3295 segvn = 1; 3296 } 3297 } else if (segvn) { 3298 /* The next segment is not segvn. */ 3299 ASSERT(setsize != 0); 3300 error = as_iset_default_lpsize(as, 3301 setaddr, setsize, rflags, rtype); 3302 if (error) { 3303 break; 3304 } 3305 segvn = 0; 3306 } 3307 } 3308 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3309 ssize = seg->s_base + seg->s_size - raddr; 3310 } else { 3311 ssize = rsize; 3312 } 3313 } 3314 if (error == 0 && segvn) { 3315 /* The last chunk when rsize == 0. */ 3316 ASSERT(setsize != 0); 3317 error = as_iset_default_lpsize(as, setaddr, setsize, 3318 rflags, rtype); 3319 } 3320 3321 if (error == IE_RETRY) { 3322 goto again; 3323 } else if (error == IE_NOMEM) { 3324 error = EAGAIN; 3325 } else if (error == ENOTSUP) { 3326 error = EINVAL; 3327 } else if (error == EAGAIN) { 3328 mutex_enter(&as->a_contents); 3329 if (!AS_ISNOUNMAPWAIT(as)) { 3330 if (AS_ISUNMAPWAIT(as) == 0) { 3331 cv_broadcast(&as->a_cv); 3332 } 3333 AS_SETUNMAPWAIT(as); 3334 AS_LOCK_EXIT(as, &as->a_lock); 3335 while (AS_ISUNMAPWAIT(as)) { 3336 cv_wait(&as->a_cv, &as->a_contents); 3337 } 3338 mutex_exit(&as->a_contents); 3339 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3340 } else { 3341 /* 3342 * We may have raced with 3343 * segvn_reclaim()/segspt_reclaim(). In this case 3344 * clean nounmapwait flag and retry since softlockcnt 3345 * in this segment may be already 0. We don't drop as 3346 * writer lock so our number of retries without 3347 * sleeping should be very small. See segvn_reclaim() 3348 * for more comments. 3349 */ 3350 AS_CLRNOUNMAPWAIT(as); 3351 mutex_exit(&as->a_contents); 3352 } 3353 goto again; 3354 } 3355 3356 as_setwatch(as); 3357 AS_LOCK_EXIT(as, &as->a_lock); 3358 return (error); 3359 } 3360 3361 /* 3362 * Setup all of the uninitialized watched pages that we can. 3363 */ 3364 void 3365 as_setwatch(struct as *as) 3366 { 3367 struct watched_page *pwp; 3368 struct seg *seg; 3369 caddr_t vaddr; 3370 uint_t prot; 3371 int err, retrycnt; 3372 3373 if (avl_numnodes(&as->a_wpage) == 0) 3374 return; 3375 3376 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3377 3378 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3379 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3380 retrycnt = 0; 3381 retry: 3382 vaddr = pwp->wp_vaddr; 3383 if (pwp->wp_oprot != 0 || /* already set up */ 3384 (seg = as_segat(as, vaddr)) == NULL || 3385 SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0) 3386 continue; 3387 3388 pwp->wp_oprot = prot; 3389 if (pwp->wp_read) 3390 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3391 if (pwp->wp_write) 3392 prot &= ~PROT_WRITE; 3393 if (pwp->wp_exec) 3394 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3395 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) { 3396 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3397 if (err == IE_RETRY) { 3398 pwp->wp_oprot = 0; 3399 ASSERT(retrycnt == 0); 3400 retrycnt++; 3401 goto retry; 3402 } 3403 } 3404 pwp->wp_prot = prot; 3405 } 3406 } 3407 3408 /* 3409 * Clear all of the watched pages in the address space. 
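 * This is the inverse of as_setwatch(): restore each watched page's
 * original protections and mark its entry as no longer set up.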
3410 */ 3411 void 3412 as_clearwatch(struct as *as) 3413 { 3414 struct watched_page *pwp; 3415 struct seg *seg; 3416 caddr_t vaddr; 3417 uint_t prot; 3418 int err, retrycnt; 3419 3420 if (avl_numnodes(&as->a_wpage) == 0) 3421 return; 3422 3423 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3424 3425 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3426 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3427 retrycnt = 0; 3428 retry: 3429 vaddr = pwp->wp_vaddr; 3430 if (pwp->wp_oprot == 0 || /* not set up */ 3431 (seg = as_segat(as, vaddr)) == NULL) 3432 continue; 3433 3434 if ((prot = pwp->wp_oprot) != pwp->wp_prot) { 3435 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3436 if (err == IE_RETRY) { 3437 ASSERT(retrycnt == 0); 3438 retrycnt++; 3439 goto retry; 3440 } 3441 } 3442 pwp->wp_oprot = 0; 3443 pwp->wp_prot = 0; 3444 } 3445 } 3446 3447 /* 3448 * Force a new setup for all the watched pages in the range. 3449 */ 3450 static void 3451 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot) 3452 { 3453 struct watched_page *pwp; 3454 struct watched_page tpw; 3455 caddr_t eaddr = addr + size; 3456 caddr_t vaddr; 3457 struct seg *seg; 3458 int err, retrycnt; 3459 uint_t wprot; 3460 avl_index_t where; 3461 3462 if (avl_numnodes(&as->a_wpage) == 0) 3463 return; 3464 3465 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3466 3467 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3468 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3469 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3470 3471 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3472 retrycnt = 0; 3473 vaddr = pwp->wp_vaddr; 3474 3475 wprot = prot; 3476 if (pwp->wp_read) 3477 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3478 if (pwp->wp_write) 3479 wprot &= ~PROT_WRITE; 3480 if (pwp->wp_exec) 3481 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3482 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) { 3483 retry: 3484 seg = as_segat(as, vaddr); 3485 if (seg == NULL) { 3486 panic("as_setwatchprot: no seg"); 3487 /*NOTREACHED*/ 3488 } 3489 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot); 3490 if (err == IE_RETRY) { 3491 ASSERT(retrycnt == 0); 3492 retrycnt++; 3493 goto retry; 3494 } 3495 } 3496 pwp->wp_oprot = prot; 3497 pwp->wp_prot = wprot; 3498 3499 pwp = AVL_NEXT(&as->a_wpage, pwp); 3500 } 3501 } 3502 3503 /* 3504 * Clear all of the watched pages in the range. 
3505 */ 3506 static void 3507 as_clearwatchprot(struct as *as, caddr_t addr, size_t size) 3508 { 3509 caddr_t eaddr = addr + size; 3510 struct watched_page *pwp; 3511 struct watched_page tpw; 3512 uint_t prot; 3513 struct seg *seg; 3514 int err, retrycnt; 3515 avl_index_t where; 3516 3517 if (avl_numnodes(&as->a_wpage) == 0) 3518 return; 3519 3520 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3521 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3522 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3523 3524 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3525 3526 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3527 3528 if ((prot = pwp->wp_oprot) != 0) { 3529 retrycnt = 0; 3530 3531 if (prot != pwp->wp_prot) { 3532 retry: 3533 seg = as_segat(as, pwp->wp_vaddr); 3534 if (seg == NULL) 3535 continue; 3536 err = SEGOP_SETPROT(seg, pwp->wp_vaddr, 3537 PAGESIZE, prot); 3538 if (err == IE_RETRY) { 3539 ASSERT(retrycnt == 0); 3540 retrycnt++; 3541 goto retry; 3542 3543 } 3544 } 3545 pwp->wp_oprot = 0; 3546 pwp->wp_prot = 0; 3547 } 3548 3549 pwp = AVL_NEXT(&as->a_wpage, pwp); 3550 } 3551 } 3552 3553 void 3554 as_signal_proc(struct as *as, k_siginfo_t *siginfo) 3555 { 3556 struct proc *p; 3557 3558 mutex_enter(&pidlock); 3559 for (p = practive; p; p = p->p_next) { 3560 if (p->p_as == as) { 3561 mutex_enter(&p->p_lock); 3562 if (p->p_as == as) 3563 sigaddq(p, NULL, siginfo, KM_NOSLEEP); 3564 mutex_exit(&p->p_lock); 3565 } 3566 } 3567 mutex_exit(&pidlock); 3568 } 3569 3570 /* 3571 * return memory object ID 3572 */ 3573 int 3574 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp) 3575 { 3576 struct seg *seg; 3577 int sts; 3578 3579 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 3580 seg = as_segat(as, addr); 3581 if (seg == NULL) { 3582 AS_LOCK_EXIT(as, &as->a_lock); 3583 return (EFAULT); 3584 } 3585 /* 3586 * catch old drivers which may not support getmemid 3587 */ 3588 if (seg->s_ops->getmemid == NULL) { 3589 AS_LOCK_EXIT(as, &as->a_lock); 3590 return (ENODEV); 3591 } 3592 3593 sts = SEGOP_GETMEMID(seg, addr, memidp); 3594 3595 AS_LOCK_EXIT(as, &as->a_lock); 3596 return (sts); 3597 }
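
/*
 * Illustrative sketch (hypothetical caller, not part of the original file):
 * a caller that soft-locks a user range typically converts the returned
 * faultcode_t into an errno with fc_decode():
 *
 *	faultcode_t fc;
 *
 *	fc = as_fault(as->a_hat, as, uaddr, len, F_SOFTLOCK, S_READ);
 *	if (fc != 0)
 *		return (fc_decode(fc));
 */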