const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
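As a minimal sketch of what the change amounts to (a hypothetical segfoo
driver, not an actual illumos declaration): the ops vector is declared
const, so it lands in read-only data and a stray store through it faults
instead of silently corrupting the dispatch table.

#include <stddef.h>

struct seg;				/* opaque for this sketch */

struct seg_ops {
	int	(*dup)(struct seg *, struct seg *);
	int	(*unmap)(struct seg *, void *, size_t);
};

static int
segfoo_dup(struct seg *seg, struct seg *newseg)
{
	return (0);
}

static int
segfoo_unmap(struct seg *seg, void *addr, size_t len)
{
	return (0);
}

/* Was: struct seg_ops segfoo_ops = { ... };  -- writable */
const struct seg_ops segfoo_ops = {
	segfoo_dup,
	segfoo_unmap,
};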
use NULL getmemid segop as a shorthand for ENODEV
Instead of forcing every segment driver to implement a dummy function that
just returns (hopefully) ENODEV, handle a NULL getmemid segop function
pointer as shorthand for "return ENODEV".
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
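Both changes rely on the same dispatch-side pattern. Here is a minimal
sketch (hypothetical wrapper and type names; the real illumos wrappers and
types differ): the segop wrapper checks the ops slot for NULL and supplies
the error itself, so drivers need neither an ENODEV-returning getmemid stub
nor seg_inherit_notsup. The MC_INHERIT_ZERO case later in this listing
performs the equivalent NULL check on seg->s_ops->inherit.

#include <errno.h>
#include <stddef.h>

typedef struct memid { unsigned long val[2]; } memid_t;

struct seg;

struct seg_ops {
	int	(*getmemid)(struct seg *, void *, memid_t *);
	int	(*inherit)(struct seg *, void *, size_t, unsigned int);
};

struct seg {
	const struct seg_ops *s_ops;
};

/* A NULL getmemid slot reads as "return ENODEV". */
int
segop_getmemid(struct seg *seg, void *addr, memid_t *midp)
{
	if (seg->s_ops->getmemid == NULL)
		return (ENODEV);
	return (seg->s_ops->getmemid(seg, addr, midp));
}

/* A NULL inherit slot reads as "return ENOTSUP"; no stub needed. */
int
segop_inherit(struct seg *seg, void *addr, size_t len, unsigned int op)
{
	if (seg->s_ops->inherit == NULL)
		return (ENOTSUP);
	return (seg->s_ops->inherit(seg, addr, len, op));
}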
patch lower-case-segops
remove whole-process swapping
Long before Unix supported paging, it used whole-process swapping to reclaim
memory.  The code is still there, and in theory it runs when we get
*extremely* low on memory.  In practice, it never runs, since the definition
of low-on-memory is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
remove xhat
The xhat infrastructure was added to support hardware such as the Zulu
graphics card - hardware with its own on-board MMU.  The VM used the xhat
code to keep the CPU's and Zulu's page tables in sync.  Since the only xhat
user was Zulu (which is gone), we can safely remove it, simplifying the
whole VM subsystem.
Assorted notes:
- AS_BUSY flag was used solely by xhat


  42  */
  43 
  44 #include <sys/types.h>
  45 #include <sys/t_lock.h>
  46 #include <sys/param.h>
  47 #include <sys/errno.h>
  48 #include <sys/systm.h>
  49 #include <sys/mman.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/sysinfo.h>
  53 #include <sys/kmem.h>
  54 #include <sys/vnode.h>
  55 #include <sys/vmsystm.h>
  56 #include <sys/cmn_err.h>
  57 #include <sys/debug.h>
  58 #include <sys/tnf_probe.h>
  59 #include <sys/vtrace.h>
  60 
  61 #include <vm/hat.h>
  62 #include <vm/xhat.h>
  63 #include <vm/as.h>
  64 #include <vm/seg.h>
  65 #include <vm/seg_vn.h>
  66 #include <vm/seg_dev.h>
  67 #include <vm/seg_kmem.h>
  68 #include <vm/seg_map.h>
  69 #include <vm/seg_spt.h>
  70 #include <vm/page.h>
  71 
  72 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
  73 
  74 static struct kmem_cache *as_cache;
  75 
  76 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
  77 static void as_clearwatchprot(struct as *, caddr_t, size_t);
  78 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
  79 
  80 
  81 /*
  82  * Verifying the segment lists is very time-consuming; it may not be


 457         seg = avl_find(&as->a_segtree, &addr, &where);
 458 
 459         if (seg == NULL)
 460                 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
 461 
 462         if (seg == NULL)
 463                 seg = avl_last(&as->a_segtree);
 464 
 465         if (seg != NULL) {
 466                 caddr_t base = seg->s_base;
 467 
 468                 /*
 469                  * If top of seg is below the requested address, then
 470                  * the insertion point is at the end of the linked list,
 471                  * and seg points to the tail of the list.  Otherwise,
 472                  * the insertion point is immediately before seg.
 473                  */
 474                 if (base + seg->s_size > addr) {
 475                         if (addr >= base || eaddr > base) {
 476 #ifdef __sparc
 477                                 extern struct seg_ops segnf_ops;
 478 
 479                                 /*
 480                                  * no-fault segs must disappear if overlaid.
 481                                  * XXX need new segment type so
 482                                  * we don't have to check s_ops
 483                                  */
 484                                 if (seg->s_ops == &segnf_ops) {
 485                                         seg_unmap(seg);
 486                                         goto again;
 487                                 }
 488 #endif
 489                                 return (-1);    /* overlapping segment */
 490                         }
 491                 }
 492         }
 493         as->a_seglast = newseg;
 494         avl_insert(&as->a_segtree, newseg, where);
 495 
 496 #ifdef VERIFY_SEGLIST
 497         as_verify(as);


 654 
 655         as->a_flags     = 0;
 656         as->a_vbits     = 0;
 657         as->a_hrm       = NULL;
 658         as->a_seglast   = NULL;
 659         as->a_size      = 0;
 660         as->a_resvsize  = 0;
 661         as->a_updatedir = 0;
 662         gethrestime(&as->a_updatetime);
 663         as->a_objectdir = NULL;
 664         as->a_sizedir   = 0;
 665         as->a_userlimit = (caddr_t)USERLIMIT;
 666         as->a_lastgap   = NULL;
 667         as->a_lastgaphl = NULL;
 668         as->a_callbacks = NULL;
 669 
 670         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 671         as->a_hat = hat_alloc(as);   /* create hat for default system mmu */
 672         AS_LOCK_EXIT(as, &as->a_lock);
 673 
 674         as->a_xhat = NULL;
 675 
 676         return (as);
 677 }
 678 
 679 /*
 680  * Free an address space data structure.
 681  * Need to free the hat first and then
 682  * all the segments on this as and finally
 683  * the space for the as struct itself.
 684  */
 685 void
 686 as_free(struct as *as)
 687 {
 688         struct hat *hat = as->a_hat;
 689         struct seg *seg, *next;
 690         int called = 0;
 691 
 692 top:
 693         /*
 694          * Invoke ALL callbacks. as_do_callbacks will do one callback
 695          * per call, and not return (-1) until the callback has completed.
 696          * When as_do_callbacks returns zero, all callbacks have completed.
 697          */
 698         mutex_enter(&as->a_contents);
 699         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 700                 ;
 701 
 702         /* This will prevent new XHATs from attaching to as */
 703         if (!called)
 704                 AS_SETBUSY(as);
 705         mutex_exit(&as->a_contents);
 706         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 707 
 708         if (!called) {
 709                 called = 1;
 710                 hat_free_start(hat);
 711                 if (as->a_xhat != NULL)
 712                         xhat_free_start_all(as);
 713         }
 714         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 715                 int err;
 716 
 717                 next = AS_SEGNEXT(as, seg);
 718 retry:
 719                 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
 720                 if (err == EAGAIN) {
 721                         mutex_enter(&as->a_contents);
 722                         if (as->a_callbacks) {
 723                                 AS_LOCK_EXIT(as, &as->a_lock);
 724                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 725                                 /*
 726                                  * Memory is currently locked. Wait for a
 727                                  * cv_signal that it has been unlocked, then
 728                                  * try the operation again.
 729                                  */
 730                                 if (AS_ISUNMAPWAIT(as) == 0)
 731                                         cv_broadcast(&as->a_cv);
 732                                 AS_SETUNMAPWAIT(as);
 733                                 AS_LOCK_EXIT(as, &as->a_lock);
 734                                 while (AS_ISUNMAPWAIT(as))
 735                                         cv_wait(&as->a_cv, &as->a_contents);
 736                         } else {
 737                                 /*
 738                                  * We may have raced with
 739                                  * segvn_reclaim()/segspt_reclaim(). In this


 742                                  * 0.  We don't drop as writer lock so our
 743                                  * number of retries without sleeping should
 744                                  * be very small. See segvn_reclaim() for
 745                                  * more comments.
 746                                  */
 747                                 AS_CLRNOUNMAPWAIT(as);
 748                                 mutex_exit(&as->a_contents);
 749                                 goto retry;
 750                         }
 751                         mutex_exit(&as->a_contents);
 752                         goto top;
 753                 } else {
 754                         /*
 755                          * We do not expect any other error return at this
 756                          * time. This is similar to an ASSERT in seg_unmap()
 757                          */
 758                         ASSERT(err == 0);
 759                 }
 760         }
 761         hat_free_end(hat);
 762         if (as->a_xhat != NULL)
 763                 xhat_free_end_all(as);
 764         AS_LOCK_EXIT(as, &as->a_lock);
 765 
 766         /* /proc stuff */
 767         ASSERT(avl_numnodes(&as->a_wpage) == 0);
 768         if (as->a_objectdir) {
 769                 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
 770                 as->a_objectdir = NULL;
 771                 as->a_sizedir = 0;
 772         }
 773 
 774         /*
 775          * Free the struct as back to kmem.  Assert it has no segments.
 776          */
 777         ASSERT(avl_numnodes(&as->a_segtree) == 0);
 778         kmem_cache_free(as_cache, as);
 779 }
 780 
 781 int
 782 as_dup(struct as *as, struct proc *forkedproc)
 783 {
 784         struct as *newas;
 785         struct seg *seg, *newseg;
 786         size_t  purgesize = 0;
 787         int error;
 788 
 789         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 790         as_clearwatch(as);
 791         newas = as_alloc();
 792         newas->a_userlimit = as->a_userlimit;
 793         newas->a_proc = forkedproc;
 794 
 795         AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
 796 
 797         /* This will prevent new XHATs from attaching */
 798         mutex_enter(&as->a_contents);
 799         AS_SETBUSY(as);
 800         mutex_exit(&as->a_contents);
 801         mutex_enter(&newas->a_contents);
 802         AS_SETBUSY(newas);
 803         mutex_exit(&newas->a_contents);
 804 
 805         (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
 806 
 807         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 808 
 809                 if (seg->s_flags & S_PURGE) {
 810                         purgesize += seg->s_size;
 811                         continue;
 812                 }
 813 
 814                 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
 815                 if (newseg == NULL) {
 816                         AS_LOCK_EXIT(newas, &newas->a_lock);
 817                         as_setwatch(as);
 818                         mutex_enter(&as->a_contents);
 819                         AS_CLRBUSY(as);
 820                         mutex_exit(&as->a_contents);
 821                         AS_LOCK_EXIT(as, &as->a_lock);
 822                         as_free(newas);
 823                         return (-1);
 824                 }
 825                 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
 826                         /*
 827                          * We call seg_free() on the new seg
 828                          * because the segment is not set up
 829                          * completely; i.e. it has no ops.
 830                          */
 831                         as_setwatch(as);
 832                         mutex_enter(&as->a_contents);
 833                         AS_CLRBUSY(as);
 834                         mutex_exit(&as->a_contents);
 835                         AS_LOCK_EXIT(as, &as->a_lock);
 836                         seg_free(newseg);
 837                         AS_LOCK_EXIT(newas, &newas->a_lock);
 838                         as_free(newas);
 839                         return (error);
 840                 }
 841                 newas->a_size += seg->s_size;
 842         }
 843         newas->a_resvsize = as->a_resvsize - purgesize;
 844 
 845         error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
 846         if (as->a_xhat != NULL)
 847                 error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);
 848 
 849         mutex_enter(&newas->a_contents);
 850         AS_CLRBUSY(newas);
 851         mutex_exit(&newas->a_contents);
 852         AS_LOCK_EXIT(newas, &newas->a_lock);
 853 
 854         as_setwatch(as);
 855         mutex_enter(&as->a_contents);
 856         AS_CLRBUSY(as);
 857         mutex_exit(&as->a_contents);
 858         AS_LOCK_EXIT(as, &as->a_lock);
 859         if (error != 0) {
 860                 as_free(newas);
 861                 return (error);
 862         }
 863         forkedproc->p_as = newas;
 864         return (0);
 865 }
 866 
 867 /*
 868  * Handle a ``fault'' at addr for size bytes.
 869  */
 870 faultcode_t
 871 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
 872         enum fault_type type, enum seg_rw rw)
 873 {
 874         struct seg *seg;
 875         caddr_t raddr;                  /* rounded down addr */
 876         size_t rsize;                   /* rounded up size */
 877         size_t ssize;
 878         faultcode_t res = 0;
 879         caddr_t addrsav;
 880         struct seg *segsav;
 881         int as_lock_held;
 882         klwp_t *lwp = ttolwp(curthread);
 883         int is_xhat = 0;
 884         int holding_wpage = 0;
 885         extern struct seg_ops   segdev_ops;
 886 
 887 
 888 
 889         if (as->a_hat != hat) {
 890                 /* This must be an XHAT then */
 891                 is_xhat = 1;
 892 
 893                 if ((type != F_INVAL) || (as == &kas))
 894                         return (FC_NOSUPPORT);
 895         }
 896 
 897 retry:
 898         if (!is_xhat) {
 899                 /*
 900                  * Indicate that the lwp is not to be stopped while waiting
 901                  * for a pagefault.  This is to avoid deadlock while debugging
 902                  * a process via /proc over NFS (in particular).
 903                  */
 904                 if (lwp != NULL)
 905                         lwp->lwp_nostop++;
 906 
 907                 /*
 908                  * same length must be used when we softlock and softunlock.
 909                  * We don't support softunlocking lengths less than
 910                  * the original length when there is largepage support.
 911                  * See seg_dev.c for more comments.
 912                  */
 913                 switch (type) {
 914 
 915                 case F_SOFTLOCK:
 916                         CPU_STATS_ADD_K(vm, softlock, 1);
 917                         break;
 918 
 919                 case F_SOFTUNLOCK:
 920                         break;
 921 
 922                 case F_PROT:
 923                         CPU_STATS_ADD_K(vm, prot_fault, 1);
 924                         break;
 925 
 926                 case F_INVAL:
 927                         CPU_STATS_ENTER_K();
 928                         CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
 929                         if (as == &kas)
 930                                 CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
 931                         CPU_STATS_EXIT_K();
 932                         break;
 933                 }
 934         }
 935 
 936         /* Kernel probe */
 937         TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
 938             tnf_opaque, address,        addr,
 939             tnf_fault_type,     fault_type,     type,
 940             tnf_seg_access,     access,         rw);
 941 
 942         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 943         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 944             (size_t)raddr;
 945 
 946         /*
 947          * XXX -- Don't grab the as lock for segkmap. We should grab it for
 948          * correctness, but then we could be stuck holding this lock for
 949          * a LONG time if the fault needs to be resolved on a slow
 950          * filesystem, and then no-one will be able to exec new commands,
 951          * as exec'ing requires the write lock on the as.
 952          */
 953         if (as == &kas && segkmap && segkmap->s_base <= raddr &&
 954             raddr + size < segkmap->s_base + segkmap->s_size) {
 955                 /*
 956                  * if (as==&kas), this can't be XHAT: we've already returned
 957                  * FC_NOSUPPORT.
 958                  */
 959                 seg = segkmap;
 960                 as_lock_held = 0;
 961         } else {
 962                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 963                 if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
 964                         /*
 965                          * Grab and hold the writers' lock on the as
 966                          * if the fault is to a watched page.
 967                          * This will keep CPUs from "peeking" at the
 968                          * address range while we're temporarily boosting
 969                          * the permissions for the XHAT device to
 970                          * resolve the fault in the segment layer.
 971                          *
 972                          * We could check whether faulted address
 973                          * is within a watched page and only then grab
 974                          * the writer lock, but this is simpler.
 975                          */
 976                         AS_LOCK_EXIT(as, &as->a_lock);
 977                         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 978                 }
 979 
 980                 seg = as_segat(as, raddr);
 981                 if (seg == NULL) {
 982                         AS_LOCK_EXIT(as, &as->a_lock);
 983                         if ((lwp != NULL) && (!is_xhat))
 984                                 lwp->lwp_nostop--;
 985                         return (FC_NOMAP);
 986                 }
 987 
 988                 as_lock_held = 1;
 989         }
 990 
 991         addrsav = raddr;
 992         segsav = seg;
 993 
 994         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 995                 if (raddr >= seg->s_base + seg->s_size) {
 996                         seg = AS_SEGNEXT(as, seg);
 997                         if (seg == NULL || raddr != seg->s_base) {
 998                                 res = FC_NOMAP;
 999                                 break;
1000                         }
1001                 }
1002                 if (raddr + rsize > seg->s_base + seg->s_size)
1003                         ssize = seg->s_base + seg->s_size - raddr;
1004                 else
1005                         ssize = rsize;
1006 
1007                 if (!is_xhat || (seg->s_ops != &segdev_ops)) {
1008 
1009                         if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
1010                             pr_is_watchpage_as(raddr, rw, as)) {
1011                                 /*
1012                                  * Handle watch pages.  If we're faulting on a
1013                                  * watched page from an X-hat, we have to
1014                                  * restore the original permissions while we
1015                                  * handle the fault.
1016                                  */
1017                                 as_clearwatch(as);
1018                                 holding_wpage = 1;
1019                         }
1020 
1021                         res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
1022 
1023                         /* Restore watchpoints */
1024                         if (holding_wpage) {
1025                                 as_setwatch(as);
1026                                 holding_wpage = 0;
1027                         }
1028 
1029                         if (res != 0)
1030                                 break;
1031                 } else {
1032                         /* XHAT does not support seg_dev */
1033                         res = FC_NOSUPPORT;
1034                         break;
1035                 }
1036         }
1037 
1038         /*
1039          * If we were SOFTLOCKing and encountered a failure,
1040          * we must SOFTUNLOCK the range we already did. (Maybe we
1041          * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
1042          * right here...)
1043          */
1044         if (res != 0 && type == F_SOFTLOCK) {
1045                 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
1046                         if (addrsav >= seg->s_base + seg->s_size)
1047                                 seg = AS_SEGNEXT(as, seg);
1048                         ASSERT(seg != NULL);
1049                         /*
1050                          * Now call the fault routine again to perform the
1051                          * unlock using S_OTHER instead of the rw variable
1052                          * since we never got a chance to touch the pages.
1053                          */
1054                         if (raddr > seg->s_base + seg->s_size)
1055                                 ssize = seg->s_base + seg->s_size - addrsav;
1056                         else
1057                                 ssize = raddr - addrsav;
1058                         (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
1059                             F_SOFTUNLOCK, S_OTHER);
1060                 }
1061         }
1062         if (as_lock_held)
1063                 AS_LOCK_EXIT(as, &as->a_lock);
1064         if ((lwp != NULL) && (!is_xhat))
1065                 lwp->lwp_nostop--;
1066 
1067         /*
 1068          * If the lower levels returned EDEADLK for a fault,
 1069          * it means that we should retry the fault.  Let's also wait
 1070          * a bit to let the deadlock-causing condition clear.
1071          * This is part of a gross hack to work around a design flaw
1072          * in the ufs/sds logging code and should go away when the
1073          * logging code is re-designed to fix the problem. See bug
1074          * 4125102 for details of the problem.
1075          */
1076         if (FC_ERRNO(res) == EDEADLK) {
1077                 delay(deadlk_wait);
1078                 res = 0;
1079                 goto retry;
1080         }
1081         return (res);
1082 }
1083 
1084 


1108         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1109             (size_t)raddr;
1110 
1111         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1112         seg = as_segat(as, raddr);
1113         if (seg == NULL) {
1114                 AS_LOCK_EXIT(as, &as->a_lock);
1115                 if (lwp != NULL)
1116                         lwp->lwp_nostop--;
1117                 return (FC_NOMAP);
1118         }
1119 
1120         for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1121                 if (raddr >= seg->s_base + seg->s_size) {
1122                         seg = AS_SEGNEXT(as, seg);
1123                         if (seg == NULL || raddr != seg->s_base) {
1124                                 res = FC_NOMAP;
1125                                 break;
1126                         }
1127                 }
1128                 res = SEGOP_FAULTA(seg, raddr);
1129                 if (res != 0)
1130                         break;
1131         }
1132         AS_LOCK_EXIT(as, &as->a_lock);
1133         if (lwp != NULL)
1134                 lwp->lwp_nostop--;
1135         /*
 1136          * If the lower levels returned EDEADLK for a fault,
 1137          * it means that we should retry the fault.  Let's also wait
 1138          * a bit to let the deadlock-causing condition clear.
1139          * This is part of a gross hack to work around a design flaw
1140          * in the ufs/sds logging code and should go away when the
1141          * logging code is re-designed to fix the problem. See bug
1142          * 4125102 for details of the problem.
1143          */
1144         if (FC_ERRNO(res) == EDEADLK) {
1145                 delay(deadlk_wait);
1146                 res = 0;
1147                 goto retry;
1148         }


1198         seg = as_segat(as, raddr);
1199         if (seg == NULL) {
1200                 as_setwatch(as);
1201                 AS_LOCK_EXIT(as, &as->a_lock);
1202                 return (ENOMEM);
1203         }
1204 
1205         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1206                 if (raddr >= seg->s_base + seg->s_size) {
1207                         seg = AS_SEGNEXT(as, seg);
1208                         if (seg == NULL || raddr != seg->s_base) {
1209                                 error = ENOMEM;
1210                                 break;
1211                         }
1212                 }
1213                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1214                         ssize = seg->s_base + seg->s_size - raddr;
1215                 else
1216                         ssize = rsize;
1217 retry:
1218                 error = SEGOP_SETPROT(seg, raddr, ssize, prot);
1219 
1220                 if (error == IE_NOMEM) {
1221                         error = EAGAIN;
1222                         break;
1223                 }
1224 
1225                 if (error == IE_RETRY) {
1226                         AS_LOCK_EXIT(as, &as->a_lock);
1227                         writer = 1;
1228                         goto setprot_top;
1229                 }
1230 
1231                 if (error == EAGAIN) {
1232                         /*
1233                          * Make sure we have a_lock as writer.
1234                          */
1235                         if (writer == 0) {
1236                                 AS_LOCK_EXIT(as, &as->a_lock);
1237                                 writer = 1;
1238                                 goto setprot_top;


1349         seg = as_segat(as, raddr);
1350         if (seg == NULL) {
1351                 as_setwatch(as);
1352                 AS_LOCK_EXIT(as, &as->a_lock);
1353                 return (ENOMEM);
1354         }
1355 
1356         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1357                 if (raddr >= seg->s_base + seg->s_size) {
1358                         seg = AS_SEGNEXT(as, seg);
1359                         if (seg == NULL || raddr != seg->s_base) {
1360                                 error = ENOMEM;
1361                                 break;
1362                         }
1363                 }
1364                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1365                         ssize = seg->s_base + seg->s_size - raddr;
1366                 else
1367                         ssize = rsize;
1368 
1369                 error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
1370                 if (error != 0)
1371                         break;
1372         }
1373         as_setwatch(as);
1374         AS_LOCK_EXIT(as, &as->a_lock);
1375         return (error);
1376 }
1377 
1378 int
1379 as_unmap(struct as *as, caddr_t addr, size_t size)
1380 {
1381         struct seg *seg, *seg_next;
1382         struct as_callback *cb;
1383         caddr_t raddr, eaddr;
1384         size_t ssize, rsize = 0;
1385         int err;
1386 
1387 top:
1388         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1389         eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &


1415                 else
1416                         ssize = eaddr - raddr;
1417 
1418                 /*
1419                  * Save next segment pointer since seg can be
1420                  * destroyed during the segment unmap operation.
1421                  */
1422                 seg_next = AS_SEGNEXT(as, seg);
1423 
1424                 /*
1425                  * We didn't count /dev/null mappings, so ignore them here.
1426                  * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1427                  * we have to do this check here while we have seg.)
1428                  */
1429                 rsize = 0;
1430                 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1431                     !SEG_IS_PARTIAL_RESV(seg))
1432                         rsize = ssize;
1433 
1434 retry:
1435                 err = SEGOP_UNMAP(seg, raddr, ssize);
1436                 if (err == EAGAIN) {
1437                         /*
1438                          * Memory is currently locked.  It must be unlocked
1439                          * before this operation can succeed through a retry.
1440                          * The possible reasons for locked memory and
1441                          * corresponding strategies for unlocking are:
1442                          * (1) Normal I/O
1443                          *      wait for a signal that the I/O operation
1444                          *      has completed and the memory is unlocked.
1445                          * (2) Asynchronous I/O
1446                          *      The aio subsystem does not unlock pages when
1447                          *      the I/O is completed. Those pages are unlocked
1448                          *      when the application calls aiowait/aioerror.
1449                          *      So, to prevent blocking forever, cv_broadcast()
1450                          *      is done to wake up aio_cleanup_thread.
1451                          *      Subsequently, segvn_reclaim will be called, and
1452                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
1453                          * (3) Long term page locking:
1454                          *      Drivers intending to have pages locked for a
1455                          *      period considerably longer than for normal I/O


1853  */
1854 void
1855 as_purge(struct as *as)
1856 {
1857         struct seg *seg;
1858         struct seg *next_seg;
1859 
1860         /*
 1861          * The setting of AS_NEEDSPURGE is protected by as_rangelock(),
 1862          * so there is no need to grab the a_contents mutex for this check.
1863          */
1864         if ((as->a_flags & AS_NEEDSPURGE) == 0)
1865                 return;
1866 
1867         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1868         next_seg = NULL;
1869         seg = AS_SEGFIRST(as);
1870         while (seg != NULL) {
1871                 next_seg = AS_SEGNEXT(as, seg);
1872                 if (seg->s_flags & S_PURGE)
1873                         SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1874                 seg = next_seg;
1875         }
1876         AS_LOCK_EXIT(as, &as->a_lock);
1877 
1878         mutex_enter(&as->a_contents);
1879         as->a_flags &= ~AS_NEEDSPURGE;
1880         mutex_exit(&as->a_contents);
1881 }
1882 
1883 /*
1884  * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1885  * range of addresses at least "minlen" long, where the base of the range is
1886  * at "off" phase from an "align" boundary and there is space for a
 1887  * "redzone"-sized redzone on either side of the range.  Thus,
1888  * if align was 4M and off was 16k, the user wants a hole which will start
1889  * 16k into a 4M page.
1890  *
1891  * If flags specifies AH_HI, the hole will have the highest possible address
1892  * in the range.  We use the as->a_lastgap field to figure out where to
1893  * start looking for a gap.


2071  * -1 is returned.
2072  *
2073  * NOTE: This routine is not correct when base+len overflows caddr_t.
2074  */
2075 int
2076 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
2077     caddr_t addr)
2078 {
2079 
2080         return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
2081 }
2082 
2083 /*
2084  * Return the next range within [base, base + len) that is backed
2085  * with "real memory".  Skip holes and non-seg_vn segments.
2086  * We're lazy and only return one segment at a time.
2087  */
2088 int
2089 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2090 {
2091         extern struct seg_ops segspt_shmops;    /* needs a header file */
2092         struct seg *seg;
2093         caddr_t addr, eaddr;
2094         caddr_t segend;
2095 
2096         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2097 
2098         addr = *basep;
2099         eaddr = addr + *lenp;
2100 
2101         seg = as_findseg(as, addr, 0);
2102         if (seg != NULL)
2103                 addr = MAX(seg->s_base, addr);
2104 
2105         for (;;) {
2106                 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2107                         AS_LOCK_EXIT(as, &as->a_lock);
2108                         return (EINVAL);
2109                 }
2110 
2111                 if (seg->s_ops == &segvn_ops) {


2124                 }
2125 
2126                 seg = AS_SEGNEXT(as, seg);
2127 
2128                 if (seg != NULL)
2129                         addr = seg->s_base;
2130         }
2131 
2132         *basep = addr;
2133 
2134         if (segend > eaddr)
2135                 *lenp = eaddr - addr;
2136         else
2137                 *lenp = segend - addr;
2138 
2139         AS_LOCK_EXIT(as, &as->a_lock);
2140         return (0);
2141 }
2142 
2143 /*
2144  * Swap the pages associated with the address space as out to
2145  * secondary storage, returning the number of bytes actually
2146  * swapped.
2147  *
2148  * The value returned is intended to correlate well with the process's
2149  * memory requirements.  Its usefulness for this purpose depends on
2150  * how well the segment-level routines do at returning accurate
2151  * information.
2152  */
2153 size_t
2154 as_swapout(struct as *as)
2155 {
2156         struct seg *seg;
2157         size_t swpcnt = 0;
2158 
2159         /*
2160          * Kernel-only processes have given up their address
2161          * spaces.  Of course, we shouldn't be attempting to
2162          * swap out such processes in the first place...
2163          */
2164         if (as == NULL)
2165                 return (0);
2166 
2167         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2168 
2169         /* Prevent XHATs from attaching */
2170         mutex_enter(&as->a_contents);
2171         AS_SETBUSY(as);
2172         mutex_exit(&as->a_contents);
2173 
2174 
2175         /*
2176          * Free all mapping resources associated with the address
2177          * space.  The segment-level swapout routines capitalize
 2178          * on this unmapping by scavenging pages that have become
2179          * unmapped here.
2180          */
2181         hat_swapout(as->a_hat);
2182         if (as->a_xhat != NULL)
2183                 xhat_swapout_all(as);
2184 
2185         mutex_enter(&as->a_contents);
2186         AS_CLRBUSY(as);
2187         mutex_exit(&as->a_contents);
2188 
2189         /*
2190          * Call the swapout routines of all segments in the address
2191          * space to do the actual work, accumulating the amount of
2192          * space reclaimed.
2193          */
2194         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195                 struct seg_ops *ov = seg->s_ops;
2196 
2197                 /*
2198                  * We have to check to see if the seg has
2199                  * an ops vector because the seg may have
2200                  * been in the middle of being set up when
2201                  * the process was picked for swapout.
2202                  */
2203                 if ((ov != NULL) && (ov->swapout != NULL))
2204                         swpcnt += SEGOP_SWAPOUT(seg);
2205         }
2206         AS_LOCK_EXIT(as, &as->a_lock);
2207         return (swpcnt);
2208 }
2209 
2210 /*
2211  * Determine whether data from the mappings in interval [addr, addr + size)
2212  * are in the primary memory (core) cache.
2213  */
2214 int
2215 as_incore(struct as *as, caddr_t addr,
2216     size_t size, char *vec, size_t *sizep)
2217 {
2218         struct seg *seg;
2219         size_t ssize;
2220         caddr_t raddr;          /* rounded down addr */
2221         size_t rsize;           /* rounded up size */
 2222         size_t isize;           /* iteration size */
2223         int error = 0;          /* result, assume success */
2224 
2225         *sizep = 0;
2226         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2227         rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2228             (size_t)raddr;
2229 
2230         if (raddr + rsize < raddr)           /* check for wraparound */


2232 
2233         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2234         seg = as_segat(as, raddr);
2235         if (seg == NULL) {
2236                 AS_LOCK_EXIT(as, &as->a_lock);
2237                 return (-1);
2238         }
2239 
2240         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2241                 if (raddr >= seg->s_base + seg->s_size) {
2242                         seg = AS_SEGNEXT(as, seg);
2243                         if (seg == NULL || raddr != seg->s_base) {
2244                                 error = -1;
2245                                 break;
2246                         }
2247                 }
2248                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2249                         ssize = seg->s_base + seg->s_size - raddr;
2250                 else
2251                         ssize = rsize;
2252                 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
2253                 if (isize != ssize) {
2254                         error = -1;
2255                         break;
2256                 }
2257                 vec += btopr(ssize);
2258         }
2259         AS_LOCK_EXIT(as, &as->a_lock);
2260         return (error);
2261 }
2262 
2263 static void
2264 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2265         ulong_t *bitmap, size_t position, size_t npages)
2266 {
2267         caddr_t range_start;
2268         size_t  pos1 = position;
2269         size_t  pos2;
2270         size_t  size;
2271         size_t  end_pos = npages + position;
2272 
2273         while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2274                 size = ptob((pos2 - pos1));
2275                 range_start = (caddr_t)((uintptr_t)addr +
2276                     ptob(pos1 - position));
2277 
2278                 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
2279                     (ulong_t *)NULL, (size_t)NULL);
2280                 pos1 = pos2;
2281         }
2282 }
2283 
2284 static void
2285 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2286         caddr_t raddr, size_t rsize)
2287 {
2288         struct seg *seg = as_segat(as, raddr);
2289         size_t ssize;
2290 
2291         while (rsize != 0) {
2292                 if (raddr >= seg->s_base + seg->s_size)
2293                         seg = AS_SEGNEXT(as, seg);
2294 
2295                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2296                         ssize = seg->s_base + seg->s_size - raddr;
2297                 else
2298                         ssize = rsize;


2354                 if (seg == NULL) {
2355                         AS_LOCK_EXIT(as, &as->a_lock);
2356                         return (0);
2357                 }
2358 
2359                 do {
2360                         raddr = (caddr_t)((uintptr_t)seg->s_base &
2361                             (uintptr_t)PAGEMASK);
2362                         rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2363                             PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2364                 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2365 
2366                 mlock_size = BT_BITOUL(btopr(rlen));
2367                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2368                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2369                                 AS_LOCK_EXIT(as, &as->a_lock);
2370                                 return (EAGAIN);
2371                 }
2372 
2373                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2374                         error = SEGOP_LOCKOP(seg, seg->s_base,
2375                             seg->s_size, attr, MC_LOCK, mlock_map, pos);
2376                         if (error != 0)
2377                                 break;
2378                         pos += seg_pages(seg);
2379                 }
2380 
2381                 if (error) {
2382                         for (seg = AS_SEGFIRST(as); seg != NULL;
2383                             seg = AS_SEGNEXT(as, seg)) {
2384 
2385                                 raddr = (caddr_t)((uintptr_t)seg->s_base &
2386                                     (uintptr_t)PAGEMASK);
2387                                 npages = seg_pages(seg);
2388                                 as_segunlock(seg, raddr, attr, mlock_map,
2389                                     idx, npages);
2390                                 idx += npages;
2391                         }
2392                 }
2393 
2394                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2395                 AS_LOCK_EXIT(as, &as->a_lock);
2396                 goto lockerr;
2397         } else if (func == MC_UNLOCKAS) {
2398                 mutex_enter(&as->a_contents);
2399                 AS_CLRPGLCK(as);
2400                 mutex_exit(&as->a_contents);
2401 
2402                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2403                         error = SEGOP_LOCKOP(seg, seg->s_base,
2404                             seg->s_size, attr, MC_UNLOCK, NULL, 0);
2405                         if (error != 0)
2406                                 break;
2407                 }
2408 
2409                 AS_LOCK_EXIT(as, &as->a_lock);
2410                 goto lockerr;
2411         }
2412 
2413         /*
2414          * Normalize addresses and sizes.
2415          */
2416         initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2417         initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2418             (size_t)raddr;
2419 
2420         if (raddr + rsize < raddr) {         /* check for wraparound */
2421                 AS_LOCK_EXIT(as, &as->a_lock);
2422                 return (ENOMEM);
2423         }


2461                                 }
2462                                 AS_LOCK_EXIT(as, &as->a_lock);
2463                                 return (ENOMEM);
2464                         }
2465                 }
2466                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2467                         ssize = seg->s_base + seg->s_size - raddr;
2468                 else
2469                         ssize = rsize;
2470 
2471                 /*
2472                  * Dispatch on specific function.
2473                  */
2474                 switch (func) {
2475 
2476                 /*
2477                  * Synchronize cached data from mappings with backing
2478                  * objects.
2479                  */
2480                 case MC_SYNC:
2481                         if (error = SEGOP_SYNC(seg, raddr, ssize,
2482                             attr, (uint_t)arg)) {
2483                                 AS_LOCK_EXIT(as, &as->a_lock);
2484                                 return (error);
2485                         }
2486                         break;
2487 
2488                 /*
2489                  * Lock pages in memory.
2490                  */
2491                 case MC_LOCK:
2492                         if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2493                             attr, func, mlock_map, pos)) {
2494                                 as_unlockerr(as, attr, mlock_map, initraddr,
2495                                     initrsize - rsize + ssize);
2496                                 kmem_free(mlock_map, mlock_size *
2497                                     sizeof (ulong_t));
2498                                 AS_LOCK_EXIT(as, &as->a_lock);
2499                                 goto lockerr;
2500                         }
2501                         break;
2502 
2503                 /*
2504                  * Unlock mapped pages.
2505                  */
2506                 case MC_UNLOCK:
2507                         (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2508                             (ulong_t *)NULL, (size_t)NULL);
2509                         break;
2510 
2511                 /*
2512                  * Store VM advise for mapped pages in segment layer.
2513                  */
2514                 case MC_ADVISE:
2515                         error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2516 
2517                         /*
2518                          * Check for regular errors and special retry error
2519                          */
2520                         if (error) {
2521                                 if (error == IE_RETRY) {
2522                                         /*
2523                                          * Need to acquire writers lock, so
2524                                          * have to drop readers lock and start
2525                                          * all over again
2526                                          */
2527                                         AS_LOCK_EXIT(as, &as->a_lock);
2528                                         goto retry;
2529                                 } else if (error == IE_REATTACH) {
2530                                         /*
2531                                          * Find segment for current address
2532                                          * because current segment just got
2533                                          * split or concatenated
2534                                          */
2535                                         seg = as_segat(as, raddr);
2536                                         if (seg == NULL) {
2537                                                 AS_LOCK_EXIT(as, &as->a_lock);
2538                                                 return (ENOMEM);
2539                                         }
2540                                 } else {
2541                                         /*
2542                                          * Regular error
2543                                          */
2544                                         AS_LOCK_EXIT(as, &as->a_lock);
2545                                         return (error);
2546                                 }
2547                         }
2548                         break;
2549 
2550                 case MC_INHERIT_ZERO:
2551                         if (seg->s_ops->inherit == NULL) {
2552                                 error = ENOTSUP;
2553                         } else {
2554                                 error = SEGOP_INHERIT(seg, raddr, ssize,
2555                                     SEGP_INH_ZERO);
2556                         }
2557                         if (error != 0) {
2558                                 AS_LOCK_EXIT(as, &as->a_lock);
2559                                 return (error);
2560                         }
2561                         break;
2562 
2563                 /*
2564                  * Can't happen.
2565                  */
2566                 default:
2567                         panic("as_ctl: bad operation %d", func);
2568                         /*NOTREACHED*/
2569                 }
2570 
2571                 rsize -= ssize;
2572                 raddr += ssize;
2573         }
2574 
2575         if (func == MC_LOCK)
2576                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));


2620  * as expected by the caller.  Save pointers to per segment shadow lists at
2621  * the tail of plist so that they can be used during as_pageunlock().
2622  */
2623 static int
2624 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2625     caddr_t addr, size_t size, enum seg_rw rw)
2626 {
2627         caddr_t sv_addr = addr;
2628         size_t sv_size = size;
2629         struct seg *sv_seg = seg;
2630         ulong_t segcnt = 1;
2631         ulong_t cnt;
2632         size_t ssize;
2633         pgcnt_t npages = btop(size);
2634         page_t **plist;
2635         page_t **pl;
2636         int error;
2637         caddr_t eaddr;
2638         faultcode_t fault_err = 0;
2639         pgcnt_t pl_off;
2640         extern struct seg_ops segspt_shmops;
2641 
2642         ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2643         ASSERT(seg != NULL);
2644         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2645         ASSERT(addr + size > seg->s_base + seg->s_size);
2646         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2647         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2648 
2649         /*
2650          * Count the number of segments covered by the range we are about to
2651          * lock. The segment count is used to size the shadow list we return
2652          * back to the caller.
2653          */
2654         for (; size != 0; size -= ssize, addr += ssize) {
2655                 if (addr >= seg->s_base + seg->s_size) {
2656 
2657                         seg = AS_SEGNEXT(as, seg);
2658                         if (seg == NULL || addr != seg->s_base) {
2659                                 AS_LOCK_EXIT(as, &as->a_lock);
2660                                 return (EFAULT);
2661                         }
2662                         /*
2663                          * Do a quick check if subsequent segments
2664                          * will most likely support pagelock.
2665                          */
2666                         if (seg->s_ops == &segvn_ops) {
2667                                 vnode_t *vp;
2668 
2669                                 if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
2670                                     vp != NULL) {
2671                                         AS_LOCK_EXIT(as, &as->a_lock);
2672                                         goto slow;
2673                                 }
2674                         } else if (seg->s_ops != &segspt_shmops) {
2675                                 AS_LOCK_EXIT(as, &as->a_lock);
2676                                 goto slow;
2677                         }
2678                         segcnt++;
2679                 }
2680                 if (addr + size > seg->s_base + seg->s_size) {
2681                         ssize = seg->s_base + seg->s_size - addr;
2682                 } else {
2683                         ssize = size;
2684                 }
2685         }
2686         ASSERT(segcnt > 1);
2687 
2688         plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2689 
2690         addr = sv_addr;
2691         size = sv_size;
2692         seg = sv_seg;
2693 
2694         for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2695                 if (addr >= seg->s_base + seg->s_size) {
2696                         seg = AS_SEGNEXT(as, seg);
2697                         ASSERT(seg != NULL && addr == seg->s_base);
2698                         cnt++;
2699                         ASSERT(cnt < segcnt);
2700                 }
2701                 if (addr + size > seg->s_base + seg->s_size) {
2702                         ssize = seg->s_base + seg->s_size - addr;
2703                 } else {
2704                         ssize = size;
2705                 }
2706                 pl = &plist[npages + cnt];
2707                 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2708                     L_PAGELOCK, rw);
2709                 if (error) {
2710                         break;
2711                 }
2712                 ASSERT(plist[npages + cnt] != NULL);
2713                 ASSERT(pl_off + btop(ssize) <= npages);
2714                 bcopy(plist[npages + cnt], &plist[pl_off],
2715                     btop(ssize) * sizeof (page_t *));
2716                 pl_off += btop(ssize);
2717         }
2718 
2719         if (size == 0) {
2720                 AS_LOCK_EXIT(as, &as->a_lock);
2721                 ASSERT(cnt == segcnt - 1);
2722                 *ppp = plist;
2723                 return (0);
2724         }
2725 
2726         /*
 2727          * One of the pagelock calls failed. The error type is in the error variable.


2730          * back to the caller.
2731          */
2732 
2733         eaddr = addr;
2734         seg = sv_seg;
2735 
2736         for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2737                 if (addr >= seg->s_base + seg->s_size) {
2738                         seg = AS_SEGNEXT(as, seg);
2739                         ASSERT(seg != NULL && addr == seg->s_base);
2740                         cnt++;
2741                         ASSERT(cnt < segcnt);
2742                 }
2743                 if (eaddr > seg->s_base + seg->s_size) {
2744                         ssize = seg->s_base + seg->s_size - addr;
2745                 } else {
2746                         ssize = eaddr - addr;
2747                 }
2748                 pl = &plist[npages + cnt];
2749                 ASSERT(*pl != NULL);
2750                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2751                     L_PAGEUNLOCK, rw);
2752         }
2753 
2754         AS_LOCK_EXIT(as, &as->a_lock);
2755 
2756         kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2757 
2758         if (error != ENOTSUP && error != EFAULT) {
2759                 return (error);
2760         }
2761 
2762 slow:
2763         /*
 2764          * If we are here because pagelock failed due to the need to
 2765          * cow-fault in the pages we want to lock, F_SOFTLOCK will do this
 2766          * job, and the next as_pagelock() call for this address range
 2767          * will hopefully succeed.
2768          */
2769         fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2770         if (fault_err != 0) {


2805         seg = as_segat(as, raddr);
2806         if (seg == NULL) {
2807                 AS_LOCK_EXIT(as, &as->a_lock);
2808                 return (EFAULT);
2809         }
2810         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2811         if (raddr + rsize > seg->s_base + seg->s_size) {
2812                 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2813         }
2814         if (raddr + rsize <= raddr) {
2815                 AS_LOCK_EXIT(as, &as->a_lock);
2816                 return (EFAULT);
2817         }
2818 
2819         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2820             "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2821 
2822         /*
2823          * try to lock pages and pass back shadow list
2824          */
2825         err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2826 
2827         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2828 
2829         AS_LOCK_EXIT(as, &as->a_lock);
2830 
2831         if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2832                 return (err);
2833         }
2834 
2835         /*
 2836          * Use F_SOFTLOCK to lock the pages because pagelock failed either
 2837          * due to no pagelock support for this segment or because pages
 2838          * need to be cow-faulted in. If a fault is needed, F_SOFTLOCK will
 2839          * do the job for this as_pagelock() call, and the next as_pagelock()
 2840          * call for the same address range will hopefully succeed.
2841          */
2842         fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2843         if (fault_err != 0) {
2844                 return (fc_decode(fault_err));
2845         }


2868         ASSERT(seg != NULL);
2869         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2870         ASSERT(addr + size > seg->s_base + seg->s_size);
2871         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2872         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2873         ASSERT(plist != NULL);
2874 
2875         for (cnt = 0; addr < eaddr; addr += ssize) {
2876                 if (addr >= seg->s_base + seg->s_size) {
2877                         seg = AS_SEGNEXT(as, seg);
2878                         ASSERT(seg != NULL && addr == seg->s_base);
2879                         cnt++;
2880                 }
2881                 if (eaddr > seg->s_base + seg->s_size) {
2882                         ssize = seg->s_base + seg->s_size - addr;
2883                 } else {
2884                         ssize = eaddr - addr;
2885                 }
2886                 pl = &plist[npages + cnt];
2887                 ASSERT(*pl != NULL);
2888                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2889                     L_PAGEUNLOCK, rw);
2890         }
2891         ASSERT(cnt > 0);
2892         AS_LOCK_EXIT(as, &as->a_lock);
2893 
2894         cnt++;
2895         kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2896 }
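
The shadow list freed just above has a two-part layout; the following is an
added sketch inferred from the indexing in this function and the allocation
in as_pagelock_segs():

        /*
         * plist[0 .. npages-1]              page_t pointers for every page
         *                                   in the unlocked range
         * plist[npages .. npages+segcnt-1]  one pointer per segment to that
         *                                   segment's own shadow list, each
         *                                   passed back with L_PAGEUNLOCK
         */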
2897 
2898 /*
2899  * unlock pages in a given address range
2900  */
2901 void
2902 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2903     enum seg_rw rw)
2904 {
2905         struct seg *seg;
2906         size_t rsize;
2907         caddr_t raddr;
2908 


2914          * falling back to as_fault
2915          */
2916         if (pp == NULL) {
2917                 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2918                 return;
2919         }
2920 
2921         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2922         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2923             (size_t)raddr;
2924 
2925         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2926         seg = as_segat(as, raddr);
2927         ASSERT(seg != NULL);
2928 
2929         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2930             "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2931 
2932         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2933         if (raddr + rsize <= seg->s_base + seg->s_size) {
2934                 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2935         } else {
2936                 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2937                 return;
2938         }
2939         AS_LOCK_EXIT(as, &as->a_lock);
2940         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2941 }
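
A caller-side sketch of the pagelock contract may be useful here. The helper
below is hypothetical (not from this file); the signatures, and the rule that
the unlock must use the same addr/size/rw as the lock, are taken from the
functions above:

        /* Hypothetical driver helper, for illustration only. */
        static int
        xx_rw_locked(struct as *as, caddr_t uaddr, size_t len)
        {
                struct page **pp;
                int err;

                /* returns a shadow list, or pp == NULL on the slow path */
                err = as_pagelock(as, &pp, uaddr, len, S_WRITE);
                if (err != 0)
                        return (err);

                /* ... perform I/O against the locked range ... */

                /* handles both cases; same addr/len/rw as the lock call */
                as_pageunlock(as, pp, uaddr, len, S_WRITE);
                return (0);
        }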
2942 
2943 int
2944 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2945     boolean_t wait)
2946 {
2947         struct seg *seg;
2948         size_t ssize;
2949         caddr_t raddr;                  /* rounded down addr */
2950         size_t rsize;                   /* rounded up size */
2951         int error = 0;
2952         size_t pgsz = page_get_pagesize(szc);
2953 
2954 setpgsz_top:


2969                 as_setwatch(as);
2970                 AS_LOCK_EXIT(as, &as->a_lock);
2971                 return (ENOMEM);
2972         }
2973 
2974         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2975                 if (raddr >= seg->s_base + seg->s_size) {
2976                         seg = AS_SEGNEXT(as, seg);
2977                         if (seg == NULL || raddr != seg->s_base) {
2978                                 error = ENOMEM;
2979                                 break;
2980                         }
2981                 }
2982                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2983                         ssize = seg->s_base + seg->s_size - raddr;
2984                 } else {
2985                         ssize = rsize;
2986                 }
2987 
2988 retry:
2989                 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2990 
2991                 if (error == IE_NOMEM) {
2992                         error = EAGAIN;
2993                         break;
2994                 }
2995 
2996                 if (error == IE_RETRY) {
2997                         AS_LOCK_EXIT(as, &as->a_lock);
2998                         goto setpgsz_top;
2999                 }
3000 
3001                 if (error == ENOTSUP) {
3002                         error = EINVAL;
3003                         break;
3004                 }
3005 
3006                 if (wait && (error == EAGAIN)) {
3007                         /*
3008                          * Memory is currently locked.  It must be unlocked
3009                          * before this operation can succeed through a retry.


3048                                  * number of retries without sleeping should
3049                                  * be very small. See segvn_reclaim() for
3050                                  * more comments.
3051                                  */
3052                                 AS_CLRNOUNMAPWAIT(as);
3053                                 mutex_exit(&as->a_contents);
3054                                 goto retry;
3055                         }
3056                         mutex_exit(&as->a_contents);
3057                         goto setpgsz_top;
3058                 } else if (error != 0) {
3059                         break;
3060                 }
3061         }
3062         as_setwatch(as);
3063         AS_LOCK_EXIT(as, &as->a_lock);
3064         return (error);
3065 }
3066 
3067 /*
3068  * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
3069  * in its chunk where s_szc is less than the szc we want to set.
3070  */
3071 static int
3072 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3073     int *retry)
3074 {
3075         struct seg *seg;
3076         size_t ssize;
3077         int error;
3078 
3079         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3080 
3081         seg = as_segat(as, raddr);
3082         if (seg == NULL) {
3083                 panic("as_iset3_default_lpsize: no seg");
3084         }
3085 
3086         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
3087                 if (raddr >= seg->s_base + seg->s_size) {
3088                         seg = AS_SEGNEXT(as, seg);
3089                         if (seg == NULL || raddr != seg->s_base) {
3090                                 panic("as_iset3_default_lpsize: as changed");
3091                         }
3092                 }
3093                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3094                         ssize = seg->s_base + seg->s_size - raddr;
3095                 } else {
3096                         ssize = rsize;
3097                 }
3098 
3099                 if (szc > seg->s_szc) {
3100                         error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
3101                         /* Only retry on EINVAL segments that have no vnode. */
3102                         if (error == EINVAL) {
3103                                 vnode_t *vp = NULL;
3104                                 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) &&
3105                                     (SEGOP_GETVP(seg, raddr, &vp) != 0 ||
3106                                     vp == NULL)) {
3107                                         *retry = 1;
3108                                 } else {
3109                                         *retry = 0;
3110                                 }
3111                         }
3112                         if (error) {
3113                                 return (error);
3114                         }
3115                 }
3116         }
3117         return (0);
3118 }
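
The *retry flag set above is meant to let the caller step down to a smaller
page size. A rough sketch of such a loop, assuming a bitmap szcvec with bit i
set when size code i is allowed (the bitmap name is an assumption, not from
this file):

        int retry = 0;
        int error;

        while (szc > 0) {
                error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
                if (error != EINVAL || !retry)
                        break;
                /* drop to the next smaller allowed size code */
                do {
                        szc--;
                } while (szc > 0 && !(szcvec & (1UL << szc)));
        }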
3119 
3120 /*
3121  * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
3122  * pagesize on each segment in its range, but if any fails with EINVAL,
3123  * then it reduces the pagesizes to the next size in the bitmap and
3124  * retries as_iset3_default_lpsize(). The reason why the code retries
3125  * smaller allowed sizes on EINVAL is because (a) the anon offset may not


3328         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3329 again:
3330         error = 0;
3331 
3332         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3333         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3334             (size_t)raddr;
3335 
3336         if (raddr + rsize < raddr) {         /* check for wraparound */
3337                 AS_LOCK_EXIT(as, &as->a_lock);
3338                 return (ENOMEM);
3339         }
3340         as_clearwatchprot(as, raddr, rsize);
3341         seg = as_segat(as, raddr);
3342         if (seg == NULL) {
3343                 as_setwatch(as);
3344                 AS_LOCK_EXIT(as, &as->a_lock);
3345                 return (ENOMEM);
3346         }
3347         if (seg->s_ops == &segvn_ops) {
3348                 rtype = SEGOP_GETTYPE(seg, addr);
3349                 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3350                 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3351                 segvn = 1;
3352         } else {
3353                 segvn = 0;
3354         }
3355         setaddr = raddr;
3356         setsize = 0;
3357 
3358         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3359                 if (raddr >= (seg->s_base + seg->s_size)) {
3360                         seg = AS_SEGNEXT(as, seg);
3361                         if (seg == NULL || raddr != seg->s_base) {
3362                                 error = ENOMEM;
3363                                 break;
3364                         }
3365                         if (seg->s_ops == &segvn_ops) {
3366                                 stype = SEGOP_GETTYPE(seg, raddr);
3367                                 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3368                                 stype &= (MAP_SHARED | MAP_PRIVATE);
3369                                 if (segvn && (rflags != sflags ||
3370                                     rtype != stype)) {
3371                                         /*
3372                                          * The next segment is also segvn but
3373                                          * has different flags and/or type.
3374                                          */
3375                                         ASSERT(setsize != 0);
3376                                         error = as_iset_default_lpsize(as,
3377                                             setaddr, setsize, rflags, rtype);
3378                                         if (error) {
3379                                                 break;
3380                                         }
3381                                         rflags = sflags;
3382                                         rtype = stype;
3383                                         setaddr = raddr;
3384                                         setsize = 0;
3385                                 } else if (!segvn) {
3386                                         rflags = sflags;


3460 as_setwatch(struct as *as)
3461 {
3462         struct watched_page *pwp;
3463         struct seg *seg;
3464         caddr_t vaddr;
3465         uint_t prot;
3466         int  err, retrycnt;
3467 
3468         if (avl_numnodes(&as->a_wpage) == 0)
3469                 return;
3470 
3471         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3472 
3473         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3474             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3475                 retrycnt = 0;
3476         retry:
3477                 vaddr = pwp->wp_vaddr;
3478                 if (pwp->wp_oprot != 0 ||    /* already set up */
3479                     (seg = as_segat(as, vaddr)) == NULL ||
3480                     SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
3481                         continue;
3482 
3483                 pwp->wp_oprot = prot;
3484                 if (pwp->wp_read)
3485                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3486                 if (pwp->wp_write)
3487                         prot &= ~PROT_WRITE;
3488                 if (pwp->wp_exec)
3489                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3490                 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3491                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3492                         if (err == IE_RETRY) {
3493                                 pwp->wp_oprot = 0;
3494                                 ASSERT(retrycnt == 0);
3495                                 retrycnt++;
3496                                 goto retry;
3497                         }
3498                 }
3499                 pwp->wp_prot = prot;
3500         }
3501 }
3502 
3503 /*
3504  * Clear all of the watched pages in the address space.
3505  */
3506 void
3507 as_clearwatch(struct as *as)
3508 {
3509         struct watched_page *pwp;
3510         struct seg *seg;
3511         caddr_t vaddr;
3512         uint_t prot;
3513         int err, retrycnt;
3514 
3515         if (avl_numnodes(&as->a_wpage) == 0)
3516                 return;
3517 
3518         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3519 
3520         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3521             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3522                 retrycnt = 0;
3523         retry:
3524                 vaddr = pwp->wp_vaddr;
3525                 if (pwp->wp_oprot == 0 ||    /* not set up */
3526                     (seg = as_segat(as, vaddr)) == NULL)
3527                         continue;
3528 
3529                 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3530                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3531                         if (err == IE_RETRY) {
3532                                 ASSERT(retrycnt == 0);
3533                                 retrycnt++;
3534                                 goto retry;
3535                         }
3536                 }
3537                 pwp->wp_oprot = 0;
3538                 pwp->wp_prot = 0;
3539         }
3540 }
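
A worked example of the protection arithmetic that as_setwatch() applies and
as_clearwatch() reverses, using made-up values:

        /* Read watchpoint on a page whose segment grants read and write. */
        uint_t oprot = PROT_READ | PROT_WRITE; /* remembered as wp_oprot */
        uint_t prot = oprot;

        prot &= ~(PROT_READ | PROT_WRITE | PROT_EXEC); /* wp_read case */
        /* prot == 0: any access faults, and /proc reports the watchpoint */

        /*
         * A write-only watchpoint (wp_write) clears just PROT_WRITE, so
         * reads run at full speed and only stores trap.
         */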
3541 
3542 /*
3543  * Force a new setup for all the watched pages in the range.
3544  */
3545 static void
3546 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3547 {
3548         struct watched_page *pwp;
3549         struct watched_page tpw;
3550         caddr_t eaddr = addr + size;


3564                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3565 
3566         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3567                 retrycnt = 0;
3568                 vaddr = pwp->wp_vaddr;
3569 
3570                 wprot = prot;
3571                 if (pwp->wp_read)
3572                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3573                 if (pwp->wp_write)
3574                         wprot &= ~PROT_WRITE;
3575                 if (pwp->wp_exec)
3576                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3577                 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3578                 retry:
3579                         seg = as_segat(as, vaddr);
3580                         if (seg == NULL) {
3581                                 panic("as_setwatchprot: no seg");
3582                                 /*NOTREACHED*/
3583                         }
3584                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot);
3585                         if (err == IE_RETRY) {
3586                                 ASSERT(retrycnt == 0);
3587                                 retrycnt++;
3588                                 goto retry;
3589                         }
3590                 }
3591                 pwp->wp_oprot = prot;
3592                 pwp->wp_prot = wprot;
3593 
3594                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3595         }
3596 }
3597 
3598 /*
3599  * Clear all of the watched pages in the range.
3600  */
3601 static void
3602 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3603 {
3604         caddr_t eaddr = addr + size;


3611 
3612         if (avl_numnodes(&as->a_wpage) == 0)
3613                 return;
3614 
3615         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3616         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3617                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3618 
3619         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3620 
3621         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3622 
3623                 if ((prot = pwp->wp_oprot) != 0) {
3624                         retrycnt = 0;
3625 
3626                         if (prot != pwp->wp_prot) {
3627                         retry:
3628                                 seg = as_segat(as, pwp->wp_vaddr);
3629                                 if (seg == NULL)
3630                                         continue;
3631                                 err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
3632                                     PAGESIZE, prot);
3633                                 if (err == IE_RETRY) {
3634                                         ASSERT(retrycnt == 0);
3635                                         retrycnt++;
3636                                         goto retry;
3637 
3638                                 }
3639                         }
3640                         pwp->wp_oprot = 0;
3641                         pwp->wp_prot = 0;
3642                 }
3643 
3644                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3645         }
3646 }
3647 
3648 void
3649 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3650 {
3651         struct proc *p;


3660                 }
3661         }
3662         mutex_exit(&pidlock);
3663 }
3664 
3665 /*
3666  * return memory object ID
3667  */
3668 int
3669 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3670 {
3671         struct seg      *seg;
3672         int             sts;
3673 
3674         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3675         seg = as_segat(as, addr);
3676         if (seg == NULL) {
3677                 AS_LOCK_EXIT(as, &as->a_lock);
3678                 return (EFAULT);
3679         }
3680         /*
3681          * catch old drivers which may not support getmemid
3682          */
3683         if (seg->s_ops->getmemid == NULL) {
3684                 AS_LOCK_EXIT(as, &as->a_lock);
3685                 return (ENODEV);
3686         }
3687 
3688         sts = SEGOP_GETMEMID(seg, addr, memidp);
3689 
3690         AS_LOCK_EXIT(as, &as->a_lock);
3691         return (sts);
3692 }
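
Given the NULL check above, a segment driver that cannot produce memory
object IDs does not need a stub that returns ENODEV. A hypothetical ops table
(the fault/unmap entry points and the initializer style are assumptions for
illustration):

        static const struct seg_ops segxx_ops = {
                .fault          = segxx_fault,
                .unmap          = segxx_unmap,
                /* no getmemid: as_getmemid() returns ENODEV on our behalf */
        };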


  42  */
  43 
  44 #include <sys/types.h>
  45 #include <sys/t_lock.h>
  46 #include <sys/param.h>
  47 #include <sys/errno.h>
  48 #include <sys/systm.h>
  49 #include <sys/mman.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/sysinfo.h>
  53 #include <sys/kmem.h>
  54 #include <sys/vnode.h>
  55 #include <sys/vmsystm.h>
  56 #include <sys/cmn_err.h>
  57 #include <sys/debug.h>
  58 #include <sys/tnf_probe.h>
  59 #include <sys/vtrace.h>
  60 
  61 #include <vm/hat.h>

  62 #include <vm/as.h>
  63 #include <vm/seg.h>
  64 #include <vm/seg_vn.h>
  65 #include <vm/seg_dev.h>
  66 #include <vm/seg_kmem.h>
  67 #include <vm/seg_map.h>
  68 #include <vm/seg_spt.h>
  69 #include <vm/page.h>
  70 
  71 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
  72 
  73 static struct kmem_cache *as_cache;
  74 
  75 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
  76 static void as_clearwatchprot(struct as *, caddr_t, size_t);
  77 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
  78 
  79 
  80 /*
  81  * Verifying the segment lists is very time-consuming; it may not be


 456         seg = avl_find(&as->a_segtree, &addr, &where);
 457 
 458         if (seg == NULL)
 459                 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
 460 
 461         if (seg == NULL)
 462                 seg = avl_last(&as->a_segtree);
 463 
 464         if (seg != NULL) {
 465                 caddr_t base = seg->s_base;
 466 
 467                 /*
 468                  * If top of seg is below the requested address, then
 469                  * the insertion point is at the end of the linked list,
 470                  * and seg points to the tail of the list.  Otherwise,
 471                  * the insertion point is immediately before seg.
 472                  */
 473                 if (base + seg->s_size > addr) {
 474                         if (addr >= base || eaddr > base) {
 475 #ifdef __sparc
 476                                 extern const struct seg_ops segnf_ops;
 477 
 478                                 /*
 479                                  * no-fault segs must disappear if overlaid.
 480                                  * XXX need new segment type so
 481                                  * we don't have to check s_ops
 482                                  */
 483                                 if (seg->s_ops == &segnf_ops) {
 484                                         seg_unmap(seg);
 485                                         goto again;
 486                                 }
 487 #endif
 488                                 return (-1);    /* overlapping segment */
 489                         }
 490                 }
 491         }
 492         as->a_seglast = newseg;
 493         avl_insert(&as->a_segtree, newseg, where);
 494 
 495 #ifdef VERIFY_SEGLIST
 496         as_verify(as);


 653 
 654         as->a_flags             = 0;
 655         as->a_vbits             = 0;
 656         as->a_hrm               = NULL;
 657         as->a_seglast           = NULL;
 658         as->a_size              = 0;
 659         as->a_resvsize          = 0;
 660         as->a_updatedir         = 0;
 661         gethrestime(&as->a_updatetime);
 662         as->a_objectdir         = NULL;
 663         as->a_sizedir           = 0;
 664         as->a_userlimit         = (caddr_t)USERLIMIT;
 665         as->a_lastgap           = NULL;
 666         as->a_lastgaphl         = NULL;
 667         as->a_callbacks         = NULL;
 668 
 669         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 670         as->a_hat = hat_alloc(as);   /* create hat for default system mmu */
 671         AS_LOCK_EXIT(as, &as->a_lock);
 672 


 673         return (as);
 674 }
 675 
 676 /*
 677  * Free an address space data structure.
 678  * Need to free the hat first and then
 679  * all the segments on this as and finally
 680  * the space for the as struct itself.
 681  */
 682 void
 683 as_free(struct as *as)
 684 {
 685         struct hat *hat = as->a_hat;
 686         struct seg *seg, *next;
 687         boolean_t free_started = B_FALSE;
 688 
 689 top:
 690         /*
 691          * Invoke ALL callbacks. as_do_callbacks will do one callback
 692          * per call, and not return (-1) until the callback has completed.
 693          * When as_do_callbacks returns zero, all callbacks have completed.
 694          */
 695         mutex_enter(&as->a_contents);
 696         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 697                 ;
 698 
 698 
 699         mutex_exit(&as->a_contents);
 700         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 701 
 702         if (!free_started) {
 703                 free_started = B_TRUE;
 704                 hat_free_start(hat);


 705         }
 706         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 707                 int err;
 708 
 709                 next = AS_SEGNEXT(as, seg);
 710 retry:
 711                 err = segop_unmap(seg, seg->s_base, seg->s_size);
 712                 if (err == EAGAIN) {
 713                         mutex_enter(&as->a_contents);
 714                         if (as->a_callbacks) {
 715                                 AS_LOCK_EXIT(as, &as->a_lock);
 716                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 717                                 /*
 718                                  * Memory is currently locked. Wait for a
 719                                  * cv_signal that it has been unlocked, then
 720                                  * try the operation again.
 721                                  */
 722                                 if (AS_ISUNMAPWAIT(as) == 0)
 723                                         cv_broadcast(&as->a_cv);
 724                                 AS_SETUNMAPWAIT(as);
 725                                 AS_LOCK_EXIT(as, &as->a_lock);
 726                                 while (AS_ISUNMAPWAIT(as))
 727                                         cv_wait(&as->a_cv, &as->a_contents);
 728                         } else {
 729                                 /*
 730                                  * We may have raced with
 731                                  * segvn_reclaim()/segspt_reclaim(). In this


 734                                  * 0.  We don't drop as writer lock so our
 735                                  * number of retries without sleeping should
 736                                  * be very small. See segvn_reclaim() for
 737                                  * more comments.
 738                                  */
 739                                 AS_CLRNOUNMAPWAIT(as);
 740                                 mutex_exit(&as->a_contents);
 741                                 goto retry;
 742                         }
 743                         mutex_exit(&as->a_contents);
 744                         goto top;
 745                 } else {
 746                         /*
 747                          * We do not expect any other error return at this
 748                          * time. This is similar to an ASSERT in seg_unmap()
 749                          */
 750                         ASSERT(err == 0);
 751                 }
 752         }
 753         hat_free_end(hat);


 754         AS_LOCK_EXIT(as, &as->a_lock);
 755 
 756         /* /proc stuff */
 757         ASSERT(avl_numnodes(&as->a_wpage) == 0);
 758         if (as->a_objectdir) {
 759                 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
 760                 as->a_objectdir = NULL;
 761                 as->a_sizedir = 0;
 762         }
 763 
 764         /*
 765          * Free the struct as back to kmem.  Assert it has no segments.
 766          */
 767         ASSERT(avl_numnodes(&as->a_segtree) == 0);
 768         kmem_cache_free(as_cache, as);
 769 }
 770 
 771 int
 772 as_dup(struct as *as, struct proc *forkedproc)
 773 {
 774         struct as *newas;
 775         struct seg *seg, *newseg;
 776         size_t  purgesize = 0;
 777         int error;
 778 
 779         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 780         as_clearwatch(as);
 781         newas = as_alloc();
 782         newas->a_userlimit = as->a_userlimit;
 783         newas->a_proc = forkedproc;
 784 
 785         AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
 786 
 787         (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
 788 
 789         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 790 
 791                 if (seg->s_flags & S_PURGE) {
 792                         purgesize += seg->s_size;
 793                         continue;
 794                 }
 795 
 796                 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
 797                 if (newseg == NULL) {
 798                         AS_LOCK_EXIT(newas, &newas->a_lock);
 799                         as_setwatch(as);
 800                         AS_LOCK_EXIT(as, &as->a_lock);
 801                         as_free(newas);
 802                         return (-1);
 803                 }
 804                 if ((error = segop_dup(seg, newseg)) != 0) {
 805                         /*
 806                          * We call seg_free() on the new seg
 807                          * because the segment is not set up
 808                          * completely; i.e. it has no ops.
 809                          */
 810                         as_setwatch(as);
 811                         AS_LOCK_EXIT(as, &as->a_lock);
 812                         seg_free(newseg);
 813                         AS_LOCK_EXIT(newas, &newas->a_lock);
 814                         as_free(newas);
 815                         return (error);
 816                 }
 817                 newas->a_size += seg->s_size;
 818         }
 819         newas->a_resvsize = as->a_resvsize - purgesize;
 820 
 821         error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
 822 
 823         AS_LOCK_EXIT(newas, &newas->a_lock);
 824 
 825         as_setwatch(as);
 826         AS_LOCK_EXIT(as, &as->a_lock);
 827         if (error != 0) {
 828                 as_free(newas);
 829                 return (error);
 830         }
 831         forkedproc->p_as = newas;
 832         return (0);
 833 }
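
For context, a sketch of a fork-time call site (the process pointers are
hypothetical): on success as_dup() installs the duplicate itself through
forkedproc->p_as, so the caller only propagates failure, mapping the -1 from
a failed seg_alloc() to ENOMEM:

        if ((error = as_dup(parent->p_as, child)) != 0)
                return (error == -1 ? ENOMEM : error);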
 834 
 835 /*
 836  * Handle a ``fault'' at addr for size bytes.
 837  */
 838 faultcode_t
 839 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
 840         enum fault_type type, enum seg_rw rw)
 841 {
 842         struct seg *seg;
 843         caddr_t raddr;                  /* rounded down addr */
 844         size_t rsize;                   /* rounded up size */
 845         size_t ssize;
 846         faultcode_t res = 0;
 847         caddr_t addrsav;
 848         struct seg *segsav;
 849         int as_lock_held;
 850         klwp_t *lwp = ttolwp(curthread);

 851         int holding_wpage = 0;

 852 
 853 
 854 
 855 retry:

 856         /*
 857          * Indicate that the lwp is not to be stopped while waiting for a
 858          * pagefault.  This is to avoid deadlock while debugging a process
 859          * via /proc over NFS (in particular).
 860          */
 861         if (lwp != NULL)
 862                 lwp->lwp_nostop++;
 863 
 864         /*
 865          * The same length must be used when we softlock and softunlock.  We
 866          * don't support softunlocking lengths less than the original length
 867          * when there is largepage support.  See seg_dev.c for more
 868          * comments.
 869          */
 870         switch (type) {
 871 
 872         case F_SOFTLOCK:
 873                 CPU_STATS_ADD_K(vm, softlock, 1);
 874                 break;
 875 
 876         case F_SOFTUNLOCK:
 877                 break;
 878 
 879         case F_PROT:
 880                 CPU_STATS_ADD_K(vm, prot_fault, 1);
 881                 break;
 882 
 883         case F_INVAL:
 884                 CPU_STATS_ENTER_K();
 885                 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
 886                 if (as == &kas)
 887                         CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
 888                 CPU_STATS_EXIT_K();
 889                 break;
 890         }

 891 
 892         /* Kernel probe */
 893         TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
 894             tnf_opaque, address,        addr,
 895             tnf_fault_type,     fault_type,     type,
 896             tnf_seg_access,     access,         rw);
 897 
 898         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 899         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 900             (size_t)raddr;
 901 
 902         /*
 903          * XXX -- Don't grab the as lock for segkmap. We should grab it for
 904          * correctness, but then we could be stuck holding this lock for
 905          * a LONG time if the fault needs to be resolved on a slow
 906          * filesystem, and then no-one will be able to exec new commands,
 907          * as exec'ing requires the write lock on the as.
 908          */
 909         if (as == &kas && segkmap && segkmap->s_base <= raddr &&
 910             raddr + size < segkmap->s_base + segkmap->s_size) {
 911                 seg = segkmap;
 912                 as_lock_held = 0;
 913         } else {
 914                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 915 
 916                 seg = as_segat(as, raddr);
 917                 if (seg == NULL) {
 918                         AS_LOCK_EXIT(as, &as->a_lock);
 919                         if (lwp != NULL)
 920                                 lwp->lwp_nostop--;
 921                         return (FC_NOMAP);
 922                 }
 923 
 924                 as_lock_held = 1;
 925         }
 926 
 927         addrsav = raddr;
 928         segsav = seg;
 929 
 930         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 931                 if (raddr >= seg->s_base + seg->s_size) {
 932                         seg = AS_SEGNEXT(as, seg);
 933                         if (seg == NULL || raddr != seg->s_base) {
 934                                 res = FC_NOMAP;
 935                                 break;
 936                         }
 937                 }
 938                 if (raddr + rsize > seg->s_base + seg->s_size)
 939                         ssize = seg->s_base + seg->s_size - raddr;
 940                 else
 941                         ssize = rsize;
 942 
 943                 res = segop_fault(hat, seg, raddr, ssize, type, rw);
 944 
 945                 /* Restore watchpoints */
 946                 if (holding_wpage) {
 947                         as_setwatch(as);
 948                         holding_wpage = 0;
 949                 }
 950 
 951                 if (res != 0)
 952                         break;
 953         }
 954 
 955         /*
 956          * If we were SOFTLOCKing and encountered a failure,
 957          * we must SOFTUNLOCK the range we already did. (Maybe we
 958          * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
 959          * right here...)
 960          */
 961         if (res != 0 && type == F_SOFTLOCK) {
 962                 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
 963                         if (addrsav >= seg->s_base + seg->s_size)
 964                                 seg = AS_SEGNEXT(as, seg);
 965                         ASSERT(seg != NULL);
 966                         /*
 967                          * Now call the fault routine again to perform the
 968                          * unlock using S_OTHER instead of the rw variable
 969                          * since we never got a chance to touch the pages.
 970                          */
 971                         if (raddr > seg->s_base + seg->s_size)
 972                                 ssize = seg->s_base + seg->s_size - addrsav;
 973                         else
 974                                 ssize = raddr - addrsav;
 975                         (void) segop_fault(hat, seg, addrsav, ssize,
 976                             F_SOFTUNLOCK, S_OTHER);
 977                 }
 978         }
 979         if (as_lock_held)
 980                 AS_LOCK_EXIT(as, &as->a_lock);
 981         if (lwp != NULL)
 982                 lwp->lwp_nostop--;
 983 
 984         /*
 985          * If the lower levels returned EDEADLK for a fault,
 986          * it means that we should retry the fault.  Let's also wait
 987          * a bit to let the deadlock-causing condition clear.
 988          * This is part of a gross hack to work around a design flaw
 989          * in the ufs/sds logging code and should go away when the
 990          * logging code is re-designed to fix the problem. See bug
 991          * 4125102 for details of the problem.
 992          */
 993         if (FC_ERRNO(res) == EDEADLK) {
 994                 delay(deadlk_wait);
 995                 res = 0;
 996                 goto retry;
 997         }
 998         return (res);
 999 }
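
as_fault() returns a faultcode_t rather than an errno, so callers that need
an errno decode the result. A minimal sketch using fc_decode(), the same
helper the pagelock slow paths use:

        faultcode_t fc;

        fc = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, S_READ);
        if (fc != 0)
                return (fc_decode(fc)); /* e.g. FC_NOMAP decodes to EFAULT */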
1000 
1001 


1025         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1026             (size_t)raddr;
1027 
1028         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1029         seg = as_segat(as, raddr);
1030         if (seg == NULL) {
1031                 AS_LOCK_EXIT(as, &as->a_lock);
1032                 if (lwp != NULL)
1033                         lwp->lwp_nostop--;
1034                 return (FC_NOMAP);
1035         }
1036 
1037         for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1038                 if (raddr >= seg->s_base + seg->s_size) {
1039                         seg = AS_SEGNEXT(as, seg);
1040                         if (seg == NULL || raddr != seg->s_base) {
1041                                 res = FC_NOMAP;
1042                                 break;
1043                         }
1044                 }
1045                 res = segop_faulta(seg, raddr);
1046                 if (res != 0)
1047                         break;
1048         }
1049         AS_LOCK_EXIT(as, &as->a_lock);
1050         if (lwp != NULL)
1051                 lwp->lwp_nostop--;
1052         /*
1053          * If the lower levels returned EDEADLK for a fault,
1054          * it means that we should retry the fault.  Let's also wait
1055          * a bit to let the deadlock-causing condition clear.
1056          * This is part of a gross hack to work around a design flaw
1057          * in the ufs/sds logging code and should go away when the
1058          * logging code is re-designed to fix the problem. See bug
1059          * 4125102 for details of the problem.
1060          */
1061         if (FC_ERRNO(res) == EDEADLK) {
1062                 delay(deadlk_wait);
1063                 res = 0;
1064                 goto retry;
1065         }


1115         seg = as_segat(as, raddr);
1116         if (seg == NULL) {
1117                 as_setwatch(as);
1118                 AS_LOCK_EXIT(as, &as->a_lock);
1119                 return (ENOMEM);
1120         }
1121 
1122         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1123                 if (raddr >= seg->s_base + seg->s_size) {
1124                         seg = AS_SEGNEXT(as, seg);
1125                         if (seg == NULL || raddr != seg->s_base) {
1126                                 error = ENOMEM;
1127                                 break;
1128                         }
1129                 }
1130                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1131                         ssize = seg->s_base + seg->s_size - raddr;
1132                 else
1133                         ssize = rsize;
1134 retry:
1135                 error = segop_setprot(seg, raddr, ssize, prot);
1136 
1137                 if (error == IE_NOMEM) {
1138                         error = EAGAIN;
1139                         break;
1140                 }
1141 
1142                 if (error == IE_RETRY) {
1143                         AS_LOCK_EXIT(as, &as->a_lock);
1144                         writer = 1;
1145                         goto setprot_top;
1146                 }
1147 
1148                 if (error == EAGAIN) {
1149                         /*
1150                          * Make sure we have a_lock as writer.
1151                          */
1152                         if (writer == 0) {
1153                                 AS_LOCK_EXIT(as, &as->a_lock);
1154                                 writer = 1;
1155                                 goto setprot_top;


1266         seg = as_segat(as, raddr);
1267         if (seg == NULL) {
1268                 as_setwatch(as);
1269                 AS_LOCK_EXIT(as, &as->a_lock);
1270                 return (ENOMEM);
1271         }
1272 
1273         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1274                 if (raddr >= seg->s_base + seg->s_size) {
1275                         seg = AS_SEGNEXT(as, seg);
1276                         if (seg == NULL || raddr != seg->s_base) {
1277                                 error = ENOMEM;
1278                                 break;
1279                         }
1280                 }
1281                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1282                         ssize = seg->s_base + seg->s_size - raddr;
1283                 else
1284                         ssize = rsize;
1285 
1286                 error = segop_checkprot(seg, raddr, ssize, prot);
1287                 if (error != 0)
1288                         break;
1289         }
1290         as_setwatch(as);
1291         AS_LOCK_EXIT(as, &as->a_lock);
1292         return (error);
1293 }
1294 
1295 int
1296 as_unmap(struct as *as, caddr_t addr, size_t size)
1297 {
1298         struct seg *seg, *seg_next;
1299         struct as_callback *cb;
1300         caddr_t raddr, eaddr;
1301         size_t ssize, rsize = 0;
1302         int err;
1303 
1304 top:
1305         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1306         eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &


1332                 else
1333                         ssize = eaddr - raddr;
1334 
1335                 /*
1336                  * Save next segment pointer since seg can be
1337                  * destroyed during the segment unmap operation.
1338                  */
1339                 seg_next = AS_SEGNEXT(as, seg);
1340 
1341                 /*
1342                  * We didn't count /dev/null mappings, so ignore them here.
1343                  * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1344                  * we have to do this check here while we have seg.)
1345                  */
1346                 rsize = 0;
1347                 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1348                     !SEG_IS_PARTIAL_RESV(seg))
1349                         rsize = ssize;
1350 
1351 retry:
1352                 err = segop_unmap(seg, raddr, ssize);
1353                 if (err == EAGAIN) {
1354                         /*
1355                          * Memory is currently locked.  It must be unlocked
1356                          * before this operation can succeed through a retry.
1357                          * The possible reasons for locked memory and
1358                          * corresponding strategies for unlocking are:
1359                          * (1) Normal I/O
1360                          *      wait for a signal that the I/O operation
1361                          *      has completed and the memory is unlocked.
1362                          * (2) Asynchronous I/O
1363                          *      The aio subsystem does not unlock pages when
1364                          *      the I/O is completed. Those pages are unlocked
1365                          *      when the application calls aiowait/aioerror.
1366                          *      So, to prevent blocking forever, cv_broadcast()
1367                          *      is done to wake up aio_cleanup_thread.
1368                          *      Subsequently, segvn_reclaim will be called, and
1369                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
1370                          * (3) Long term page locking:
1371                          *      Drivers intending to have pages locked for a
1372                          *      period considerably longer than for normal I/O


1770  */
1771 void
1772 as_purge(struct as *as)
1773 {
1774         struct seg *seg;
1775         struct seg *next_seg;
1776 
1777         /*
1778          * The setting of AS_NEEDSPURGE is protected by as_rangelock(), so
1779          * there is no need to grab the a_contents mutex for this check.
1780          */
1781         if ((as->a_flags & AS_NEEDSPURGE) == 0)
1782                 return;
1783 
1784         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1785         next_seg = NULL;
1786         seg = AS_SEGFIRST(as);
1787         while (seg != NULL) {
1788                 next_seg = AS_SEGNEXT(as, seg);
1789                 if (seg->s_flags & S_PURGE)
1790                         segop_unmap(seg, seg->s_base, seg->s_size);
1791                 seg = next_seg;
1792         }
1793         AS_LOCK_EXIT(as, &as->a_lock);
1794 
1795         mutex_enter(&as->a_contents);
1796         as->a_flags &= ~AS_NEEDSPURGE;
1797         mutex_exit(&as->a_contents);
1798 }
1799 
1800 /*
1801  * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1802  * range of addresses at least "minlen" long, where the base of the range is
1803  * at "off" phase from an "align" boundary and there is space for a
1804  * "redzone"-sized redzone on either side of the range.  Thus,
1805  * if align was 4M and off was 16k, the user wants a hole which will start
1806  * 16k into a 4M page.
1807  *
1808  * If flags specifies AH_HI, the hole will have the highest possible address
1809  * in the range.  We use the as->a_lastgap field to figure out where to
1810  * start looking for a gap.


1988  * -1 is returned.
1989  *
1990  * NOTE: This routine is not correct when base+len overflows caddr_t.
1991  */
1992 int
1993 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
1994     caddr_t addr)
1995 {
1996 
1997         return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
1998 }
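
To make the align/off phase description concrete, here is a hypothetical call
asking for a hole of at least 8M whose base sits 16k past a 4M boundary, with
no redzone. The argument order follows the wrapper above; the search range
values and the AH_LO direction flag (the counterpart of AH_HI mentioned
earlier) are assumptions:

        caddr_t base = (caddr_t)(4UL * 1024 * 1024); /* search range start */
        size_t len = 1UL << 32;                      /* search range length */

        if (as_gap_aligned(as, 8UL * 1024 * 1024, &base, &len, AH_LO, NULL,
            4UL * 1024 * 1024, 0, 16 * 1024) == 0) {
                /* [base, base + len) begins 16k past a 4M boundary */
        }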
1999 
2000 /*
2001  * Return the next range within [base, base + len) that is backed
2002  * with "real memory".  Skip holes and non-seg_vn segments.
2003  * We're lazy and only return one segment at a time.
2004  */
2005 int
2006 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2007 {
2008         extern const struct seg_ops segspt_shmops;      /* needs a header file */
2009         struct seg *seg;
2010         caddr_t addr, eaddr;
2011         caddr_t segend;
2012 
2013         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2014 
2015         addr = *basep;
2016         eaddr = addr + *lenp;
2017 
2018         seg = as_findseg(as, addr, 0);
2019         if (seg != NULL)
2020                 addr = MAX(seg->s_base, addr);
2021 
2022         for (;;) {
2023                 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2024                         AS_LOCK_EXIT(as, &as->a_lock);
2025                         return (EINVAL);
2026                 }
2027 
2028                 if (seg->s_ops == &segvn_ops) {


2041                 }
2042 
2043                 seg = AS_SEGNEXT(as, seg);
2044 
2045                 if (seg != NULL)
2046                         addr = seg->s_base;
2047         }
2048 
2049         *basep = addr;
2050 
2051         if (segend > eaddr)
2052                 *lenp = eaddr - addr;
2053         else
2054                 *lenp = segend - addr;
2055 
2056         AS_LOCK_EXIT(as, &as->a_lock);
2057         return (0);
2058 }
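
Because as_memory() reports only one backed range per call, enumerating all
of an interval takes a loop like this sketch (addr and size are assumed
inputs naming the interval of interest):

        caddr_t base = addr;
        size_t len = size;

        while (as_memory(as, &base, &len) == 0) {
                /* [base, base + len) is backed by real memory */
                base += len;
                if (base >= addr + size)
                        break;
                len = (addr + size) - base;
        }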
2059 
2060 /*
2061  * Determine whether data from the mappings in interval [addr, addr + size)
2062  * are in the primary memory (core) cache.
2063  */
2064 int
2065 as_incore(struct as *as, caddr_t addr,
2066     size_t size, char *vec, size_t *sizep)
2067 {
2068         struct seg *seg;
2069         size_t ssize;
2070         caddr_t raddr;          /* rounded down addr */
2071         size_t rsize;           /* rounded up size */
2072         size_t isize;                   /* iteration size */
2073         int error = 0;          /* result, assume success */
2074 
2075         *sizep = 0;
2076         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2077         rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2078             (size_t)raddr;
2079 
2080         if (raddr + rsize < raddr)           /* check for wraparound */


2082 
2083         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2084         seg = as_segat(as, raddr);
2085         if (seg == NULL) {
2086                 AS_LOCK_EXIT(as, &as->a_lock);
2087                 return (-1);
2088         }
2089 
2090         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2091                 if (raddr >= seg->s_base + seg->s_size) {
2092                         seg = AS_SEGNEXT(as, seg);
2093                         if (seg == NULL || raddr != seg->s_base) {
2094                                 error = -1;
2095                                 break;
2096                         }
2097                 }
2098                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2099                         ssize = seg->s_base + seg->s_size - raddr;
2100                 else
2101                         ssize = rsize;
2102                 *sizep += isize = segop_incore(seg, raddr, ssize, vec);
2103                 if (isize != ssize) {
2104                         error = -1;
2105                         break;
2106                 }
2107                 vec += btopr(ssize);
2108         }
2109         AS_LOCK_EXIT(as, &as->a_lock);
2110         return (error);
2111 }
2112 
2113 static void
2114 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2115         ulong_t *bitmap, size_t position, size_t npages)
2116 {
2117         caddr_t range_start;
2118         size_t  pos1 = position;
2119         size_t  pos2;
2120         size_t  size;
2121         size_t  end_pos = npages + position;
2122 
2123         while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2124                 size = ptob((pos2 - pos1));
2125                 range_start = (caddr_t)((uintptr_t)addr +
2126                     ptob(pos1 - position));
2127 
2128                 (void) segop_lockop(seg, range_start, size, attr, MC_UNLOCK,
2129                     (ulong_t *)NULL, (size_t)NULL);
2130                 pos1 = pos2;
2131         }
2132 }
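
The bt_range() loop above visits each run of set bits in the lock bitmap. An
isolated sketch with a one-word bitmap (bits 2-4 and 7 set) visits the runs
[2, 5) and [7, 8):

        ulong_t map[1] = { (1UL << 2) | (1UL << 3) | (1UL << 4) | (1UL << 7) };
        size_t p1 = 0, p2;

        while (bt_range(map, &p1, &p2, 8)) {
                /* this run of set bits occupies bit positions [p1, p2) */
                p1 = p2;        /* resume scanning after the run */
        }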
2133 
2134 static void
2135 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2136         caddr_t raddr, size_t rsize)
2137 {
2138         struct seg *seg = as_segat(as, raddr);
2139         size_t ssize;
2140 
2141         while (rsize != 0) {
2142                 if (raddr >= seg->s_base + seg->s_size)
2143                         seg = AS_SEGNEXT(as, seg);
2144 
2145                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2146                         ssize = seg->s_base + seg->s_size - raddr;
2147                 else
2148                         ssize = rsize;


2204                 if (seg == NULL) {
2205                         AS_LOCK_EXIT(as, &as->a_lock);
2206                         return (0);
2207                 }
2208 
2209                 do {
2210                         raddr = (caddr_t)((uintptr_t)seg->s_base &
2211                             (uintptr_t)PAGEMASK);
2212                         rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2213                             PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2214                 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2215 
2216                 mlock_size = BT_BITOUL(btopr(rlen));
2217                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2218                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2219                                 AS_LOCK_EXIT(as, &as->a_lock);
2220                                 return (EAGAIN);
2221                 }
2222 
2223                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2224                         error = segop_lockop(seg, seg->s_base,
2225                             seg->s_size, attr, MC_LOCK, mlock_map, pos);
2226                         if (error != 0)
2227                                 break;
2228                         pos += seg_pages(seg);
2229                 }
2230 
2231                 if (error) {
2232                         for (seg = AS_SEGFIRST(as); seg != NULL;
2233                             seg = AS_SEGNEXT(as, seg)) {
2234 
2235                                 raddr = (caddr_t)((uintptr_t)seg->s_base &
2236                                     (uintptr_t)PAGEMASK);
2237                                 npages = seg_pages(seg);
2238                                 as_segunlock(seg, raddr, attr, mlock_map,
2239                                     idx, npages);
2240                                 idx += npages;
2241                         }
2242                 }
2243 
2244                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2245                 AS_LOCK_EXIT(as, &as->a_lock);
2246                 goto lockerr;
2247         } else if (func == MC_UNLOCKAS) {
2248                 mutex_enter(&as->a_contents);
2249                 AS_CLRPGLCK(as);
2250                 mutex_exit(&as->a_contents);
2251 
2252                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2253                         error = segop_lockop(seg, seg->s_base,
2254                             seg->s_size, attr, MC_UNLOCK, NULL, 0);
2255                         if (error != 0)
2256                                 break;
2257                 }
2258 
2259                 AS_LOCK_EXIT(as, &as->a_lock);
2260                 goto lockerr;
2261         }
2262 
2263         /*
2264          * Normalize addresses and sizes.
2265          */
2266         initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2267         initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2268             (size_t)raddr;
2269 
2270         if (raddr + rsize < raddr) {         /* check for wraparound */
2271                 AS_LOCK_EXIT(as, &as->a_lock);
2272                 return (ENOMEM);
2273         }


2311                                 }
2312                                 AS_LOCK_EXIT(as, &as->a_lock);
2313                                 return (ENOMEM);
2314                         }
2315                 }
2316                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2317                         ssize = seg->s_base + seg->s_size - raddr;
2318                 else
2319                         ssize = rsize;
2320 
2321                 /*
2322                  * Dispatch on specific function.
2323                  */
2324                 switch (func) {
2325 
2326                 /*
2327                  * Synchronize cached data from mappings with backing
2328                  * objects.
2329                  */
2330                 case MC_SYNC:
2331                         if (error = segop_sync(seg, raddr, ssize,
2332                             attr, (uint_t)arg)) {
2333                                 AS_LOCK_EXIT(as, &as->a_lock);
2334                                 return (error);
2335                         }
2336                         break;
2337 
2338                 /*
2339                  * Lock pages in memory.
2340                  */
2341                 case MC_LOCK:
2342                         if (error = segop_lockop(seg, raddr, ssize,
2343                             attr, func, mlock_map, pos)) {
2344                                 as_unlockerr(as, attr, mlock_map, initraddr,
2345                                     initrsize - rsize + ssize);
2346                                 kmem_free(mlock_map, mlock_size *
2347                                     sizeof (ulong_t));
2348                                 AS_LOCK_EXIT(as, &as->a_lock);
2349                                 goto lockerr;
2350                         }
2351                         break;
2352 
2353                 /*
2354                  * Unlock mapped pages.
2355                  */
2356                 case MC_UNLOCK:
2357                         (void) segop_lockop(seg, raddr, ssize, attr, func,
2358                             (ulong_t *)NULL, (size_t)NULL);
2359                         break;
2360 
2361                 /*
2362                  * Store VM advise for mapped pages in segment layer.
2363                  */
2364                 case MC_ADVISE:
2365                         error = segop_advise(seg, raddr, ssize, (uint_t)arg);
2366 
2367                         /*
2368                          * Check for regular errors and special retry error
2369                          */
2370                         if (error) {
2371                                 if (error == IE_RETRY) {
2372                                         /*
2373                                          * Need to acquire writers lock, so
2374                                          * have to drop readers lock and start
2375                                          * all over again
2376                                          */
2377                                         AS_LOCK_EXIT(as, &as->a_lock);
2378                                         goto retry;
2379                                 } else if (error == IE_REATTACH) {
2380                                         /*
2381                                          * Find segment for current address
2382                                          * because current segment just got
2383                                          * split or concatenated
2384                                          */
2385                                         seg = as_segat(as, raddr);
2386                                         if (seg == NULL) {
2387                                                 AS_LOCK_EXIT(as, &as->a_lock);
2388                                                 return (ENOMEM);
2389                                         }
2390                                 } else {
2391                                         /*
2392                                          * Regular error
2393                                          */
2394                                         AS_LOCK_EXIT(as, &as->a_lock);
2395                                         return (error);
2396                                 }
2397                         }
2398                         break;
2399 
2400                 case MC_INHERIT_ZERO:
2401                         error = segop_inherit(seg, raddr, ssize, SEGP_INH_ZERO);
2402                         if (error != 0) {
2403                                 AS_LOCK_EXIT(as, &as->a_lock);
2404                                 return (error);
2405                         }
2406                         break;
2407 
2408                 /*
2409                  * Can't happen.
2410                  */
2411                 default:
2412                         panic("as_ctl: bad operation %d", func);
2413                         /*NOTREACHED*/
2414                 }
2415 
2416                 rsize -= ssize;
2417                 raddr += ssize;
2418         }
2419 
2420         if (func == MC_LOCK)
2421                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
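The MC_LOCK, MC_UNLOCK, and MC_ADVISE cases above are normally reached from
user level via mlock(3C), munlock(3C), and madvise(3C), which are built on
memcntl(2). A minimal user-level sketch (the exact buffer setup is an
assumption; errors are only reported):

        #include <sys/mman.h>
        #include <stdio.h>

        int
        main(void)
        {
                size_t len = 8192;
                caddr_t buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANON, -1, 0);

                if (mlock(buf, len) != 0)               /* the MC_LOCK case */
                        perror("mlock");
                if (madvise(buf, len, MADV_SEQUENTIAL) != 0)
                        perror("madvise");              /* the MC_ADVISE case */
                if (munlock(buf, len) != 0)             /* the MC_UNLOCK case */
                        perror("munlock");
                return (0);
        }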


2465  * as expected by the caller.  Save pointers to the per-segment shadow lists
2466  * at the tail of plist so that they can be used during as_pageunlock().
2467  */
2468 static int
2469 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2470     caddr_t addr, size_t size, enum seg_rw rw)
2471 {
2472         caddr_t sv_addr = addr;
2473         size_t sv_size = size;
2474         struct seg *sv_seg = seg;
2475         ulong_t segcnt = 1;
2476         ulong_t cnt;
2477         size_t ssize;
2478         pgcnt_t npages = btop(size);
2479         page_t **plist;
2480         page_t **pl;
2481         int error;
2482         caddr_t eaddr;
2483         faultcode_t fault_err = 0;
2484         pgcnt_t pl_off;
2485         extern const struct seg_ops segspt_shmops;
2486 
2487         ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2488         ASSERT(seg != NULL);
2489         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2490         ASSERT(addr + size > seg->s_base + seg->s_size);
2491         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2492         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2493 
2494         /*
2495          * Count the number of segments covered by the range we are about to
2496          * lock. The segment count is used to size the shadow list we return
2497          * to the caller.
2498          */
2499         for (; size != 0; size -= ssize, addr += ssize) {
2500                 if (addr >= seg->s_base + seg->s_size) {
2501 
2502                         seg = AS_SEGNEXT(as, seg);
2503                         if (seg == NULL || addr != seg->s_base) {
2504                                 AS_LOCK_EXIT(as, &as->a_lock);
2505                                 return (EFAULT);
2506                         }
2507                         /*
2508                          * Quickly check whether the subsequent segments
2509                          * are likely to support pagelock.
2510                          */
2511                         if (seg->s_ops == &segvn_ops) {
2512                                 vnode_t *vp;
2513 
2514                                 if (segop_getvp(seg, addr, &vp) != 0 ||
2515                                     vp != NULL) {
2516                                         AS_LOCK_EXIT(as, &as->a_lock);
2517                                         goto slow;
2518                                 }
2519                         } else if (seg->s_ops != &segspt_shmops) {
2520                                 AS_LOCK_EXIT(as, &as->a_lock);
2521                                 goto slow;
2522                         }
2523                         segcnt++;
2524                 }
2525                 if (addr + size > seg->s_base + seg->s_size) {
2526                         ssize = seg->s_base + seg->s_size - addr;
2527                 } else {
2528                         ssize = size;
2529                 }
2530         }
2531         ASSERT(segcnt > 1);
2532 
2533         plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
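        /*
         * plist layout: the first npages entries accumulate the flat page
         * list that is handed back to the caller, while the trailing segcnt
         * entries save each segment's shadow list pointer so it can be
         * passed back to the right segment driver at unlock time.
         */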
2534 
2535         addr = sv_addr;
2536         size = sv_size;
2537         seg = sv_seg;
2538 
2539         for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2540                 if (addr >= seg->s_base + seg->s_size) {
2541                         seg = AS_SEGNEXT(as, seg);
2542                         ASSERT(seg != NULL && addr == seg->s_base);
2543                         cnt++;
2544                         ASSERT(cnt < segcnt);
2545                 }
2546                 if (addr + size > seg->s_base + seg->s_size) {
2547                         ssize = seg->s_base + seg->s_size - addr;
2548                 } else {
2549                         ssize = size;
2550                 }
2551                 pl = &plist[npages + cnt];
2552                 error = segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2553                     L_PAGELOCK, rw);
2554                 if (error) {
2555                         break;
2556                 }
2557                 ASSERT(plist[npages + cnt] != NULL);
2558                 ASSERT(pl_off + btop(ssize) <= npages);
2559                 bcopy(plist[npages + cnt], &plist[pl_off],
2560                     btop(ssize) * sizeof (page_t *));
2561                 pl_off += btop(ssize);
2562         }
2563 
2564         if (size == 0) {
2565                 AS_LOCK_EXIT(as, &as->a_lock);
2566                 ASSERT(cnt == segcnt - 1);
2567                 *ppp = plist;
2568                 return (0);
2569         }
2570 
2571         /*
2572          * One of the pagelock calls failed; the error type is in the error
2573          * variable. Unlock what we have locked so far, and retry with
2574          * F_SOFTLOCK if the error type is either EFAULT or ENOTSUP;
2575          * otherwise just return the error back to the caller.
2576          */
2577 
2578         eaddr = addr;
2579         seg = sv_seg;
2580 
2581         for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2582                 if (addr >= seg->s_base + seg->s_size) {
2583                         seg = AS_SEGNEXT(as, seg);
2584                         ASSERT(seg != NULL && addr == seg->s_base);
2585                         cnt++;
2586                         ASSERT(cnt < segcnt);
2587                 }
2588                 if (eaddr > seg->s_base + seg->s_size) {
2589                         ssize = seg->s_base + seg->s_size - addr;
2590                 } else {
2591                         ssize = eaddr - addr;
2592                 }
2593                 pl = &plist[npages + cnt];
2594                 ASSERT(*pl != NULL);
2595                 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2596                     L_PAGEUNLOCK, rw);
2597         }
2598 
2599         AS_LOCK_EXIT(as, &as->a_lock);
2600 
2601         kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2602 
2603         if (error != ENOTSUP && error != EFAULT) {
2604                 return (error);
2605         }
2606 
2607 slow:
2608         /*
2609          * If we got here because pagelock failed due to the need to
2610          * cow-fault in the pages we want to lock, F_SOFTLOCK does that
2611          * job, and pagelock will hopefully succeed in the next
2612          * as_pagelock() call for this address range.
2613          */
2614         fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2615         if (fault_err != 0) {


2650         seg = as_segat(as, raddr);
2651         if (seg == NULL) {
2652                 AS_LOCK_EXIT(as, &as->a_lock);
2653                 return (EFAULT);
2654         }
2655         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2656         if (raddr + rsize > seg->s_base + seg->s_size) {
2657                 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2658         }
2659         if (raddr + rsize <= raddr) {
2660                 AS_LOCK_EXIT(as, &as->a_lock);
2661                 return (EFAULT);
2662         }
2663 
2664         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2665             "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2666 
2667         /*
2668          * try to lock pages and pass back shadow list
2669          */
2670         err = segop_pagelock(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2671 
2672         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2673 
2674         AS_LOCK_EXIT(as, &as->a_lock);
2675 
2676         if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2677                 return (err);
2678         }
2679 
2680         /*
2681          * Use F_SOFTLOCK to lock the pages, because pagelock failed either
2682          * due to missing pagelock support in this segment or because the
2683          * pages need to be cow-faulted in. If a fault is needed, F_SOFTLOCK
2684          * does that job for this as_pagelock() call, and pagelock will
2685          * hopefully succeed in the next as_pagelock() call for this range.
2686          */
2687         fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2688         if (fault_err != 0) {
2689                 return (fc_decode(fault_err));
2690         }


2713         ASSERT(seg != NULL);
2714         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2715         ASSERT(addr + size > seg->s_base + seg->s_size);
2716         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2717         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2718         ASSERT(plist != NULL);
2719 
2720         for (cnt = 0; addr < eaddr; addr += ssize) {
2721                 if (addr >= seg->s_base + seg->s_size) {
2722                         seg = AS_SEGNEXT(as, seg);
2723                         ASSERT(seg != NULL && addr == seg->s_base);
2724                         cnt++;
2725                 }
2726                 if (eaddr > seg->s_base + seg->s_size) {
2727                         ssize = seg->s_base + seg->s_size - addr;
2728                 } else {
2729                         ssize = eaddr - addr;
2730                 }
2731                 pl = &plist[npages + cnt];
2732                 ASSERT(*pl != NULL);
2733                 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2734                     L_PAGEUNLOCK, rw);
2735         }
2736         ASSERT(cnt > 0);
2737         AS_LOCK_EXIT(as, &as->a_lock);
2738 
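        /*
         * cnt holds the index of the last shadow list slot that was used;
         * incrementing it yields the segment count, so the size freed below
         * matches the (npages + segcnt) allocation in as_pagelock_segs().
         */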
2739         cnt++;
2740         kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2741 }
2742 
2743 /*
2744  * unlock pages in a given address range
2745  */
2746 void
2747 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2748     enum seg_rw rw)
2749 {
2750         struct seg *seg;
2751         size_t rsize;
2752         caddr_t raddr;
2753 


2759          * falling back to as_fault
2760          */
2761         if (pp == NULL) {
2762                 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2763                 return;
2764         }
2765 
2766         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2767         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2768             (size_t)raddr;
2769 
2770         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2771         seg = as_segat(as, raddr);
2772         ASSERT(seg != NULL);
2773 
2774         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2775             "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2776 
2777         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2778         if (raddr + rsize <= seg->s_base + seg->s_size) {
2779                 (void) segop_pagelock(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2780         } else {
2781                 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2782                 return;
2783         }
2784         AS_LOCK_EXIT(as, &as->a_lock);
2785         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2786 }
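Taken together, as_pagelock() and as_pageunlock() bracket the typical
physio-style consumer. A minimal in-kernel sketch, assuming a hypothetical
helper and I/O callback:

        /*
         * Hypothetical helper: run an I/O callback against a locked range.
         */
        static int
        with_locked_pages(struct as *as, caddr_t uaddr, size_t ulen,
            int (*io)(page_t **))
        {
                page_t **pplist;
                int err;

                /* Fast path first; as_pagelock() retries via F_SOFTLOCK. */
                err = as_pagelock(as, &pplist, uaddr, ulen, S_WRITE);
                if (err != 0)
                        return (err);

                err = io(pplist);       /* pplist is NULL on the fallback path */

                as_pageunlock(as, pplist, uaddr, ulen, S_WRITE);
                return (err);
        }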
2787 
2788 int
2789 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2790     boolean_t wait)
2791 {
2792         struct seg *seg;
2793         size_t ssize;
2794         caddr_t raddr;                  /* rounded down addr */
2795         size_t rsize;                   /* rounded up size */
2796         int error = 0;
2797         size_t pgsz = page_get_pagesize(szc);
2798 
2799 setpgsz_top:


2814                 as_setwatch(as);
2815                 AS_LOCK_EXIT(as, &as->a_lock);
2816                 return (ENOMEM);
2817         }
2818 
2819         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2820                 if (raddr >= seg->s_base + seg->s_size) {
2821                         seg = AS_SEGNEXT(as, seg);
2822                         if (seg == NULL || raddr != seg->s_base) {
2823                                 error = ENOMEM;
2824                                 break;
2825                         }
2826                 }
2827                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2828                         ssize = seg->s_base + seg->s_size - raddr;
2829                 } else {
2830                         ssize = rsize;
2831                 }
2832 
2833 retry:
2834                 error = segop_setpagesize(seg, raddr, ssize, szc);
2835 
2836                 if (error == IE_NOMEM) {
2837                         error = EAGAIN;
2838                         break;
2839                 }
2840 
2841                 if (error == IE_RETRY) {
2842                         AS_LOCK_EXIT(as, &as->a_lock);
2843                         goto setpgsz_top;
2844                 }
2845 
2846                 if (error == ENOTSUP) {
2847                         error = EINVAL;
2848                         break;
2849                 }
2850 
2851                 if (wait && (error == EAGAIN)) {
2852                         /*
2853                          * Memory is currently locked.  It must be unlocked
2854                          * before this operation can succeed through a retry.


2893                                  * number of retries without sleeping should
2894                                  * be very small. See segvn_reclaim() for
2895                                  * more comments.
2896                                  */
2897                                 AS_CLRNOUNMAPWAIT(as);
2898                                 mutex_exit(&as->a_contents);
2899                                 goto retry;
2900                         }
2901                         mutex_exit(&as->a_contents);
2902                         goto setpgsz_top;
2903                 } else if (error != 0) {
2904                         break;
2905                 }
2906         }
2907         as_setwatch(as);
2908         AS_LOCK_EXIT(as, &as->a_lock);
2909         return (error);
2910 }
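as_setpagesize() is normally driven from user level by memcntl(2) with
MC_HAT_ADVISE. A minimal sketch, assuming the mapping comes back suitably
aligned and the platform supports 2MB pages (both assumptions):

        #include <sys/mman.h>
        #include <stdio.h>

        int
        main(void)
        {
                size_t len = 2 * 1024 * 1024;
                caddr_t buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANON, -1, 0);
                struct memcntl_mha mha;

                mha.mha_cmd = MHA_MAPSIZE_VA;   /* page size advice for a VA range */
                mha.mha_flags = 0;
                mha.mha_pagesize = len;         /* must be a supported page size */

                if (memcntl(buf, len, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) != 0)
                        perror("memcntl");
                return (0);
        }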
2911 
2912 /*
2913  * as_iset3_default_lpsize() just calls segop_setpagesize() on all segments
2914  * in its chunk where s_szc is less than the szc we want to set.
2915  */
2916 static int
2917 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
2918     int *retry)
2919 {
2920         struct seg *seg;
2921         size_t ssize;
2922         int error;
2923 
2924         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
2925 
2926         seg = as_segat(as, raddr);
2927         if (seg == NULL) {
2928                 panic("as_iset3_default_lpsize: no seg");
2929         }
2930 
2931         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2932                 if (raddr >= seg->s_base + seg->s_size) {
2933                         seg = AS_SEGNEXT(as, seg);
2934                         if (seg == NULL || raddr != seg->s_base) {
2935                                 panic("as_iset3_default_lpsize: as changed");
2936                         }
2937                 }
2938                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2939                         ssize = seg->s_base + seg->s_size - raddr;
2940                 } else {
2941                         ssize = rsize;
2942                 }
2943 
2944                 if (szc > seg->s_szc) {
2945                         error = segop_setpagesize(seg, raddr, ssize, szc);
2946                         /* Only retry EINVAL for MAP_SHARED segments with no vnode. */
2947                         if (error == EINVAL) {
2948                                 vnode_t *vp = NULL;
2949                                 if ((segop_gettype(seg, raddr) & MAP_SHARED) &&
2950                                     (segop_getvp(seg, raddr, &vp) != 0 ||
2951                                     vp == NULL)) {
2952                                         *retry = 1;
2953                                 } else {
2954                                         *retry = 0;
2955                                 }
2956                         }
2957                         if (error) {
2958                                 return (error);
2959                         }
2960                 }
2961         }
2962         return (0);
2963 }
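Note that the EINVAL retry above is limited to MAP_SHARED segments that have
no vnode, i.e. shared anonymous memory. From user level, the page sizes worth
requesting in the first place can be probed with getpagesizes(3C); a minimal
sketch:

        #include <sys/mman.h>
        #include <stdio.h>

        int
        main(void)
        {
                size_t pgsz[8];
                int i, n;

                /* Fetch the page sizes supported by this platform. */
                n = getpagesizes(pgsz, 8);
                for (i = 0; i < n; i++)
                        (void) printf("supported page size: %zu\n", pgsz[i]);
                return (0);
        }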
2964 
2965 /*
2966  * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
2967  * pagesize on each segment in its range, but if any fails with EINVAL,
2968  * then it reduces the pagesizes to the next size in the bitmap and
2969  * retries as_iset3_default_lpsize(). The code retries smaller allowed
2970  * sizes on EINVAL because (a) the anon offset may not


3173         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3174 again:
3175         error = 0;
3176 
3177         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3178         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3179             (size_t)raddr;
3180 
3181         if (raddr + rsize < raddr) {         /* check for wraparound */
3182                 AS_LOCK_EXIT(as, &as->a_lock);
3183                 return (ENOMEM);
3184         }
3185         as_clearwatchprot(as, raddr, rsize);
3186         seg = as_segat(as, raddr);
3187         if (seg == NULL) {
3188                 as_setwatch(as);
3189                 AS_LOCK_EXIT(as, &as->a_lock);
3190                 return (ENOMEM);
3191         }
3192         if (seg->s_ops == &segvn_ops) {
3193                 rtype = segop_gettype(seg, addr);
3194                 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3195                 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3196                 segvn = 1;
3197         } else {
3198                 segvn = 0;
3199         }
3200         setaddr = raddr;
3201         setsize = 0;
3202 
3203         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3204                 if (raddr >= (seg->s_base + seg->s_size)) {
3205                         seg = AS_SEGNEXT(as, seg);
3206                         if (seg == NULL || raddr != seg->s_base) {
3207                                 error = ENOMEM;
3208                                 break;
3209                         }
3210                         if (seg->s_ops == &segvn_ops) {
3211                                 stype = segop_gettype(seg, raddr);
3212                                 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3213                                 stype &= (MAP_SHARED | MAP_PRIVATE);
3214                                 if (segvn && (rflags != sflags ||
3215                                     rtype != stype)) {
3216                                         /*
3217                                          * The next segment is also segvn but
3218                                          * has different flags and/or type.
3219                                          */
3220                                         ASSERT(setsize != 0);
3221                                         error = as_iset_default_lpsize(as,
3222                                             setaddr, setsize, rflags, rtype);
3223                                         if (error) {
3224                                                 break;
3225                                         }
3226                                         rflags = sflags;
3227                                         rtype = stype;
3228                                         setaddr = raddr;
3229                                         setsize = 0;
3230                                 } else if (!segvn) {
3231                                         rflags = sflags;


3305 as_setwatch(struct as *as)
3306 {
3307         struct watched_page *pwp;
3308         struct seg *seg;
3309         caddr_t vaddr;
3310         uint_t prot;
3311         int  err, retrycnt;
3312 
3313         if (avl_numnodes(&as->a_wpage) == 0)
3314                 return;
3315 
3316         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3317 
3318         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3319             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3320                 retrycnt = 0;
3321         retry:
3322                 vaddr = pwp->wp_vaddr;
3323                 if (pwp->wp_oprot != 0 ||    /* already set up */
3324                     (seg = as_segat(as, vaddr)) == NULL ||
3325                     segop_getprot(seg, vaddr, 0, &prot) != 0)
3326                         continue;
3327 
3328                 pwp->wp_oprot = prot;
3329                 if (pwp->wp_read)
3330                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3331                 if (pwp->wp_write)
3332                         prot &= ~PROT_WRITE;
3333                 if (pwp->wp_exec)
3334                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3335                 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3336                         err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3337                         if (err == IE_RETRY) {
3338                                 pwp->wp_oprot = 0;
3339                                 ASSERT(retrycnt == 0);
3340                                 retrycnt++;
3341                                 goto retry;
3342                         }
3343                 }
3344                 pwp->wp_prot = prot;
3345         }
3346 }
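The watched-page machinery that as_setwatch() re-arms is what backs /proc
watchpoints. A debugger establishes a watched area by writing a PCWATCH
control message to the target's ctl file; a minimal sketch (ctlfd is assumed
to be an open /proc <pid>/ctl descriptor and the target stopped):

        #include <procfs.h>
        #include <limits.h>
        #include <unistd.h>

        /* Set a write watchpoint covering one page at addr. */
        static int
        set_write_watch(int ctlfd, uintptr_t addr)
        {
                long ctl[1 + sizeof (prwatch_t) / sizeof (long)];
                prwatch_t *pw = (prwatch_t *)&ctl[1];

                ctl[0] = PCWATCH;
                pw->pr_vaddr = addr;            /* virtual address to watch */
                pw->pr_size = PAGESIZE;         /* size of the watched area */
                pw->pr_wflags = WA_WRITE;       /* trap on write access */

                return (write(ctlfd, ctl, sizeof (ctl)) == sizeof (ctl) ?
                    0 : -1);
        }

Writing the same prwatch_t again with pr_wflags set to 0 removes the watched
area.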
3347 
3348 /*
3349  * Clear all of the watched pages in the address space.
3350  */
3351 void
3352 as_clearwatch(struct as *as)
3353 {
3354         struct watched_page *pwp;
3355         struct seg *seg;
3356         caddr_t vaddr;
3357         uint_t prot;
3358         int err, retrycnt;
3359 
3360         if (avl_numnodes(&as->a_wpage) == 0)
3361                 return;
3362 
3363         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3364 
3365         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3366             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3367                 retrycnt = 0;
3368         retry:
3369                 vaddr = pwp->wp_vaddr;
3370                 if (pwp->wp_oprot == 0 ||    /* not set up */
3371                     (seg = as_segat(as, vaddr)) == NULL)
3372                         continue;
3373 
3374                 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3375                         err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3376                         if (err == IE_RETRY) {
3377                                 ASSERT(retrycnt == 0);
3378                                 retrycnt++;
3379                                 goto retry;
3380                         }
3381                 }
3382                 pwp->wp_oprot = 0;
3383                 pwp->wp_prot = 0;
3384         }
3385 }
3386 
3387 /*
3388  * Force a new setup for all the watched pages in the range.
3389  */
3390 static void
3391 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3392 {
3393         struct watched_page *pwp;
3394         struct watched_page tpw;
3395         caddr_t eaddr = addr + size;


3409                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3410 
3411         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3412                 retrycnt = 0;
3413                 vaddr = pwp->wp_vaddr;
3414 
3415                 wprot = prot;
3416                 if (pwp->wp_read)
3417                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3418                 if (pwp->wp_write)
3419                         wprot &= ~PROT_WRITE;
3420                 if (pwp->wp_exec)
3421                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3422                 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3423                 retry:
3424                         seg = as_segat(as, vaddr);
3425                         if (seg == NULL) {
3426                                 panic("as_setwatchprot: no seg");
3427                                 /*NOTREACHED*/
3428                         }
3429                         err = segop_setprot(seg, vaddr, PAGESIZE, wprot);
3430                         if (err == IE_RETRY) {
3431                                 ASSERT(retrycnt == 0);
3432                                 retrycnt++;
3433                                 goto retry;
3434                         }
3435                 }
3436                 pwp->wp_oprot = prot;
3437                 pwp->wp_prot = wprot;
3438 
3439                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3440         }
3441 }
3442 
3443 /*
3444  * Clear all of the watched pages in the range.
3445  */
3446 static void
3447 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3448 {
3449         caddr_t eaddr = addr + size;


3456 
3457         if (avl_numnodes(&as->a_wpage) == 0)
3458                 return;
3459 
3460         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3461         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3462                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3463 
3464         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3465 
3466         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3467 
3468                 if ((prot = pwp->wp_oprot) != 0) {
3469                         retrycnt = 0;
3470 
3471                         if (prot != pwp->wp_prot) {
3472                         retry:
3473                                 seg = as_segat(as, pwp->wp_vaddr);
3474                                 if (seg == NULL)
3475                                         continue;
3476                                 err = segop_setprot(seg, pwp->wp_vaddr,
3477                                     PAGESIZE, prot);
3478                                 if (err == IE_RETRY) {
3479                                         ASSERT(retrycnt == 0);
3480                                         retrycnt++;
3481                                         goto retry;
3482 
3483                                 }
3484                         }
3485                         pwp->wp_oprot = 0;
3486                         pwp->wp_prot = 0;
3487                 }
3488 
3489                 pwp = AVL_NEXT(&as->a_wpage, pwp);
3490         }
3491 }
3492 
3493 void
3494 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3495 {
3496         struct proc *p;


3505                 }
3506         }
3507         mutex_exit(&pidlock);
3508 }
3509 
3510 /*
3511  * Return the memory object ID for the segment mapping addr.
3512  */
3513 int
3514 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3515 {
3516         struct seg      *seg;
3517         int             sts;
3518 
3519         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3520         seg = as_segat(as, addr);
3521         if (seg == NULL) {
3522                 AS_LOCK_EXIT(as, &as->a_lock);
3523                 return (EFAULT);
3524         }
3525 
3526         sts = segop_getmemid(seg, addr, memidp);
3527 
3528         AS_LOCK_EXIT(as, &as->a_lock);
3529         return (sts);
3530 }
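A minimal in-kernel sketch of a caller (the surrounding names are
hypothetical): EFAULT means no segment maps addr, and any other non-zero
value is whatever the segment driver's getmemid handler returned.

        memid_t memid;
        int err;

        err = as_getmemid(as, addr, &memid);
        if (err == EFAULT) {
                /* addr is not mapped in this address space */
        } else if (err == 0) {
                /* memid now identifies the backing memory object */
        }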