Code review: patch for issue 6345 — remove XHAT support from the VM subsystem.
The listing below shows the pre-patch version of the file followed by the
post-patch version.


  42  */
  43 
  44 #include <sys/types.h>
  45 #include <sys/t_lock.h>
  46 #include <sys/param.h>
  47 #include <sys/errno.h>
  48 #include <sys/systm.h>
  49 #include <sys/mman.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/sysinfo.h>
  53 #include <sys/kmem.h>
  54 #include <sys/vnode.h>
  55 #include <sys/vmsystm.h>
  56 #include <sys/cmn_err.h>
  57 #include <sys/debug.h>
  58 #include <sys/tnf_probe.h>
  59 #include <sys/vtrace.h>
  60 
  61 #include <vm/hat.h>
  62 #include <vm/xhat.h>
  63 #include <vm/as.h>
  64 #include <vm/seg.h>
  65 #include <vm/seg_vn.h>
  66 #include <vm/seg_dev.h>
  67 #include <vm/seg_kmem.h>
  68 #include <vm/seg_map.h>
  69 #include <vm/seg_spt.h>
  70 #include <vm/page.h>
  71 
  72 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
  73 
  74 static struct kmem_cache *as_cache;
  75 
  76 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
  77 static void as_clearwatchprot(struct as *, caddr_t, size_t);
  78 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
  79 
  80 
  81 /*
  82  * Verifying the segment lists is very time-consuming; it may not be


 654 
 655         as->a_flags          = 0;
 656         as->a_vbits          = 0;
 657         as->a_hrm            = NULL;
 658         as->a_seglast                = NULL;
 659         as->a_size           = 0;
 660         as->a_resvsize               = 0;
 661         as->a_updatedir              = 0;
 662         gethrestime(&as->a_updatetime);
 663         as->a_objectdir              = NULL;
 664         as->a_sizedir                = 0;
 665         as->a_userlimit              = (caddr_t)USERLIMIT;
 666         as->a_lastgap                = NULL;
 667         as->a_lastgaphl              = NULL;
 668         as->a_callbacks              = NULL;
 669 
 670         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 671         as->a_hat = hat_alloc(as);   /* create hat for default system mmu */
 672         AS_LOCK_EXIT(as, &as->a_lock);
 673 
 674         as->a_xhat = NULL;
 675 
 676         return (as);
 677 }
 678 
 679 /*
 680  * Free an address space data structure.
 681  * Need to free the hat first and then
 682  * all the segments on this as and finally
 683  * the space for the as struct itself.
 684  */
 685 void
 686 as_free(struct as *as)
 687 {
 688         struct hat *hat = as->a_hat;
 689         struct seg *seg, *next;
 690         int called = 0;
 691 
 692 top:
 693         /*
 694          * Invoke ALL callbacks. as_do_callbacks will do one callback
 695          * per call, and not return (-1) until the callback has completed.
 696          * When as_do_callbacks returns zero, all callbacks have completed.
 697          */
 698         mutex_enter(&as->a_contents);
 699         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 700                 ;
 701 
 702         /* This will prevent new XHATs from attaching to as */
 703         if (!called)
 704                 AS_SETBUSY(as);
 705         mutex_exit(&as->a_contents);
 706         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 707 
 708         if (!called) {
 709                 called = 1;
 710                 hat_free_start(hat);
 711                 if (as->a_xhat != NULL)
 712                         xhat_free_start_all(as);
 713         }
 714         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 715                 int err;
 716 
 717                 next = AS_SEGNEXT(as, seg);
 718 retry:
 719                 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
 720                 if (err == EAGAIN) {
 721                         mutex_enter(&as->a_contents);
 722                         if (as->a_callbacks) {
 723                                 AS_LOCK_EXIT(as, &as->a_lock);
 724                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 725                                 /*
 726                                  * Memory is currently locked. Wait for a
 727                                  * cv_signal that it has been unlocked, then
 728                                  * try the operation again.
 729                                  */
 730                                 if (AS_ISUNMAPWAIT(as) == 0)
 731                                         cv_broadcast(&as->a_cv);
 732                                 AS_SETUNMAPWAIT(as);


 742                                  * 0.  We don't drop as writer lock so our
 743                                  * number of retries without sleeping should
 744                                  * be very small. See segvn_reclaim() for
 745                                  * more comments.
 746                                  */
 747                                 AS_CLRNOUNMAPWAIT(as);
 748                                 mutex_exit(&as->a_contents);
 749                                 goto retry;
 750                         }
 751                         mutex_exit(&as->a_contents);
 752                         goto top;
 753                 } else {
 754                         /*
 755                          * We do not expect any other error return at this
 756                          * time. This is similar to an ASSERT in seg_unmap()
 757                          */
 758                         ASSERT(err == 0);
 759                 }
 760         }
 761         hat_free_end(hat);
 762         if (as->a_xhat != NULL)
 763                 xhat_free_end_all(as);
 764         AS_LOCK_EXIT(as, &as->a_lock);
 765 
 766         /* /proc stuff */
 767         ASSERT(avl_numnodes(&as->a_wpage) == 0);
 768         if (as->a_objectdir) {
 769                 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
 770                 as->a_objectdir = NULL;
 771                 as->a_sizedir = 0;
 772         }
 773 
 774         /*
 775          * Free the struct as back to kmem.  Assert it has no segments.
 776          */
 777         ASSERT(avl_numnodes(&as->a_segtree) == 0);
 778         kmem_cache_free(as_cache, as);
 779 }
 780 
/*
 * Duplicate the address space 'as' for the forked process 'forkedproc'.
 *
 * Every segment except those marked S_PURGE is re-created in the new
 * address space and duplicated via its segment driver.  Returns 0 on
 * success (and installs the new as in forkedproc->p_as), -1 if a segment
 * could not be allocated, or the error from SEGOP_DUP()/hat_dup().
 */
int
as_dup(struct as *as, struct proc *forkedproc)
{
	struct as *newas;
	struct seg *seg, *newseg;
	size_t	purgesize = 0;	/* total size of S_PURGE segments skipped */
	int error;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	/* Drop watchpoint protections while the parent's mappings are copied. */
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	newas->a_proc = forkedproc;

	AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);

	/* This will prevent new XHATs from attaching */
	mutex_enter(&as->a_contents);
	AS_SETBUSY(as);
	mutex_exit(&as->a_contents);
	mutex_enter(&newas->a_contents);
	AS_SETBUSY(newas);
	mutex_exit(&newas->a_contents);

	/*
	 * First hat_dup() pass with HAT_DUP_SRD; presumably this shares
	 * region descriptors before segments are copied — TODO confirm
	 * against hat layer documentation.
	 */
	(void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE) {
			/* S_PURGE segments are not inherited by the child. */
			purgesize += seg->s_size;
			continue;
		}

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			/* Undo: drop locks, restore watchpoints, free child as. */
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			seg_free(newseg);
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}
	/* Skipped S_PURGE segments reduce the child's reserved size. */
	newas->a_resvsize = as->a_resvsize - purgesize;

	/* Second hat_dup() pass copies the full translations; OR in XHAT errors. */
	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
	if (as->a_xhat != NULL)
		error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);

	mutex_enter(&newas->a_contents);
	AS_CLRBUSY(newas);
	mutex_exit(&newas->a_contents);
	AS_LOCK_EXIT(newas, &newas->a_lock);

	/* Re-arm watchpoints on the parent before releasing it. */
	as_setwatch(as);
	mutex_enter(&as->a_contents);
	AS_CLRBUSY(as);
	mutex_exit(&as->a_contents);
	AS_LOCK_EXIT(as, &as->a_lock);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	forkedproc->p_as = newas;
	return (0);
}
 866 
/*
 * Handle a ``fault'' at addr for size bytes.
 *
 * The request is rounded to page boundaries and resolved segment by
 * segment through SEGOP_FAULT().  Returns 0 on success or a faultcode_t
 * on failure.  A failed F_SOFTLOCK undoes the pages already locked, and
 * an EDEADLK result from the lower layers causes a delayed retry.
 *
 * If 'hat' is not the address space's primary hat, the fault is on
 * behalf of an XHAT, which only supports F_INVAL on non-kernel address
 * spaces.
 */
faultcode_t
as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
	enum fault_type type, enum seg_rw rw)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	size_t ssize;			/* size handled within current segment */
	faultcode_t res = 0;
	caddr_t addrsav;		/* start addr saved for SOFTLOCK undo */
	struct seg *segsav;		/* start seg saved for SOFTLOCK undo */
	int as_lock_held;
	klwp_t *lwp = ttolwp(curthread);
	int is_xhat = 0;		/* fault is on behalf of an XHAT */
	int holding_wpage = 0;		/* watchpoints temporarily cleared */
	extern struct seg_ops	segdev_ops;



	if (as->a_hat != hat) {
		/* This must be an XHAT then */
		is_xhat = 1;

		if ((type != F_INVAL) || (as == &kas))
			return (FC_NOSUPPORT);
	}

retry:
	if (!is_xhat) {
		/*
		 * Indicate that the lwp is not to be stopped while waiting
		 * for a pagefault.  This is to avoid deadlock while debugging
		 * a process via /proc over NFS (in particular).
		 */
		if (lwp != NULL)
			lwp->lwp_nostop++;

		/*
		 * same length must be used when we softlock and softunlock.
		 * We don't support softunlocking lengths less than
		 * the original length when there is largepage support.
		 * See seg_dev.c for more comments.
		 */
		switch (type) {

		case F_SOFTLOCK:
			CPU_STATS_ADD_K(vm, softlock, 1);
			break;

		case F_SOFTUNLOCK:
			break;

		case F_PROT:
			CPU_STATS_ADD_K(vm, prot_fault, 1);
			break;

		case F_INVAL:
			CPU_STATS_ENTER_K();
			CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
			if (as == &kas)
				CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
			CPU_STATS_EXIT_K();
			break;
		}
	}

	/* Kernel probe */
	TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
	    tnf_opaque, address,	addr,
	    tnf_fault_type,	fault_type,	type,
	    tnf_seg_access,	access,		rw);

	/* Round the request down/up to whole pages. */
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * XXX -- Don't grab the as lock for segkmap. We should grab it for
	 * correctness, but then we could be stuck holding this lock for
	 * a LONG time if the fault needs to be resolved on a slow
	 * filesystem, and then no-one will be able to exec new commands,
	 * as exec'ing requires the write lock on the as.
	 */
	if (as == &kas && segkmap && segkmap->s_base <= raddr &&
	    raddr + size < segkmap->s_base + segkmap->s_size) {
		/*
		 * if (as==&kas), this can't be XHAT: we've already returned
		 * FC_NOSUPPORT.
		 */
		seg = segkmap;
		as_lock_held = 0;
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
		if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
			/*
			 * Grab and hold the writers' lock on the as
			 * if the fault is to a watched page.
			 * This will keep CPUs from "peeking" at the
			 * address range while we're temporarily boosting
			 * the permissions for the XHAT device to
			 * resolve the fault in the segment layer.
			 *
			 * We could check whether faulted address
			 * is within a watched page and only then grab
			 * the writer lock, but this is simpler.
			 */
			AS_LOCK_EXIT(as, &as->a_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		}

		seg = as_segat(as, raddr);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if ((lwp != NULL) && (!is_xhat))
				lwp->lwp_nostop--;
			return (FC_NOMAP);
		}

		as_lock_held = 1;
	}

	/* Remember the starting point in case a SOFTLOCK must be undone. */
	addrsav = raddr;
	segsav = seg;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		/* Advance to the next segment when we run off this one. */
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		/* Clamp ssize to the portion that lies in this segment. */
		if (raddr + rsize > seg->s_base + seg->s_size)
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		if (!is_xhat || (seg->s_ops != &segdev_ops)) {

			if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
			    pr_is_watchpage_as(raddr, rw, as)) {
				/*
				 * Handle watch pages.  If we're faulting on a
				 * watched page from an X-hat, we have to
				 * restore the original permissions while we
				 * handle the fault.
				 */
				as_clearwatch(as);
				holding_wpage = 1;
			}

			res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);

			/* Restore watchpoints */
			if (holding_wpage) {
				as_setwatch(as);
				holding_wpage = 0;
			}

			if (res != 0)
				break;
		} else {
			/* XHAT does not support seg_dev */
			res = FC_NOSUPPORT;
			break;
		}
	}

	/*
	 * If we were SOFTLOCKing and encountered a failure,
	 * we must SOFTUNLOCK the range we already did. (Maybe we
	 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
	 * right here...)
	 */
	if (res != 0 && type == F_SOFTLOCK) {
		for (seg = segsav; addrsav < raddr; addrsav += ssize) {
			if (addrsav >= seg->s_base + seg->s_size)
				seg = AS_SEGNEXT(as, seg);
			ASSERT(seg != NULL);
			/*
			 * Now call the fault routine again to perform the
			 * unlock using S_OTHER instead of the rw variable
			 * since we never got a chance to touch the pages.
			 */
			if (raddr > seg->s_base + seg->s_size)
				ssize = seg->s_base + seg->s_size - addrsav;
			else
				ssize = raddr - addrsav;
			(void) SEGOP_FAULT(hat, seg, addrsav, ssize,
			    F_SOFTUNLOCK, S_OTHER);
		}
	}
	if (as_lock_held)
		AS_LOCK_EXIT(as, &as->a_lock);
	if ((lwp != NULL) && (!is_xhat))
		lwp->lwp_nostop--;

	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * It means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}
1083 
1084 


2149  * memory requirements.  Its usefulness for this purpose depends on
2150  * how well the segment-level routines do at returning accurate
2151  * information.
2152  */
2153 size_t
2154 as_swapout(struct as *as)
2155 {
2156         struct seg *seg;
2157         size_t swpcnt = 0;
2158 
2159         /*
2160          * Kernel-only processes have given up their address
2161          * spaces.  Of course, we shouldn't be attempting to
2162          * swap out such processes in the first place...
2163          */
2164         if (as == NULL)
2165                 return (0);
2166 
2167         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2168 
2169         /* Prevent XHATs from attaching */
2170         mutex_enter(&as->a_contents);
2171         AS_SETBUSY(as);
2172         mutex_exit(&as->a_contents);
2173 
2174 
2175         /*
2176          * Free all mapping resources associated with the address
2177          * space.  The segment-level swapout routines capitalize
2178          * on this unmapping by scavanging pages that have become
2179          * unmapped here.
2180          */
2181         hat_swapout(as->a_hat);
2182         if (as->a_xhat != NULL)
2183                 xhat_swapout_all(as);
2184 
2185         mutex_enter(&as->a_contents);
2186         AS_CLRBUSY(as);
2187         mutex_exit(&as->a_contents);
2188 
2189         /*
2190          * Call the swapout routines of all segments in the address
2191          * space to do the actual work, accumulating the amount of
2192          * space reclaimed.
2193          */
2194         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195                 struct seg_ops *ov = seg->s_ops;
2196 
2197                 /*
2198                  * We have to check to see if the seg has
2199                  * an ops vector because the seg may have
2200                  * been in the middle of being set up when
2201                  * the process was picked for swapout.
2202                  */
2203                 if ((ov != NULL) && (ov->swapout != NULL))
2204                         swpcnt += SEGOP_SWAPOUT(seg);
2205         }
2206         AS_LOCK_EXIT(as, &as->a_lock);
2207         return (swpcnt);




  42  */
  43 
  44 #include <sys/types.h>
  45 #include <sys/t_lock.h>
  46 #include <sys/param.h>
  47 #include <sys/errno.h>
  48 #include <sys/systm.h>
  49 #include <sys/mman.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/sysinfo.h>
  53 #include <sys/kmem.h>
  54 #include <sys/vnode.h>
  55 #include <sys/vmsystm.h>
  56 #include <sys/cmn_err.h>
  57 #include <sys/debug.h>
  58 #include <sys/tnf_probe.h>
  59 #include <sys/vtrace.h>
  60 
  61 #include <vm/hat.h>

  62 #include <vm/as.h>
  63 #include <vm/seg.h>
  64 #include <vm/seg_vn.h>
  65 #include <vm/seg_dev.h>
  66 #include <vm/seg_kmem.h>
  67 #include <vm/seg_map.h>
  68 #include <vm/seg_spt.h>
  69 #include <vm/page.h>
  70 
  71 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
  72 
  73 static struct kmem_cache *as_cache;
  74 
  75 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
  76 static void as_clearwatchprot(struct as *, caddr_t, size_t);
  77 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
  78 
  79 
  80 /*
  81  * Verifying the segment lists is very time-consuming; it may not be


 653 
 654         as->a_flags          = 0;
 655         as->a_vbits          = 0;
 656         as->a_hrm            = NULL;
 657         as->a_seglast                = NULL;
 658         as->a_size           = 0;
 659         as->a_resvsize               = 0;
 660         as->a_updatedir              = 0;
 661         gethrestime(&as->a_updatetime);
 662         as->a_objectdir              = NULL;
 663         as->a_sizedir                = 0;
 664         as->a_userlimit              = (caddr_t)USERLIMIT;
 665         as->a_lastgap                = NULL;
 666         as->a_lastgaphl              = NULL;
 667         as->a_callbacks              = NULL;
 668 
 669         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 670         as->a_hat = hat_alloc(as);   /* create hat for default system mmu */
 671         AS_LOCK_EXIT(as, &as->a_lock);
 672 


 673         return (as);
 674 }
 675 
 676 /*
 677  * Free an address space data structure.
 678  * Need to free the hat first and then
 679  * all the segments on this as and finally
 680  * the space for the as struct itself.
 681  */
 682 void
 683 as_free(struct as *as)
 684 {
 685         struct hat *hat = as->a_hat;
 686         struct seg *seg, *next;
 687         boolean_t free_started = B_FALSE;
 688 
 689 top:
 690         /*
 691          * Invoke ALL callbacks. as_do_callbacks will do one callback
 692          * per call, and not return (-1) until the callback has completed.
 693          * When as_do_callbacks returns zero, all callbacks have completed.
 694          */
 695         mutex_enter(&as->a_contents);
 696         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 697                 ;
 698 



 699         mutex_exit(&as->a_contents);
 700         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 701 
 702         if (!free_started) {
 703                 free_started = B_TRUE;
 704                 hat_free_start(hat);


 705         }
 706         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 707                 int err;
 708 
 709                 next = AS_SEGNEXT(as, seg);
 710 retry:
 711                 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
 712                 if (err == EAGAIN) {
 713                         mutex_enter(&as->a_contents);
 714                         if (as->a_callbacks) {
 715                                 AS_LOCK_EXIT(as, &as->a_lock);
 716                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 717                                 /*
 718                                  * Memory is currently locked. Wait for a
 719                                  * cv_signal that it has been unlocked, then
 720                                  * try the operation again.
 721                                  */
 722                                 if (AS_ISUNMAPWAIT(as) == 0)
 723                                         cv_broadcast(&as->a_cv);
 724                                 AS_SETUNMAPWAIT(as);


 734                                  * 0.  We don't drop as writer lock so our
 735                                  * number of retries without sleeping should
 736                                  * be very small. See segvn_reclaim() for
 737                                  * more comments.
 738                                  */
 739                                 AS_CLRNOUNMAPWAIT(as);
 740                                 mutex_exit(&as->a_contents);
 741                                 goto retry;
 742                         }
 743                         mutex_exit(&as->a_contents);
 744                         goto top;
 745                 } else {
 746                         /*
 747                          * We do not expect any other error return at this
 748                          * time. This is similar to an ASSERT in seg_unmap()
 749                          */
 750                         ASSERT(err == 0);
 751                 }
 752         }
 753         hat_free_end(hat);


 754         AS_LOCK_EXIT(as, &as->a_lock);
 755 
 756         /* /proc stuff */
 757         ASSERT(avl_numnodes(&as->a_wpage) == 0);
 758         if (as->a_objectdir) {
 759                 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
 760                 as->a_objectdir = NULL;
 761                 as->a_sizedir = 0;
 762         }
 763 
 764         /*
 765          * Free the struct as back to kmem.  Assert it has no segments.
 766          */
 767         ASSERT(avl_numnodes(&as->a_segtree) == 0);
 768         kmem_cache_free(as_cache, as);
 769 }
 770 
/*
 * Duplicate the address space 'as' for the forked process 'forkedproc'
 * (post-XHAT-removal version).
 *
 * Every segment except those marked S_PURGE is re-created in the new
 * address space and duplicated via its segment driver.  Returns 0 on
 * success (and installs the new as in forkedproc->p_as), -1 if a segment
 * could not be allocated, or the error from SEGOP_DUP()/hat_dup().
 */
int
as_dup(struct as *as, struct proc *forkedproc)
{
	struct as *newas;
	struct seg *seg, *newseg;
	size_t	purgesize = 0;	/* total size of S_PURGE segments skipped */
	int error;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	/* Drop watchpoint protections while the parent's mappings are copied. */
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	newas->a_proc = forkedproc;

	AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);

	/*
	 * First hat_dup() pass with HAT_DUP_SRD; presumably this shares
	 * region descriptors before segments are copied — TODO confirm
	 * against hat layer documentation.
	 */
	(void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE) {
			/* S_PURGE segments are not inherited by the child. */
			purgesize += seg->s_size;
			continue;
		}

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			/* Undo: drop locks, restore watchpoints, free child as. */
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			seg_free(newseg);
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}
	/* Skipped S_PURGE segments reduce the child's reserved size. */
	newas->a_resvsize = as->a_resvsize - purgesize;

	/* Second hat_dup() pass copies the full translations. */
	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);

	AS_LOCK_EXIT(newas, &newas->a_lock);

	/* Re-arm watchpoints on the parent before releasing it. */
	as_setwatch(as);
	AS_LOCK_EXIT(as, &as->a_lock);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	forkedproc->p_as = newas;
	return (0);
}
 834 
 835 /*
 836  * Handle a ``fault'' at addr for size bytes.
 837  */
 838 faultcode_t
 839 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
 840         enum fault_type type, enum seg_rw rw)
 841 {
 842         struct seg *seg;
 843         caddr_t raddr;                  /* rounded down addr */
 844         size_t rsize;                   /* rounded up size */
 845         size_t ssize;
 846         faultcode_t res = 0;
 847         caddr_t addrsav;
 848         struct seg *segsav;
 849         int as_lock_held;
 850         klwp_t *lwp = ttolwp(curthread);





 851 



 852 



 853 
 854 retry:

 855         /*
 856          * Indicate that the lwp is not to be stopped while waiting for a
 857          * pagefault.  This is to avoid deadlock while debugging a process
 858          * via /proc over NFS (in particular).
 859          */
 860         if (lwp != NULL)
 861                 lwp->lwp_nostop++;
 862 
 863         /*
 864          * same length must be used when we softlock and softunlock.  We
 865          * don't support softunlocking lengths less than the original length
 866          * when there is largepage support.  See seg_dev.c for more
 867          * comments.
 868          */
 869         switch (type) {
 870 
 871         case F_SOFTLOCK:
 872                 CPU_STATS_ADD_K(vm, softlock, 1);
 873                 break;
 874 
 875         case F_SOFTUNLOCK:
 876                 break;
 877 
 878         case F_PROT:
 879                 CPU_STATS_ADD_K(vm, prot_fault, 1);
 880                 break;
 881 
 882         case F_INVAL:
 883                 CPU_STATS_ENTER_K();
 884                 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
 885                 if (as == &kas)
 886                         CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
 887                 CPU_STATS_EXIT_K();
 888                 break;
 889         }

 890 
 891         /* Kernel probe */
 892         TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
 893             tnf_opaque, address,        addr,
 894             tnf_fault_type,     fault_type,     type,
 895             tnf_seg_access,     access,         rw);
 896 
 897         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 898         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 899             (size_t)raddr;
 900 
 901         /*
 902          * XXX -- Don't grab the as lock for segkmap. We should grab it for
 903          * correctness, but then we could be stuck holding this lock for
 904          * a LONG time if the fault needs to be resolved on a slow
 905          * filesystem, and then no-one will be able to exec new commands,
 906          * as exec'ing requires the write lock on the as.
 907          */
 908         if (as == &kas && segkmap && segkmap->s_base <= raddr &&
 909             raddr + size < segkmap->s_base + segkmap->s_size) {




 910                 seg = segkmap;
 911                 as_lock_held = 0;
 912         } else {
 913                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
















 914 
 915                 seg = as_segat(as, raddr);
 916                 if (seg == NULL) {
 917                         AS_LOCK_EXIT(as, &as->a_lock);
 918                         if (lwp != NULL)
 919                                 lwp->lwp_nostop--;
 920                         return (FC_NOMAP);
 921                 }
 922 
 923                 as_lock_held = 1;
 924         }
 925 
 926         addrsav = raddr;
 927         segsav = seg;
 928 
 929         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 930                 if (raddr >= seg->s_base + seg->s_size) {
 931                         seg = AS_SEGNEXT(as, seg);
 932                         if (seg == NULL || raddr != seg->s_base) {
 933                                 res = FC_NOMAP;
 934                                 break;
 935                         }
 936                 }
 937                 if (raddr + rsize > seg->s_base + seg->s_size)
 938                         ssize = seg->s_base + seg->s_size - raddr;
 939                 else
 940                         ssize = rsize;
 941 














 942                 res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);







 943                 if (res != 0)
 944                         break;





 945         }
 946 
 947         /*
 948          * If we were SOFTLOCKing and encountered a failure,
 949          * we must SOFTUNLOCK the range we already did. (Maybe we
 950          * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
 951          * right here...)
 952          */
 953         if (res != 0 && type == F_SOFTLOCK) {
 954                 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
 955                         if (addrsav >= seg->s_base + seg->s_size)
 956                                 seg = AS_SEGNEXT(as, seg);
 957                         ASSERT(seg != NULL);
 958                         /*
 959                          * Now call the fault routine again to perform the
 960                          * unlock using S_OTHER instead of the rw variable
 961                          * since we never got a chance to touch the pages.
 962                          */
 963                         if (raddr > seg->s_base + seg->s_size)
 964                                 ssize = seg->s_base + seg->s_size - addrsav;
 965                         else
 966                                 ssize = raddr - addrsav;
 967                         (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
 968                             F_SOFTUNLOCK, S_OTHER);
 969                 }
 970         }
 971         if (as_lock_held)
 972                 AS_LOCK_EXIT(as, &as->a_lock);
 973         if (lwp != NULL)
 974                 lwp->lwp_nostop--;
 975 
 976         /*
 977          * If the lower levels returned EDEADLK for a fault,
 978          * it means that we should retry the fault.  Let's wait
 979          * a bit also to let the deadlock causing condition clear.
 980          * This is part of a gross hack to work around a design flaw
 981          * in the ufs/sds logging code and should go away when the
 982          * logging code is re-designed to fix the problem. See bug
 983          * 4125102 for details of the problem.
 984          */
 985         if (FC_ERRNO(res) == EDEADLK) {
 986                 delay(deadlk_wait);
 987                 res = 0;
 988                 goto retry;
 989         }
 990         return (res);
 991 }
 992 
 993 


2058  * memory requirements.  Its usefulness for this purpose depends on
2059  * how well the segment-level routines do at returning accurate
2060  * information.
2061  */
2062 size_t
2063 as_swapout(struct as *as)
2064 {
2065         struct seg *seg;
2066         size_t swpcnt = 0;
2067 
2068         /*
2069          * Kernel-only processes have given up their address
2070          * spaces.  Of course, we shouldn't be attempting to
2071          * swap out such processes in the first place...
2072          */
2073         if (as == NULL)
2074                 return (0);
2075 
2076         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2077 






2078         /*
2079          * Free all mapping resources associated with the address
2080          * space.  The segment-level swapout routines capitalize
2081          * on this unmapping by scavenging pages that have become
2082          * unmapped here.
2083          */
2084         hat_swapout(as->a_hat);






2085 
2086         /*
2087          * Call the swapout routines of all segments in the address
2088          * space to do the actual work, accumulating the amount of
2089          * space reclaimed.
2090          */
2091         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2092                 struct seg_ops *ov = seg->s_ops;
2093 
2094                 /*
2095                  * We have to check to see if the seg has
2096                  * an ops vector because the seg may have
2097                  * been in the middle of being set up when
2098                  * the process was picked for swapout.
2099                  */
2100                 if ((ov != NULL) && (ov->swapout != NULL))
2101                         swpcnt += SEGOP_SWAPOUT(seg);
2102         }
2103         AS_LOCK_EXIT(as, &as->a_lock);
2104         return (swpcnt);