PVN_GETPAGE_{SZ,NUM} are misnamed and unnecessarily complicated

There is really no reason not to allow 8 pages all the time.  With the
current logic, we get the following:

Assuming 4kB pages (x86):
    _SZ  = ptob(8) /* 32kB */
    _NUM = 8

Assuming 8kB pages (sparc):
    _SZ  = ptob(8) /* 64kB */
    _NUM = 8

We'd have to deal with 16kB base pages for the _NUM #define not to be 8
(it'd be 4 in that case).  So, in the spirit of simplicity, let's just
always grab 8 pages, as there are no interesting systems with 16kB+ base
pages.

Finally, the defines are poorly named: they exist only to size
segvn_fault's temporary page list, so rename them to
FAULT_TMP_PAGES_{NUM,SZ}.
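
For reference, here is how the conditional being removed evaluates,
annotated (PAGESIZE, ptob(), and btop() have their usual kernel
definitions):

    #define PVN_MAX_GETPAGE_SZ      0x10000         /* 64kB */
    #define PVN_MAX_GETPAGE_NUM     0x8

    #if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
    /* taken with 4kB pages: 64kB > 32kB, so _SZ = ptob(8) = 32kB, _NUM = 8 */
    #define PVN_GETPAGE_SZ  ptob(PVN_MAX_GETPAGE_NUM)
    #define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
    #else
    /*
     * taken with 8kB pages: 64kB is not > 64kB, so _SZ = 64kB and
     * _NUM = btop(64kB) = 8.  Only 16kB+ base pages would shrink _NUM to 4.
     */
    #define PVN_GETPAGE_SZ  PVN_MAX_GETPAGE_SZ
    #define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
    #endif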


  61 #include <sys/vm.h>
  62 #include <sys/dumphdr.h>
  63 #include <sys/lgrp.h>
  64 
  65 #include <vm/hat.h>
  66 #include <vm/as.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_vn.h>
  69 #include <vm/pvn.h>
  70 #include <vm/anon.h>
  71 #include <vm/page.h>
  72 #include <vm/vpage.h>
  73 #include <sys/proc.h>
  74 #include <sys/task.h>
  75 #include <sys/project.h>
  76 #include <sys/zone.h>
  77 #include <sys/shm_impl.h>
  78 
  79 /*
  80  * segvn_fault needs a temporary page list array.  To avoid calling kmem all
  81  * the time, it creates a small (PVN_GETPAGE_NUM entry) array and uses it if
  82  * it can.  In the rare case when this page list is not large enough, it
  83  * goes and gets a large enough array from kmem.
  84  *
  85  * This small page list array covers either 8 pages or 64kB worth of pages -
  86  * whichever is smaller.
  87  */
  88 #define PVN_MAX_GETPAGE_SZ      0x10000
  89 #define PVN_MAX_GETPAGE_NUM     0x8
  90 
  91 #if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
  92 #define PVN_GETPAGE_SZ  ptob(PVN_MAX_GETPAGE_NUM)
  93 #define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
  94 #else
  95 #define PVN_GETPAGE_SZ  PVN_MAX_GETPAGE_SZ
  96 #define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
  97 #endif
  98 
  99 /*
 100  * Private seg op routines.
 101  */
 102 static int      segvn_dup(struct seg *seg, struct seg *newseg);
 103 static int      segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
 104 static void     segvn_free(struct seg *seg);
 105 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
 106                     caddr_t addr, size_t len, enum fault_type type,
 107                     enum seg_rw rw);
 108 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
 109 static int      segvn_setprot(struct seg *seg, caddr_t addr,
 110                     size_t len, uint_t prot);
 111 static int      segvn_checkprot(struct seg *seg, caddr_t addr,
 112                     size_t len, uint_t prot);
 113 static int      segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
 114 static int      segvn_sync(struct seg *seg, caddr_t addr, size_t len,
 115                     int attr, uint_t flags);
 116 static size_t   segvn_incore(struct seg *seg, caddr_t addr, size_t len,
 117                     char *vec);


4908  *              Call VOP_GETPAGE over the range of non-anonymous pages
4909  *      endif
4910  *      Loop over all addresses requested
4911  *              Call segvn_faultpage passing in page list
4912  *                  to load up translations and handle anonymous pages
4913  *      endloop
4914  *      Load up translation to any additional pages in page list not
4915  *          already handled that fit into this segment
4916  */
4917 static faultcode_t
4918 segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
4919     enum fault_type type, enum seg_rw rw)
4920 {
4921         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
4922         page_t **plp, **ppp, *pp;
4923         u_offset_t off;
4924         caddr_t a;
4925         struct vpage *vpage;
4926         uint_t vpprot, prot;
4927         int err;
4928         page_t *pl[PVN_GETPAGE_NUM + 1];
4929         size_t plsz, pl_alloc_sz;
4930         size_t page;
4931         ulong_t anon_index;
4932         struct anon_map *amp;
4933         int dogetpage = 0;
4934         caddr_t lpgaddr, lpgeaddr;
4935         size_t pgsz;
4936         anon_sync_obj_t cookie;
4937         int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
4938 
4939         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
4940         ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE);
4941 
4942         /*
4943          * First handle the easy stuff
4944          */
4945         if (type == F_SOFTUNLOCK) {
4946                 if (rw == S_READ_NOCOW) {
4947                         rw = S_READ;
4948                         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));


5347                          * Only acquire reader lock to prevent amp->ahp
5348                          * from being changed.  It's ok to miss pages,
5349                          * hence we don't do anon_array_enter
5350                          */
5351                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
5352                         ap = anon_get_ptr(amp->ahp, anon_index);
5353 
5354                         if (len <= PAGESIZE)
5355                                 /* inline non_anon() */
5356                                 dogetpage = (ap == NULL);
5357                         else
5358                                 dogetpage = non_anon(amp->ahp, anon_index,
5359                                     &vp_off, &vp_len);
5360                         ANON_LOCK_EXIT(&amp->a_rwlock);
5361                 }
5362 
5363                 if (dogetpage) {
5364                         enum seg_rw arw;
5365                         struct as *as = seg->s_as;
5366 
5367                         if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) {
5368                                 /*
5369                                  * Page list won't fit in local array,
5370                                  * allocate one of the needed size.
5371                                  */
5372                                 pl_alloc_sz =
5373                                     (btop(len) + 1) * sizeof (page_t *);
5374                                 plp = kmem_alloc(pl_alloc_sz, KM_SLEEP);
5375                                 plp[0] = NULL;
5376                                 plsz = len;
5377                         } else if (rw == S_WRITE && svd->type == MAP_PRIVATE ||
5378                             svd->tr_state == SEGVN_TR_ON || rw == S_OTHER ||
5379                             (((size_t)(addr + PAGESIZE) <
5380                             (size_t)(seg->s_base + seg->s_size)) &&
5381                             hat_probe(as->a_hat, addr + PAGESIZE))) {
5382                                 /*
5383                                  * Ask VOP_GETPAGE to return the exact number
5384                                  * of pages if
5385                                  * (a) this is a COW fault, or
5386                                  * (b) this is a software fault, or
5387                                  * (c) next page is already mapped.
5388                                  */
5389                                 plsz = len;
5390                         } else {
5391                                 /*
5392                                  * Ask VOP_GETPAGE to return adjacent pages
5393                                  * within the segment.
5394                                  */
5395                                 plsz = MIN((size_t)PVN_GETPAGE_SZ, (size_t)
5396                                     ((seg->s_base + seg->s_size) - addr));
5397                                 ASSERT((addr + plsz) <=
5398                                     (seg->s_base + seg->s_size));
5399                         }
5400 
5401                         /*
5402                          * Need to get some non-anonymous pages.
5403                          * We need to make only one call to GETPAGE to do
5404                          * this to prevent certain deadlocking conditions
5405                          * when we are doing locking.  In this case
5406                          * non_anon() should have picked up the smallest
5407                          * range which includes all the non-anonymous
5408                          * pages in the requested range.  We have to
5409                          * be careful regarding which rw flag to pass in
5410                          * because on a private mapping, the underlying
5411                          * object is never allowed to be written.
5412                          */
5413                         if (rw == S_WRITE && svd->type == MAP_PRIVATE) {
5414                                 arw = S_READ;
5415                         } else {
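
The pattern at work above (a small local page list, with a kmem fallback
for oversized requests) looks like this in isolation.  This is a userland
sketch with hypothetical names; malloc/free stand in for
kmem_alloc/kmem_free:

    #include <stdio.h>
    #include <stdlib.h>

    #define TMP_SLOTS       8       /* stands in for FAULT_TMP_PAGES_NUM */

    static void
    process_range(size_t npages)
    {
            void *small[TMP_SLOTS + 1];     /* + 1 slot for the NULL terminator */
            void **list = small;
            size_t alloc_sz = 0;

            if (npages > TMP_SLOTS) {
                    /* Rare case: the request will not fit the local array. */
                    alloc_sz = (npages + 1) * sizeof (void *);
                    list = malloc(alloc_sz);
                    if (list == NULL)
                            return;         /* the kernel uses KM_SLEEP instead */
            }
            list[0] = NULL;

            /* ... fill the list and load translations here ... */
            (void) printf("%zu pages, %s array\n", npages,
                alloc_sz != 0 ? "heap" : "local");

            if (alloc_sz != 0)
                    free(list);
    }

    int
    main(void)
    {
            process_range(4);               /* fits in the local array */
            process_range(100);             /* takes the allocation path */
            return (0);
    }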




  61 #include <sys/vm.h>
  62 #include <sys/dumphdr.h>
  63 #include <sys/lgrp.h>
  64 
  65 #include <vm/hat.h>
  66 #include <vm/as.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_vn.h>
  69 #include <vm/pvn.h>
  70 #include <vm/anon.h>
  71 #include <vm/page.h>
  72 #include <vm/vpage.h>
  73 #include <sys/proc.h>
  74 #include <sys/task.h>
  75 #include <sys/project.h>
  76 #include <sys/zone.h>
  77 #include <sys/shm_impl.h>
  78 
  79 /*
  80  * segvn_fault needs a temporary page list array.  To avoid calling kmem all
  81  * the time, it creates a small (FAULT_TMP_PAGES_NUM entry) array and uses
  82  * it if it can.  In the rare case when this page list is not large enough,
  83  * it goes and gets a large enough array from kmem.
  84  */
  85 #define FAULT_TMP_PAGES_NUM     0x8
  86 #define FAULT_TMP_PAGES_SZ      ptob(FAULT_TMP_PAGES_NUM)
  87 
  88 /*
  89  * Private seg op routines.
  90  */
  91 static int      segvn_dup(struct seg *seg, struct seg *newseg);
  92 static int      segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
  93 static void     segvn_free(struct seg *seg);
  94 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
  95                     caddr_t addr, size_t len, enum fault_type type,
  96                     enum seg_rw rw);
  97 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
  98 static int      segvn_setprot(struct seg *seg, caddr_t addr,
  99                     size_t len, uint_t prot);
 100 static int      segvn_checkprot(struct seg *seg, caddr_t addr,
 101                     size_t len, uint_t prot);
 102 static int      segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
 103 static int      segvn_sync(struct seg *seg, caddr_t addr, size_t len,
 104                     int attr, uint_t flags);
 105 static size_t   segvn_incore(struct seg *seg, caddr_t addr, size_t len,
 106                     char *vec);


4897  *              Call VOP_GETPAGE over the range of non-anonymous pages
4898  *      endif
4899  *      Loop over all addresses requested
4900  *              Call segvn_faultpage passing in page list
4901  *                  to load up translations and handle anonymous pages
4902  *      endloop
4903  *      Load up translation to any additional pages in page list not
4904  *          already handled that fit into this segment
4905  */
4906 static faultcode_t
4907 segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
4908     enum fault_type type, enum seg_rw rw)
4909 {
4910         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
4911         page_t **plp, **ppp, *pp;
4912         u_offset_t off;
4913         caddr_t a;
4914         struct vpage *vpage;
4915         uint_t vpprot, prot;
4916         int err;
4917         page_t *pl[FAULT_TMP_PAGES_NUM + 1];
4918         size_t plsz, pl_alloc_sz;
4919         size_t page;
4920         ulong_t anon_index;
4921         struct anon_map *amp;
4922         int dogetpage = 0;
4923         caddr_t lpgaddr, lpgeaddr;
4924         size_t pgsz;
4925         anon_sync_obj_t cookie;
4926         int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
4927 
4928         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
4929         ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE);
4930 
4931         /*
4932          * First handle the easy stuff
4933          */
4934         if (type == F_SOFTUNLOCK) {
4935                 if (rw == S_READ_NOCOW) {
4936                         rw = S_READ;
4937                         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));


5336                          * Only acquire reader lock to prevent amp->ahp
5337                          * from being changed.  It's ok to miss pages,
5338                          * hence we don't do anon_array_enter
5339                          */
5340                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
5341                         ap = anon_get_ptr(amp->ahp, anon_index);
5342 
5343                         if (len <= PAGESIZE)
5344                                 /* inline non_anon() */
5345                                 dogetpage = (ap == NULL);
5346                         else
5347                                 dogetpage = non_anon(amp->ahp, anon_index,
5348                                     &vp_off, &vp_len);
5349                         ANON_LOCK_EXIT(&amp->a_rwlock);
5350                 }
5351 
5352                 if (dogetpage) {
5353                         enum seg_rw arw;
5354                         struct as *as = seg->s_as;
5355 
5356                         if (len > FAULT_TMP_PAGES_SZ) {
5357                                 /*
5358                                  * Page list won't fit in local array,
5359                                  * allocate one of the needed size.
5360                                  */
5361                                 pl_alloc_sz =
5362                                     (btop(len) + 1) * sizeof (page_t *);
5363                                 plp = kmem_alloc(pl_alloc_sz, KM_SLEEP);
5364                                 plp[0] = NULL;
5365                                 plsz = len;
5366                         } else if (rw == S_WRITE && svd->type == MAP_PRIVATE ||
5367                             svd->tr_state == SEGVN_TR_ON || rw == S_OTHER ||
5368                             (((size_t)(addr + PAGESIZE) <
5369                             (size_t)(seg->s_base + seg->s_size)) &&
5370                             hat_probe(as->a_hat, addr + PAGESIZE))) {
5371                                 /*
5372                                  * Ask VOP_GETPAGE to return the exact number
5373                                  * of pages if
5374                                  * (a) this is a COW fault, or
5375                                  * (b) this is a software fault, or
5376                                  * (c) next page is already mapped.
5377                                  */
5378                                 plsz = len;
5379                         } else {
5380                                 /*
5381                                  * Ask VOP_GETPAGE to return adjacent pages
5382                                  * within the segment.
5383                                  */
5384                                 plsz = MIN((size_t)FAULT_TMP_PAGES_SZ, (size_t)
5385                                     ((seg->s_base + seg->s_size) - addr));
5386                                 ASSERT((addr + plsz) <=
5387                                     (seg->s_base + seg->s_size));
5388                         }
5389 
5390                         /*
5391                          * Need to get some non-anonymous pages.
5392                          * We need to make only one call to GETPAGE to do
5393                          * this to prevent certain deadlocking conditions
5394                          * when we are doing locking.  In this case
5395                          * non_anon() should have picked up the smallest
5396                          * range which includes all the non-anonymous
5397                          * pages in the requested range.  We have to
5398                          * be careful regarding which rw flag to pass in
5399                          * because on a private mapping, the underlying
5400                          * object is never allowed to be written.
5401                          */
5402                         if (rw == S_WRITE && svd->type == MAP_PRIVATE) {
5403                                 arw = S_READ;
5404                         } else {
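
One detail worth noting about the new version: the rewritten length check
is exactly equivalent to the old sizeof-based one, because pl[] has
FAULT_TMP_PAGES_NUM + 1 entries (the extra slot holds the page list's
NULL terminator):

    ptob((sizeof (pl) / sizeof (pl[0])) - 1)
        == ptob((FAULT_TMP_PAGES_NUM + 1) - 1)
        == ptob(FAULT_TMP_PAGES_NUM)
        == FAULT_TMP_PAGES_SZ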