PVN_GETPAGE_{SZ,NUM} are misnamed and unnecessarily complicated
There is really no reason not to allow 8 pages all the time.  With the
current logic, we get the following:
Assuming 4kB pages (x86):
    _SZ  = ptob(8) /* 32kB */
    _NUM = 8
Assuming 8kB pages (sparc):
    _SZ  = ptob(8) /* 64kB */
    _NUM = 8
We'd have to deal with 16kB base pages in order for the _NUM #define to not
be 8 (it'd be 4 in that case).  So, in the spirit of simplicity, let's just
always grab 8 pages as there are no interesting systems with 16kB+ base pages.
Finally, the defines are poorly named.
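For reference, here is the old logic and its replacement, condensed from the
diff further down (the comment spelling out the arithmetic is mine):

/*
 * Old: use whichever is smaller, 8 pages or 64kB worth of pages.
 *   4kB pages (x86):    8 * 4kB = 32kB < 64kB  ->  8 pages
 *   8kB pages (sparc):  8 * 8kB = 64kB         ->  64kB / 8kB = 8 pages
 * Only a 16kB+ base page size would change the answer (64kB / 16kB = 4).
 */
#if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
#define	PVN_GETPAGE_SZ	ptob(PVN_MAX_GETPAGE_NUM)
#define	PVN_GETPAGE_NUM	PVN_MAX_GETPAGE_NUM
#else
#define	PVN_GETPAGE_SZ	PVN_MAX_GETPAGE_SZ
#define	PVN_GETPAGE_NUM	btop(PVN_MAX_GETPAGE_SZ)
#endif

/* New: always 8 pages, under a name that says what the array is for. */
#define	FAULT_TMP_PAGES_NUM	0x8
#define	FAULT_TMP_PAGES_SZ	ptob(FAULT_TMP_PAGES_NUM)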
const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, treat a NULL capable segop function pointer as shorthand for
"no capabilities supported".
lower-case-segops (convert SEGOP_* macro calls into segop_* functions)
use C99 initializers in segment ops structures
remove whole-process swapping
Long before Unix supported paging, it used whole-process swapping to reclaim
memory.  The code is still there, and in theory it runs when we get
*extremely* low on memory.  In practice, it never runs, since the definition
of low-on-memory is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
remove xhat
The xhat infrastructure was added to support hardware such as the zulu
graphics card - hardware which had on-board MMUs.  The VM used the xhat code
to keep the CPU's and Zulu's page tables in-sync.  Since the only xhat user
was zulu (which is gone), we can safely remove it simplifying the whole VM
subsystem.
Assorted notes:
- AS_BUSY flag was used solely by xhat

*** 76,102 ****
  #include <sys/zone.h>
  #include <sys/shm_impl.h>
  
  /*
   * segvn_fault needs a temporary page list array. To avoid calling kmem all
! * the time, it creates a small (PVN_GETPAGE_NUM entry) array and uses it if
! * it can. In the rare case when this page list is not large enough, it
! * goes and gets a large enough array from kmem.
! *
! * This small page list array covers either 8 pages or 64kB worth of pages -
! * whichever is smaller.
! */
! #define	PVN_MAX_GETPAGE_SZ	0x10000
! #define	PVN_MAX_GETPAGE_NUM	0x8
! 
! #if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
! #define	PVN_GETPAGE_SZ	ptob(PVN_MAX_GETPAGE_NUM)
! #define	PVN_GETPAGE_NUM	PVN_MAX_GETPAGE_NUM
! #else
! #define	PVN_GETPAGE_SZ	PVN_MAX_GETPAGE_SZ
! #define	PVN_GETPAGE_NUM	btop(PVN_MAX_GETPAGE_SZ)
! #endif
  
  /*
   * Private seg op routines.
   */
  static int	segvn_dup(struct seg *seg, struct seg *newseg);
--- 76,91 ----
  #include <sys/zone.h>
  #include <sys/shm_impl.h>
  
  /*
   * segvn_fault needs a temporary page list array. To avoid calling kmem all
! * the time, it creates a small (FAULT_TMP_PAGES_NUM entry) array and uses
! * it if it can. In the rare case when this page list is not large enough,
! * it goes and gets a large enough array from kmem.
! */
! #define	FAULT_TMP_PAGES_NUM	0x8
! #define	FAULT_TMP_PAGES_SZ	ptob(FAULT_TMP_PAGES_NUM)
  
  /*
   * Private seg op routines.
   */
  static int	segvn_dup(struct seg *seg, struct seg *newseg);
*** 109,119 ****
  static int	segvn_setprot(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t prot);
  static int	segvn_checkprot(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t prot);
  static int	segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
- static size_t	segvn_swapout(struct seg *seg);
  static int	segvn_sync(struct seg *seg, caddr_t addr, size_t len,
  		    int attr, uint_t flags);
  static size_t	segvn_incore(struct seg *seg, caddr_t addr, size_t len,
  		    char *vec);
  static int	segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
--- 98,107 ----
*** 132,169 ****
  static int	segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t szc);
  static int	segvn_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  static lgrp_mem_policy_info_t	*segvn_getpolicy(struct seg *, caddr_t);
- static int	segvn_capable(struct seg *seg, segcapability_t capable);
  static int	segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
  
! struct seg_ops segvn_ops = {
! 	segvn_dup,
! 	segvn_unmap,
! 	segvn_free,
! 	segvn_fault,
! 	segvn_faulta,
! 	segvn_setprot,
! 	segvn_checkprot,
! 	segvn_kluster,
! 	segvn_swapout,
! 	segvn_sync,
! 	segvn_incore,
! 	segvn_lockop,
! 	segvn_getprot,
! 	segvn_getoffset,
! 	segvn_gettype,
! 	segvn_getvp,
! 	segvn_advise,
! 	segvn_dump,
! 	segvn_pagelock,
! 	segvn_setpagesize,
! 	segvn_getmemid,
! 	segvn_getpolicy,
! 	segvn_capable,
! 	segvn_inherit
  };
  
  /*
   * Common zfod structures, provided as a shorthand for others to use.
   */
--- 120,154 ----
  static int	segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t szc);
  static int	segvn_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  static lgrp_mem_policy_info_t	*segvn_getpolicy(struct seg *, caddr_t);
  static int	segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
  
! const struct seg_ops segvn_ops = {
! 	.dup		= segvn_dup,
! 	.unmap		= segvn_unmap,
! 	.free		= segvn_free,
! 	.fault		= segvn_fault,
! 	.faulta		= segvn_faulta,
! 	.setprot	= segvn_setprot,
! 	.checkprot	= segvn_checkprot,
! 	.kluster	= segvn_kluster,
! 	.sync		= segvn_sync,
! 	.incore		= segvn_incore,
! 	.lockop		= segvn_lockop,
! 	.getprot	= segvn_getprot,
! 	.getoffset	= segvn_getoffset,
! 	.gettype	= segvn_gettype,
! 	.getvp		= segvn_getvp,
! 	.advise		= segvn_advise,
! 	.dump		= segvn_dump,
! 	.pagelock	= segvn_pagelock,
! 	.setpagesize	= segvn_setpagesize,
! 	.getmemid	= segvn_getmemid,
! 	.getpolicy	= segvn_getpolicy,
! 	.inherit	= segvn_inherit,
  };
  
  /*
   * Common zfod structures, provided as a shorthand for others to use.
   */
*** 3854,3864 ****
  	anon_sync_obj_t an_cookie;
  	enum seg_rw arw;
  	int alloc_failed = 0;
  	int adjszc_chk;
  	struct vattr va;
- 	int xhat = 0;
  	page_t *pplist;
  	pfn_t pfn;
  	int physcontig;
  	int upgrdfail;
  	int segvn_anypgsz_vnode = 0; /* for now map vnode with 2 page sizes */
--- 3839,3848 ----
*** 3904,3917 ****
  	} else {
  		prot = svd->prot;
  		/* caller has already done segment level protection check. */
  	}
  
- 	if (seg->s_as->a_hat != hat) {
- 		xhat = 1;
- 	}
- 
  	if (rw == S_WRITE && segtype == MAP_PRIVATE) {
  		SEGVN_VMSTAT_FLTVNPAGES(2);
  		arw = S_READ;
  	} else {
  		arw = rw;
--- 3888,3897 ----
*** 4263,4291 ****
  				if (PP_ISMIGRATE(ppa[0])) {
  					page_migrate(seg, a, ppa, pages);
  				}
  				SEGVN_UPDATE_MODBITS(ppa, pages, rw,
  				    prot, vpprot);
- 				if (!xhat) {
  					hat_memload_array_region(hat, a, pgsz,
  					    ppa, prot & vpprot, hat_flag,
  					    svd->rcookie);
- 				} else {
- 					/*
- 					 * avoid large xhat mappings to FS
- 					 * pages so that hat_page_demote()
- 					 * doesn't need to check for xhat
- 					 * large mappings.
- 					 * Don't use regions with xhats.
- 					 */
- 					for (i = 0; i < pages; i++) {
- 						hat_memload(hat,
- 						    a + (i << PAGESHIFT),
- 						    ppa[i], prot & vpprot,
- 						    hat_flag);
- 					}
- 				}
  				if (!(hat_flag & HAT_LOAD_LOCK)) {
  					for (i = 0; i < pages; i++) {
  						page_unlock(ppa[i]);
  					}
--- 4243,4255 ----
*** 4335,4345 ****
  			/*
  			 * check if we should use smallest mapping size.
  			 */
  			upgrdfail = 0;
! 			if (szc == 0 || xhat ||
  			    (pszc >= szc &&
  			    !IS_P2ALIGNED(pfn, pages)) ||
  			    (pszc < szc &&
  			    !segvn_full_szcpages(ppa, szc, &upgrdfail,
  			    &pszc))) {
--- 4299,4309 ----
  			/*
  			 * check if we should use smallest mapping size.
  			 */
  			upgrdfail = 0;
! 			if (szc == 0 ||
  			    (pszc >= szc &&
  			    !IS_P2ALIGNED(pfn, pages)) ||
  			    (pszc < szc &&
  			    !segvn_full_szcpages(ppa, szc, &upgrdfail,
  			    &pszc))) {
*** 4367,4377 ****
  				ANON_LOCK_EXIT(&amp->a_rwlock);
  			}
  			ierr = -1;
  			break;
  		}
! 		if (szc != 0 && !xhat && !upgrdfail) {
  			segvn_faultvnmpss_align_err5++;
  		}
  		SEGVN_VMSTAT_FLTVNPAGES(34);
  		if (pplist != NULL) {
  			page_free_replacement_page(pplist);
--- 4331,4341 ----
  				ANON_LOCK_EXIT(&amp->a_rwlock);
  			}
  			ierr = -1;
  			break;
  		}
! 		if (szc != 0 && !upgrdfail) {
  			segvn_faultvnmpss_align_err5++;
  		}
  		SEGVN_VMSTAT_FLTVNPAGES(34);
  		if (pplist != NULL) {
  			page_free_replacement_page(pplist);
*** 4948,4958 ****
  	u_offset_t off;
  	caddr_t a;
  	struct vpage *vpage;
  	uint_t vpprot, prot;
  	int err;
! 	page_t *pl[PVN_GETPAGE_NUM + 1];
  	size_t plsz, pl_alloc_sz;
  	size_t page;
  	ulong_t anon_index;
  	struct anon_map *amp;
  	int dogetpage = 0;
--- 4912,4922 ----
  	u_offset_t off;
  	caddr_t a;
  	struct vpage *vpage;
  	uint_t vpprot, prot;
  	int err;
! 	page_t *pl[FAULT_TMP_PAGES_NUM + 1];
  	size_t plsz, pl_alloc_sz;
  	size_t page;
  	ulong_t anon_index;
  	struct anon_map *amp;
  	int dogetpage = 0;
*** 5387,5397 ****
  	if (dogetpage) {
  		enum seg_rw arw;
  		struct as *as = seg->s_as;
  
! 		if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) {
  			/*
  			 * Page list won't fit in local array,
  			 * allocate one of the needed size.
  			 */
  			pl_alloc_sz =
--- 5351,5361 ----
  	if (dogetpage) {
  		enum seg_rw arw;
  		struct as *as = seg->s_as;
  
! 		if (len > FAULT_TMP_PAGES_SZ) {
  			/*
  			 * Page list won't fit in local array,
  			 * allocate one of the needed size.
  			 */
  			pl_alloc_sz =
*** 5415,5425 ****
  		} else {
  			/*
  			 * Ask VOP_GETPAGE to return adjacent pages
  			 * within the segment.
  			 */
! 			plsz = MIN((size_t)PVN_GETPAGE_SZ, (size_t)
  			    ((seg->s_base + seg->s_size) - addr));
  			ASSERT((addr + plsz) <=
  			    (seg->s_base + seg->s_size));
  		}
--- 5379,5389 ----
  		} else {
  			/*
  			 * Ask VOP_GETPAGE to return adjacent pages
  			 * within the segment.
  			 */
! 			plsz = MIN((size_t)FAULT_TMP_PAGES_SZ, (size_t)
  			    ((seg->s_base + seg->s_size) - addr));
  			ASSERT((addr + plsz) <=
  			    (seg->s_base + seg->s_size));
  		}
*** 6082,6092 ****
  	return (0);
  }
  
  /*
! * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize,
   * to determine if the seg is capable of mapping the requested szc.
   */
  static int
  segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
  {
--- 6046,6056 ----
  	return (0);
  }
  
  /*
! * segvn_setpagesize is called via segop_setpagesize from as_setpagesize,
   * to determine if the seg is capable of mapping the requested szc.
   */
  static int
  segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
  {
*** 7070,7262 ****
  		return (-1);
  	return (0);
  }
  
  /*
- * Swap the pages of seg out to secondary storage, returning the
- * number of bytes of storage freed.
- *
- * The basic idea is first to unload all translations and then to call
- * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
- * swap device. Pages to which other segments have mappings will remain
- * mapped and won't be swapped. Our caller (as_swapout) has already
- * performed the unloading step.
- *
- * The value returned is intended to correlate well with the process's
- * memory requirements. However, there are some caveats:
- * 1)	When given a shared segment as argument, this routine will
- *	only succeed in swapping out pages for the last sharer of the
- *	segment. (Previous callers will only have decremented mapping
- *	reference counts.)
- * 2)	We assume that the hat layer maintains a large enough translation
- *	cache to capture process reference patterns.
- */
- static size_t
- segvn_swapout(struct seg *seg)
- {
- 	struct segvn_data *svd = (struct segvn_data *)seg->s_data;
- 	struct anon_map *amp;
- 	pgcnt_t pgcnt = 0;
- 	pgcnt_t npages;
- 	pgcnt_t page;
- 	ulong_t anon_index;
- 
- 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
- 
- 	SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
- 	/*
- 	 * Find pages unmapped by our caller and force them
- 	 * out to the virtual swap device.
- 	 */
- 	if ((amp = svd->amp) != NULL)
- 		anon_index = svd->anon_index;
- 	npages = seg->s_size >> PAGESHIFT;
- 	for (page = 0; page < npages; page++) {
- 		page_t *pp;
- 		struct anon *ap;
- 		struct vnode *vp;
- 		u_offset_t off;
- 		anon_sync_obj_t cookie;
- 
- 		/*
- 		 * Obtain <vp, off> pair for the page, then look it up.
- 		 *
- 		 * Note that this code is willing to consider regular
- 		 * pages as well as anon pages. Is this appropriate here?
- 		 */
- 		ap = NULL;
- 		if (amp != NULL) {
- 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
- 			if (anon_array_try_enter(amp, anon_index + page,
- 			    &cookie)) {
- 				ANON_LOCK_EXIT(&amp->a_rwlock);
- 				continue;
- 			}
- 			ap = anon_get_ptr(amp->ahp, anon_index + page);
- 			if (ap != NULL) {
- 				swap_xlate(ap, &vp, &off);
- 			} else {
- 				vp = svd->vp;
- 				off = svd->offset + ptob(page);
- 			}
- 			anon_array_exit(&cookie);
- 			ANON_LOCK_EXIT(&amp->a_rwlock);
- 		} else {
- 			vp = svd->vp;
- 			off = svd->offset + ptob(page);
- 		}
- 		if (vp == NULL) {		/* untouched zfod page */
- 			ASSERT(ap == NULL);
- 			continue;
- 		}
- 
- 		pp = page_lookup_nowait(vp, off, SE_SHARED);
- 		if (pp == NULL)
- 			continue;
- 
- 
- 		/*
- 		 * Examine the page to see whether it can be tossed out,
- 		 * keeping track of how many we've found.
- 		 */
- 		if (!page_tryupgrade(pp)) {
- 			/*
- 			 * If the page has an i/o lock and no mappings,
- 			 * it's very likely that the page is being
- 			 * written out as a result of klustering.
- 			 * Assume this is so and take credit for it here.
- 			 */
- 			if (!page_io_trylock(pp)) {
- 				if (!hat_page_is_mapped(pp))
- 					pgcnt++;
- 			} else {
- 				page_io_unlock(pp);
- 			}
- 			page_unlock(pp);
- 			continue;
- 		}
- 		ASSERT(!page_iolock_assert(pp));
- 
- 
- 		/*
- 		 * Skip if page is locked or has mappings.
- 		 * We don't need the page_struct_lock to look at lckcnt
- 		 * and cowcnt because the page is exclusive locked.
- 		 */
- 		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
- 		    hat_page_is_mapped(pp)) {
- 			page_unlock(pp);
- 			continue;
- 		}
- 
- 		/*
- 		 * dispose skips large pages so try to demote first.
- 		 */
- 		if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
- 			page_unlock(pp);
- 			/*
- 			 * XXX should skip the remaining page_t's of this
- 			 * large page.
- 			 */
- 			continue;
- 		}
- 
- 		ASSERT(pp->p_szc == 0);
- 
- 		/*
- 		 * No longer mapped -- we can toss it out. How
- 		 * we do so depends on whether or not it's dirty.
- 		 */
- 		if (hat_ismod(pp) && pp->p_vnode) {
- 			/*
- 			 * We must clean the page before it can be
- 			 * freed. Setting B_FREE will cause pvn_done
- 			 * to free the page when the i/o completes.
- 			 * XXX: This also causes it to be accounted
- 			 * as a pageout instead of a swap: need
- 			 * B_SWAPOUT bit to use instead of B_FREE.
- 			 *
- 			 * Hold the vnode before releasing the page lock
- 			 * to prevent it from being freed and re-used by
- 			 * some other thread.
- 			 */
- 			VN_HOLD(vp);
- 			page_unlock(pp);
- 
- 			/*
- 			 * Queue all i/o requests for the pageout thread
- 			 * to avoid saturating the pageout devices.
- 			 */
- 			if (!queue_io_request(vp, off))
- 				VN_RELE(vp);
- 		} else {
- 			/*
- 			 * The page was clean, free it.
- 			 *
- 			 * XXX: Can we ever encounter modified pages
- 			 * with no associated vnode here?
- 			 */
- 			ASSERT(pp->p_vnode != NULL);
- 			/*LINTED: constant in conditional context*/
- 			VN_DISPOSE(pp, B_FREE, 0, kcred);
- 		}
- 
- 		/*
- 		 * Credit now even if i/o is in progress.
- 		 */
- 		pgcnt++;
- 	}
- 	SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
- 
- 	/*
- 	 * Wakeup pageout to initiate i/o on all queued requests.
- 	 */
- 	cv_signal_pageout();
- 	return (ptob(pgcnt));
- }
- 
- /*
   * Synchronize primary storage cache with real object in virtual memory.
   *
   * XXX - Anonymous pages should not be sync'ed out at all.
   */
  static int
--- 7034,7043 ----
*** 9689,9705 ****
  	}
  
  	return (policy_info);
  }
  
- /*ARGSUSED*/
- static int
- segvn_capable(struct seg *seg, segcapability_t capability)
- {
- 	return (0);
- }
- 
  /*
   * Bind text vnode segment to an amp. If we bind successfully mappings will be
   * established to per vnode mapping per lgroup amp pages instead of to vnode
   * pages. There's one amp per vnode text mapping per lgroup. Many processes
   * may share the same text replication amp. If a suitable amp doesn't already
--- 9470,9479 ----