PVN_GETPAGE_{SZ,NUM} are misnamed and unnecessarily complicated
There is really no reason not to allow 8 pages all the time. With the
current logic, we get the following:
Assuming 4kB pages (x86):
_SZ = ptob(8) /* 32kB */
_NUM = 8
Assuming 8kB pages (sparc):
_SZ = ptob(8) /* 64kB */
_NUM = 8
We'd have to deal with 16kB base pages in order for the _NUM #define to not
be 8 (it'd be 4 in that case). So, in the spirit of simplicity, let's just
always grab 8 pages as there are no interesting systems with 16kB+ base pages.
Finally, the defines are poorly named.
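As a sanity check, here is a small user-space sketch (not kernel code; the
macro values are inlined, and ptob/btop are open-coded as a multiply and a
divide) that evaluates the old conditional for the three page sizes discussed
above:

    #include <stdio.h>

    /* Values from the old, now-deleted macros. */
    #define PVN_MAX_GETPAGE_SZ   0x10000UL  /* 64kB */
    #define PVN_MAX_GETPAGE_NUM  8UL

    int
    main(void)
    {
            unsigned long pagesizes[] = { 4096, 8192, 16384 };

            for (int i = 0; i < 3; i++) {
                    unsigned long ps = pagesizes[i];
                    unsigned long sz, num;

                    /* The old #if, evaluated at run time. */
                    if (PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * ps) {
                            sz = PVN_MAX_GETPAGE_NUM * ps;  /* ptob(_NUM) */
                            num = PVN_MAX_GETPAGE_NUM;
                    } else {
                            sz = PVN_MAX_GETPAGE_SZ;        /* 64kB cap */
                            num = PVN_MAX_GETPAGE_SZ / ps;  /* btop(_SZ) */
                    }
                    printf("%2lukB pages: _SZ = %2lukB, _NUM = %lu\n",
                        ps >> 10, sz >> 10, num);
            }
            return (0);
    }

It prints _NUM = 8 for both 4kB and 8kB pages; only a hypothetical 16kB base
page yields _NUM = 4.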
const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle a NULL capable segop function pointer as shorthand for
"no capabilities supported".
lower-case segops
Rename the SEGOP_* macros to lower-case segop_* equivalents.
use C99 initializers in segment ops structures
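A generic before/after sketch (the example_* names are made up; the actual
segvn_ops conversion appears in the diff below):

    /* Before: positional; every member must appear in declaration order. */
    struct seg_ops example_ops = {
            example_dup,
            example_unmap,
            /* ...and so on for every remaining member... */
    };

    /* After: C99 designated initializers; self-documenting, order-
     * independent, and unmentioned members (e.g. .capable) default to
     * NULL, which dovetails with the NULL-capable shorthand above. */
    const struct seg_ops example_ops = {
            .dup    = example_dup,
            .unmap  = example_unmap,
    };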
remove whole-process swapping
Long before Unix supported paging, it used whole-process swapping to reclaim
memory. The code is still there, and in theory it runs when we get *extremely*
low on memory. In practice it never runs, since the definition of
low-on-memory is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
remove xhat
The xhat infrastructure was added to support hardware such as the Zulu
graphics card - hardware with its own on-board MMU. The VM used the xhat code
to keep the CPU's and Zulu's page tables in sync. Since the only xhat user
was Zulu (which is gone), we can safely remove it, simplifying the whole VM
subsystem.
Assorted notes:
- AS_BUSY flag was used solely by xhat
@@ -76,27 +76,16 @@
#include <sys/zone.h>
#include <sys/shm_impl.h>
/*
* segvn_fault needs a temporary page list array. To avoid calling kmem all
- * the time, it creates a small (PVN_GETPAGE_NUM entry) array and uses it if
- * it can. In the rare case when this page list is not large enough, it
- * goes and gets a large enough array from kmem.
- *
- * This small page list array covers either 8 pages or 64kB worth of pages -
- * whichever is smaller.
- */
-#define PVN_MAX_GETPAGE_SZ 0x10000
-#define PVN_MAX_GETPAGE_NUM 0x8
-
-#if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
-#define PVN_GETPAGE_SZ ptob(PVN_MAX_GETPAGE_NUM)
-#define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
-#else
-#define PVN_GETPAGE_SZ PVN_MAX_GETPAGE_SZ
-#define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
-#endif
+ * the time, it creates a small (FAULT_TMP_PAGES_NUM entry) array and uses
+ * it if it can. In the rare case when this page list is not large enough,
+ * it goes and gets a large enough array from kmem.
+ */
+#define FAULT_TMP_PAGES_NUM 0x8
+#define FAULT_TMP_PAGES_SZ ptob(FAULT_TMP_PAGES_NUM)
/*
* Private seg op routines.
*/
static int segvn_dup(struct seg *seg, struct seg *newseg);
@@ -109,11 +98,10 @@
static int segvn_setprot(struct seg *seg, caddr_t addr,
size_t len, uint_t prot);
static int segvn_checkprot(struct seg *seg, caddr_t addr,
size_t len, uint_t prot);
static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
-static size_t segvn_swapout(struct seg *seg);
static int segvn_sync(struct seg *seg, caddr_t addr, size_t len,
int attr, uint_t flags);
static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len,
char *vec);
static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
@@ -132,38 +120,35 @@
static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
uint_t szc);
static int segvn_getmemid(struct seg *seg, caddr_t addr,
memid_t *memidp);
static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);
-static int segvn_capable(struct seg *seg, segcapability_t capable);
static int segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
-struct seg_ops segvn_ops = {
- segvn_dup,
- segvn_unmap,
- segvn_free,
- segvn_fault,
- segvn_faulta,
- segvn_setprot,
- segvn_checkprot,
- segvn_kluster,
- segvn_swapout,
- segvn_sync,
- segvn_incore,
- segvn_lockop,
- segvn_getprot,
- segvn_getoffset,
- segvn_gettype,
- segvn_getvp,
- segvn_advise,
- segvn_dump,
- segvn_pagelock,
- segvn_setpagesize,
- segvn_getmemid,
- segvn_getpolicy,
- segvn_capable,
- segvn_inherit
+const struct seg_ops segvn_ops = {
+ .dup = segvn_dup,
+ .unmap = segvn_unmap,
+ .free = segvn_free,
+ .fault = segvn_fault,
+ .faulta = segvn_faulta,
+ .setprot = segvn_setprot,
+ .checkprot = segvn_checkprot,
+ .kluster = segvn_kluster,
+ .sync = segvn_sync,
+ .incore = segvn_incore,
+ .lockop = segvn_lockop,
+ .getprot = segvn_getprot,
+ .getoffset = segvn_getoffset,
+ .gettype = segvn_gettype,
+ .getvp = segvn_getvp,
+ .advise = segvn_advise,
+ .dump = segvn_dump,
+ .pagelock = segvn_pagelock,
+ .setpagesize = segvn_setpagesize,
+ .getmemid = segvn_getmemid,
+ .getpolicy = segvn_getpolicy,
+ .inherit = segvn_inherit,
};
/*
* Common zfod structures, provided as a shorthand for others to use.
*/
@@ -3854,11 +3839,10 @@
anon_sync_obj_t an_cookie;
enum seg_rw arw;
int alloc_failed = 0;
int adjszc_chk;
struct vattr va;
- int xhat = 0;
page_t *pplist;
pfn_t pfn;
int physcontig;
int upgrdfail;
int segvn_anypgsz_vnode = 0; /* for now map vnode with 2 page sizes */
@@ -3904,14 +3888,10 @@
} else {
prot = svd->prot;
/* caller has already done segment level protection check. */
}
- if (seg->s_as->a_hat != hat) {
- xhat = 1;
- }
-
if (rw == S_WRITE && segtype == MAP_PRIVATE) {
SEGVN_VMSTAT_FLTVNPAGES(2);
arw = S_READ;
} else {
arw = rw;
@@ -4263,29 +4243,13 @@
if (PP_ISMIGRATE(ppa[0])) {
page_migrate(seg, a, ppa, pages);
}
SEGVN_UPDATE_MODBITS(ppa, pages, rw,
prot, vpprot);
- if (!xhat) {
hat_memload_array_region(hat, a, pgsz,
ppa, prot & vpprot, hat_flag,
svd->rcookie);
- } else {
- /*
- * avoid large xhat mappings to FS
- * pages so that hat_page_demote()
- * doesn't need to check for xhat
- * large mappings.
- * Don't use regions with xhats.
- */
- for (i = 0; i < pages; i++) {
- hat_memload(hat,
- a + (i << PAGESHIFT),
- ppa[i], prot & vpprot,
- hat_flag);
- }
- }
if (!(hat_flag & HAT_LOAD_LOCK)) {
for (i = 0; i < pages; i++) {
page_unlock(ppa[i]);
}
@@ -4335,11 +4299,11 @@
/*
* check if we should use smallest mapping size.
*/
upgrdfail = 0;
- if (szc == 0 || xhat ||
+ if (szc == 0 ||
(pszc >= szc &&
!IS_P2ALIGNED(pfn, pages)) ||
(pszc < szc &&
!segvn_full_szcpages(ppa, szc, &upgrdfail,
&pszc))) {
@@ -4367,11 +4331,11 @@
ANON_LOCK_EXIT(&amp->a_rwlock);
}
ierr = -1;
break;
}
- if (szc != 0 && !xhat && !upgrdfail) {
+ if (szc != 0 && !upgrdfail) {
segvn_faultvnmpss_align_err5++;
}
SEGVN_VMSTAT_FLTVNPAGES(34);
if (pplist != NULL) {
page_free_replacement_page(pplist);
@@ -4948,11 +4912,11 @@
u_offset_t off;
caddr_t a;
struct vpage *vpage;
uint_t vpprot, prot;
int err;
- page_t *pl[PVN_GETPAGE_NUM + 1];
+ page_t *pl[FAULT_TMP_PAGES_NUM + 1];
size_t plsz, pl_alloc_sz;
size_t page;
ulong_t anon_index;
struct anon_map *amp;
int dogetpage = 0;
@@ -5387,11 +5351,11 @@
if (dogetpage) {
enum seg_rw arw;
struct as *as = seg->s_as;
- if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) {
+ if (len > FAULT_TMP_PAGES_SZ) {
/*
* Page list won't fit in local array,
* allocate one of the needed size.
*/
pl_alloc_sz =
@@ -5415,11 +5379,11 @@
} else {
/*
* Ask VOP_GETPAGE to return adjacent pages
* within the segment.
*/
- plsz = MIN((size_t)PVN_GETPAGE_SZ, (size_t)
+ plsz = MIN((size_t)FAULT_TMP_PAGES_SZ, (size_t)
((seg->s_base + seg->s_size) - addr));
ASSERT((addr + plsz) <=
(seg->s_base + seg->s_size));
}
@@ -6082,11 +6046,11 @@
return (0);
}
/*
- * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize,
+ * segvn_setpagesize is called via segop_setpagesize from as_setpagesize,
* to determine if the seg is capable of mapping the requested szc.
*/
static int
segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
@@ -7070,193 +7034,10 @@
return (-1);
return (0);
}
/*
- * Swap the pages of seg out to secondary storage, returning the
- * number of bytes of storage freed.
- *
- * The basic idea is first to unload all translations and then to call
- * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
- * swap device. Pages to which other segments have mappings will remain
- * mapped and won't be swapped. Our caller (as_swapout) has already
- * performed the unloading step.
- *
- * The value returned is intended to correlate well with the process's
- * memory requirements. However, there are some caveats:
- * 1) When given a shared segment as argument, this routine will
- * only succeed in swapping out pages for the last sharer of the
- * segment. (Previous callers will only have decremented mapping
- * reference counts.)
- * 2) We assume that the hat layer maintains a large enough translation
- * cache to capture process reference patterns.
- */
-static size_t
-segvn_swapout(struct seg *seg)
-{
- struct segvn_data *svd = (struct segvn_data *)seg->s_data;
- struct anon_map *amp;
- pgcnt_t pgcnt = 0;
- pgcnt_t npages;
- pgcnt_t page;
- ulong_t anon_index;
-
- ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
-
- SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
- /*
- * Find pages unmapped by our caller and force them
- * out to the virtual swap device.
- */
- if ((amp = svd->amp) != NULL)
- anon_index = svd->anon_index;
- npages = seg->s_size >> PAGESHIFT;
- for (page = 0; page < npages; page++) {
- page_t *pp;
- struct anon *ap;
- struct vnode *vp;
- u_offset_t off;
- anon_sync_obj_t cookie;
-
- /*
- * Obtain <vp, off> pair for the page, then look it up.
- *
- * Note that this code is willing to consider regular
- * pages as well as anon pages. Is this appropriate here?
- */
- ap = NULL;
- if (amp != NULL) {
- ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
- if (anon_array_try_enter(amp, anon_index + page,
- &cookie)) {
- ANON_LOCK_EXIT(&amp->a_rwlock);
- continue;
- }
- ap = anon_get_ptr(amp->ahp, anon_index + page);
- if (ap != NULL) {
- swap_xlate(ap, &vp, &off);
- } else {
- vp = svd->vp;
- off = svd->offset + ptob(page);
- }
- anon_array_exit(&cookie);
- ANON_LOCK_EXIT(&amp->a_rwlock);
- } else {
- vp = svd->vp;
- off = svd->offset + ptob(page);
- }
- if (vp == NULL) { /* untouched zfod page */
- ASSERT(ap == NULL);
- continue;
- }
-
- pp = page_lookup_nowait(vp, off, SE_SHARED);
- if (pp == NULL)
- continue;
-
-
- /*
- * Examine the page to see whether it can be tossed out,
- * keeping track of how many we've found.
- */
- if (!page_tryupgrade(pp)) {
- /*
- * If the page has an i/o lock and no mappings,
- * it's very likely that the page is being
- * written out as a result of klustering.
- * Assume this is so and take credit for it here.
- */
- if (!page_io_trylock(pp)) {
- if (!hat_page_is_mapped(pp))
- pgcnt++;
- } else {
- page_io_unlock(pp);
- }
- page_unlock(pp);
- continue;
- }
- ASSERT(!page_iolock_assert(pp));
-
-
- /*
- * Skip if page is locked or has mappings.
- * We don't need the page_struct_lock to look at lckcnt
- * and cowcnt because the page is exclusive locked.
- */
- if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
- hat_page_is_mapped(pp)) {
- page_unlock(pp);
- continue;
- }
-
- /*
- * dispose skips large pages so try to demote first.
- */
- if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
- page_unlock(pp);
- /*
- * XXX should skip the remaining page_t's of this
- * large page.
- */
- continue;
- }
-
- ASSERT(pp->p_szc == 0);
-
- /*
- * No longer mapped -- we can toss it out. How
- * we do so depends on whether or not it's dirty.
- */
- if (hat_ismod(pp) && pp->p_vnode) {
- /*
- * We must clean the page before it can be
- * freed. Setting B_FREE will cause pvn_done
- * to free the page when the i/o completes.
- * XXX: This also causes it to be accounted
- * as a pageout instead of a swap: need
- * B_SWAPOUT bit to use instead of B_FREE.
- *
- * Hold the vnode before releasing the page lock
- * to prevent it from being freed and re-used by
- * some other thread.
- */
- VN_HOLD(vp);
- page_unlock(pp);
-
- /*
- * Queue all i/o requests for the pageout thread
- * to avoid saturating the pageout devices.
- */
- if (!queue_io_request(vp, off))
- VN_RELE(vp);
- } else {
- /*
- * The page was clean, free it.
- *
- * XXX: Can we ever encounter modified pages
- * with no associated vnode here?
- */
- ASSERT(pp->p_vnode != NULL);
- /*LINTED: constant in conditional context*/
- VN_DISPOSE(pp, B_FREE, 0, kcred);
- }
-
- /*
- * Credit now even if i/o is in progress.
- */
- pgcnt++;
- }
- SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
-
- /*
- * Wakeup pageout to initiate i/o on all queued requests.
- */
- cv_signal_pageout();
- return (ptob(pgcnt));
-}
-
-/*
* Synchronize primary storage cache with real object in virtual memory.
*
* XXX - Anonymous pages should not be sync'ed out at all.
*/
static int
@@ -9689,17 +9470,10 @@
}
return (policy_info);
}
-/*ARGSUSED*/
-static int
-segvn_capable(struct seg *seg, segcapability_t capability)
-{
- return (0);
-}
-
/*
* Bind text vnode segment to an amp. If we bind successfully mappings will be
* established to per vnode mapping per lgroup amp pages instead of to vnode
* pages. There's one amp per vnode text mapping per lgroup. Many processes
* may share the same text replication amp. If a suitable amp doesn't already