PVN_GETPAGE_{SZ,NUM} are misnamed and unnecessarily complicated
There is really no reason not to allow 8 pages all the time.  With the
current logic, we get the following:
Assuming 4kB pages (x86):
    _SZ  = ptob(8) /* 32kB */
    _NUM = 8
Assuming 8kB pages (sparc):
    _SZ  = ptob(8) /* 64kB */
    _NUM = 8
We'd have to deal with 16kB base pages in order for the _NUM #define to not
be 8 (it'd be 4 in that case).  So, in the spirit of simplicity, let's just
always grab 8 pages as there are no interesting systems with 16kB+ base pages.
Finally, the defines are poorly named.
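For reference, here is the old logic and its replacement, condensed from the
diff further down (the comment spelling out the arithmetic is mine):

/*
 * Old: use whichever is smaller, 8 pages or 64kB worth of pages.
 *   4kB pages (x86):    8 * 4kB = 32kB < 64kB  ->  8 pages
 *   8kB pages (sparc):  8 * 8kB = 64kB         ->  64kB / 8kB = 8 pages
 * Only a 16kB+ base page size would change the answer (64kB / 16kB = 4).
 */
#if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
#define	PVN_GETPAGE_SZ	ptob(PVN_MAX_GETPAGE_NUM)
#define	PVN_GETPAGE_NUM	PVN_MAX_GETPAGE_NUM
#else
#define	PVN_GETPAGE_SZ	PVN_MAX_GETPAGE_SZ
#define	PVN_GETPAGE_NUM	btop(PVN_MAX_GETPAGE_SZ)
#endif

/* New: always 8 pages, under a name that says what the array is for. */
#define	FAULT_TMP_PAGES_NUM	0x8
#define	FAULT_TMP_PAGES_SZ	ptob(FAULT_TMP_PAGES_NUM)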
const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, treat a NULL capable segop function pointer as shorthand for
"no capabilities supported".
lower-case-segops (convert SEGOP_* macro calls into segop_* functions)
use C99 initializers in segment ops structures
remove whole-process swapping
Long before Unix supported paging, it used whole-process swapping to reclaim
memory.  The code is still there, and in theory it runs when we get
*extremely* low on memory.  In practice, it never runs, since the definition
of low-on-memory is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
remove xhat
The xhat infrastructure was added to support hardware such as the zulu
graphics card - hardware which had on-board MMUs.  The VM used the xhat code
to keep the CPU's and Zulu's page tables in-sync.  Since the only xhat user
was zulu (which is gone), we can safely remove it simplifying the whole VM
subsystem.
Assorted notes:
- AS_BUSY flag was used solely by xhat

*** 76,102 ****
  #include <sys/zone.h>
  #include <sys/shm_impl.h>
  
  /*
   * segvn_fault needs a temporary page list array. To avoid calling kmem all
! * the time, it creates a small (PVN_GETPAGE_NUM entry) array and uses it if
! * it can. In the rare case when this page list is not large enough, it
! * goes and gets a large enough array from kmem.
! *
! * This small page list array covers either 8 pages or 64kB worth of pages -
! * whichever is smaller.
! */
! #define	PVN_MAX_GETPAGE_SZ	0x10000
! #define	PVN_MAX_GETPAGE_NUM	0x8
! 
! #if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
! #define	PVN_GETPAGE_SZ	ptob(PVN_MAX_GETPAGE_NUM)
! #define	PVN_GETPAGE_NUM	PVN_MAX_GETPAGE_NUM
! #else
! #define	PVN_GETPAGE_SZ	PVN_MAX_GETPAGE_SZ
! #define	PVN_GETPAGE_NUM	btop(PVN_MAX_GETPAGE_SZ)
! #endif
  
  /*
   * Private seg op routines.
   */
  static int	segvn_dup(struct seg *seg, struct seg *newseg);
--- 76,91 ----
  #include <sys/zone.h>
  #include <sys/shm_impl.h>
  
  /*
   * segvn_fault needs a temporary page list array. To avoid calling kmem all
! * the time, it creates a small (FAULT_TMP_PAGES_NUM entry) array and uses
! * it if it can. In the rare case when this page list is not large enough,
! * it goes and gets a large enough array from kmem.
! */
! #define	FAULT_TMP_PAGES_NUM	0x8
! #define	FAULT_TMP_PAGES_SZ	ptob(FAULT_TMP_PAGES_NUM)
  
  /*
   * Private seg op routines.
   */
  static int	segvn_dup(struct seg *seg, struct seg *newseg);
*** 109,119 ****
  static int	segvn_setprot(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t prot);
  static int	segvn_checkprot(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t prot);
  static int	segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
- static size_t	segvn_swapout(struct seg *seg);
  static int	segvn_sync(struct seg *seg, caddr_t addr, size_t len,
  		    int attr, uint_t flags);
  static size_t	segvn_incore(struct seg *seg, caddr_t addr, size_t len,
  		    char *vec);
  static int	segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
--- 98,107 ----
*** 132,169 ****
  static int	segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t szc);
  static int	segvn_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  static lgrp_mem_policy_info_t	*segvn_getpolicy(struct seg *, caddr_t);
- static int	segvn_capable(struct seg *seg, segcapability_t capable);
  static int	segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
  
! struct seg_ops segvn_ops = {
! 	segvn_dup,
! 	segvn_unmap,
! 	segvn_free,
! 	segvn_fault,
! 	segvn_faulta,
! 	segvn_setprot,
! 	segvn_checkprot,
! 	segvn_kluster,
! 	segvn_swapout,
! 	segvn_sync,
! 	segvn_incore,
! 	segvn_lockop,
! 	segvn_getprot,
! 	segvn_getoffset,
! 	segvn_gettype,
! 	segvn_getvp,
! 	segvn_advise,
! 	segvn_dump,
! 	segvn_pagelock,
! 	segvn_setpagesize,
! 	segvn_getmemid,
! 	segvn_getpolicy,
! 	segvn_capable,
! 	segvn_inherit
  };
  
  /*
   * Common zfod structures, provided as a shorthand for others to use.
   */
--- 120,154 ----
  static int	segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
  		    uint_t szc);
  static int	segvn_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  static lgrp_mem_policy_info_t	*segvn_getpolicy(struct seg *, caddr_t);
  static int	segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
  
! const struct seg_ops segvn_ops = {
! 	.dup		= segvn_dup,
! 	.unmap		= segvn_unmap,
! 	.free		= segvn_free,
! 	.fault		= segvn_fault,
! 	.faulta		= segvn_faulta,
! 	.setprot	= segvn_setprot,
! 	.checkprot	= segvn_checkprot,
! 	.kluster	= segvn_kluster,
! 	.sync		= segvn_sync,
! 	.incore		= segvn_incore,
! 	.lockop		= segvn_lockop,
! 	.getprot	= segvn_getprot,
! 	.getoffset	= segvn_getoffset,
! 	.gettype	= segvn_gettype,
! 	.getvp		= segvn_getvp,
! 	.advise		= segvn_advise,
! 	.dump		= segvn_dump,
! 	.pagelock	= segvn_pagelock,
! 	.setpagesize	= segvn_setpagesize,
! 	.getmemid	= segvn_getmemid,
! 	.getpolicy	= segvn_getpolicy,
! 	.inherit	= segvn_inherit,
  };
  
  /*
   * Common zfod structures, provided as a shorthand for others to use.
   */
*** 3854,3864 ****
  	anon_sync_obj_t an_cookie;
  	enum seg_rw arw;
  	int alloc_failed = 0;
  	int adjszc_chk;
  	struct vattr va;
- 	int xhat = 0;
  	page_t *pplist;
  	pfn_t pfn;
  	int physcontig;
  	int upgrdfail;
  	int segvn_anypgsz_vnode = 0; /* for now map vnode with 2 page sizes */
--- 3839,3848 ----
*** 3904,3917 ****
  	} else {
  		prot = svd->prot;
  		/* caller has already done segment level protection check. */
  	}
  
- 	if (seg->s_as->a_hat != hat) {
- 		xhat = 1;
- 	}
- 
  	if (rw == S_WRITE && segtype == MAP_PRIVATE) {
  		SEGVN_VMSTAT_FLTVNPAGES(2);
  		arw = S_READ;
  	} else {
  		arw = rw;
--- 3888,3897 ----
*** 4263,4291 ****
  				if (PP_ISMIGRATE(ppa[0])) {
  					page_migrate(seg, a, ppa, pages);
  				}
  				SEGVN_UPDATE_MODBITS(ppa, pages, rw,
  				    prot, vpprot);
- 				if (!xhat) {
  					hat_memload_array_region(hat, a, pgsz,
  					    ppa, prot & vpprot, hat_flag,
  					    svd->rcookie);
- 				} else {
- 					/*
- 					 * avoid large xhat mappings to FS
- 					 * pages so that hat_page_demote()
- 					 * doesn't need to check for xhat
- 					 * large mappings.
- 					 * Don't use regions with xhats.
- 					 */
- 					for (i = 0; i < pages; i++) {
- 						hat_memload(hat,
- 						    a + (i << PAGESHIFT),
- 						    ppa[i], prot & vpprot,
- 						    hat_flag);
- 					}
- 				}
  				if (!(hat_flag & HAT_LOAD_LOCK)) {
  					for (i = 0; i < pages; i++) {
  						page_unlock(ppa[i]);
  					}
--- 4243,4255 ----
*** 4335,4345 ****
  			/*
  			 * check if we should use smallest mapping size.
  			 */
  			upgrdfail = 0;
! 			if (szc == 0 || xhat ||
  			    (pszc >= szc &&
  			    !IS_P2ALIGNED(pfn, pages)) ||
  			    (pszc < szc &&
  			    !segvn_full_szcpages(ppa, szc, &upgrdfail,
  			    &pszc))) {
--- 4299,4309 ----
  			/*
  			 * check if we should use smallest mapping size.
  			 */
  			upgrdfail = 0;
! 			if (szc == 0 ||
  			    (pszc >= szc &&
  			    !IS_P2ALIGNED(pfn, pages)) ||
  			    (pszc < szc &&
  			    !segvn_full_szcpages(ppa, szc, &upgrdfail,
  			    &pszc))) {
*** 4367,4377 ****
  				ANON_LOCK_EXIT(&amp->a_rwlock);
  			}
  			ierr = -1;
  			break;
  		}
! 		if (szc != 0 && !xhat && !upgrdfail) {
  			segvn_faultvnmpss_align_err5++;
  		}
  		SEGVN_VMSTAT_FLTVNPAGES(34);
  		if (pplist != NULL) {
  			page_free_replacement_page(pplist);
--- 4331,4341 ----
  				ANON_LOCK_EXIT(&amp->a_rwlock);
  			}
  			ierr = -1;
  			break;
  		}
! 		if (szc != 0 && !upgrdfail) {
  			segvn_faultvnmpss_align_err5++;
  		}
  		SEGVN_VMSTAT_FLTVNPAGES(34);
  		if (pplist != NULL) {
  			page_free_replacement_page(pplist);
*** 4948,4958 ****
  	u_offset_t off;
  	caddr_t a;
  	struct vpage *vpage;
  	uint_t vpprot, prot;
  	int err;
! 	page_t *pl[PVN_GETPAGE_NUM + 1];
  	size_t plsz, pl_alloc_sz;
  	size_t page;
  	ulong_t anon_index;
  	struct anon_map *amp;
  	int dogetpage = 0;
--- 4912,4922 ----
  	u_offset_t off;
  	caddr_t a;
  	struct vpage *vpage;
  	uint_t vpprot, prot;
  	int err;
! 	page_t *pl[FAULT_TMP_PAGES_NUM + 1];
  	size_t plsz, pl_alloc_sz;
  	size_t page;
  	ulong_t anon_index;
  	struct anon_map *amp;
  	int dogetpage = 0;
*** 5387,5397 ****
  	if (dogetpage) {
  		enum seg_rw arw;
  		struct as *as = seg->s_as;
  
! 		if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) {
  			/*
  			 * Page list won't fit in local array,
  			 * allocate one of the needed size.
  			 */
  			pl_alloc_sz =
--- 5351,5361 ----
  	if (dogetpage) {
  		enum seg_rw arw;
  		struct as *as = seg->s_as;
  
! 		if (len > FAULT_TMP_PAGES_SZ) {
  			/*
  			 * Page list won't fit in local array,
  			 * allocate one of the needed size.
  			 */
  			pl_alloc_sz =
*** 5415,5425 ****
  		} else {
  			/*
  			 * Ask VOP_GETPAGE to return adjacent pages
  			 * within the segment.
  			 */
! 			plsz = MIN((size_t)PVN_GETPAGE_SZ, (size_t)
  			    ((seg->s_base + seg->s_size) - addr));
  			ASSERT((addr + plsz) <=
  			    (seg->s_base + seg->s_size));
  		}
--- 5379,5389 ----
  		} else {
  			/*
  			 * Ask VOP_GETPAGE to return adjacent pages
  			 * within the segment.
  			 */
! 			plsz = MIN((size_t)FAULT_TMP_PAGES_SZ, (size_t)
  			    ((seg->s_base + seg->s_size) - addr));
  			ASSERT((addr + plsz) <=
  			    (seg->s_base + seg->s_size));
  		}
*** 6082,6092 ****
  	return (0);
  }
  
  /*
! * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize,
   * to determine if the seg is capable of mapping the requested szc.
   */
  static int
  segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
  {
--- 6046,6056 ----
  	return (0);
  }
  
  /*
! * segvn_setpagesize is called via segop_setpagesize from as_setpagesize,
   * to determine if the seg is capable of mapping the requested szc.
   */
  static int
  segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
  {
*** 7070,7262 ****
  		return (-1);
  	return (0);
  }
  
  /*
- * Swap the pages of seg out to secondary storage, returning the
- * number of bytes of storage freed.
- *
- * The basic idea is first to unload all translations and then to call
- * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
- * swap device. Pages to which other segments have mappings will remain
- * mapped and won't be swapped. Our caller (as_swapout) has already
- * performed the unloading step.
- *
- * The value returned is intended to correlate well with the process's
- * memory requirements. However, there are some caveats:
- * 1)	When given a shared segment as argument, this routine will
- *	only succeed in swapping out pages for the last sharer of the
- *	segment. (Previous callers will only have decremented mapping
- *	reference counts.)
- * 2)	We assume that the hat layer maintains a large enough translation
- *	cache to capture process reference patterns.
- */
- static size_t
- segvn_swapout(struct seg *seg)
- {
- 	struct segvn_data *svd = (struct segvn_data *)seg->s_data;
- 	struct anon_map *amp;
- 	pgcnt_t pgcnt = 0;
- 	pgcnt_t npages;
- 	pgcnt_t page;
- 	ulong_t anon_index;
- 
- 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
- 
- 	SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
- 	/*
- 	 * Find pages unmapped by our caller and force them
- 	 * out to the virtual swap device.
- 	 */
- 	if ((amp = svd->amp) != NULL)
- 		anon_index = svd->anon_index;
- 	npages = seg->s_size >> PAGESHIFT;
- 	for (page = 0; page < npages; page++) {
- 		page_t *pp;
- 		struct anon *ap;
- 		struct vnode *vp;
- 		u_offset_t off;
- 		anon_sync_obj_t cookie;
- 
- 		/*
- 		 * Obtain <vp, off> pair for the page, then look it up.
- 		 *
- 		 * Note that this code is willing to consider regular
- 		 * pages as well as anon pages. Is this appropriate here?
- 		 */
- 		ap = NULL;
- 		if (amp != NULL) {
- 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
- 			if (anon_array_try_enter(amp, anon_index + page,
- 			    &cookie)) {
- 				ANON_LOCK_EXIT(&amp->a_rwlock);
- 				continue;
- 			}
- 			ap = anon_get_ptr(amp->ahp, anon_index + page);
- 			if (ap != NULL) {
- 				swap_xlate(ap, &vp, &off);
- 			} else {
- 				vp = svd->vp;
- 				off = svd->offset + ptob(page);
- 			}
- 			anon_array_exit(&cookie);
- 			ANON_LOCK_EXIT(&amp->a_rwlock);
- 		} else {
- 			vp = svd->vp;
- 			off = svd->offset + ptob(page);
- 		}
- 		if (vp == NULL) {		/* untouched zfod page */
- 			ASSERT(ap == NULL);
- 			continue;
- 		}
- 
- 		pp = page_lookup_nowait(vp, off, SE_SHARED);
- 		if (pp == NULL)
- 			continue;
- 
- 
- 		/*
- 		 * Examine the page to see whether it can be tossed out,
- 		 * keeping track of how many we've found.
- 		 */
- 		if (!page_tryupgrade(pp)) {
- 			/*
- 			 * If the page has an i/o lock and no mappings,
- 			 * it's very likely that the page is being
- 			 * written out as a result of klustering.
- 			 * Assume this is so and take credit for it here.
- 			 */
- 			if (!page_io_trylock(pp)) {
- 				if (!hat_page_is_mapped(pp))
- 					pgcnt++;
- 			} else {
- 				page_io_unlock(pp);
- 			}
- 			page_unlock(pp);
- 			continue;
- 		}
- 		ASSERT(!page_iolock_assert(pp));
- 
- 
- 		/*
- 		 * Skip if page is locked or has mappings.
- 		 * We don't need the page_struct_lock to look at lckcnt
- 		 * and cowcnt because the page is exclusive locked.
- 		 */
- 		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
- 		    hat_page_is_mapped(pp)) {
- 			page_unlock(pp);
- 			continue;
- 		}
- 
- 		/*
- 		 * dispose skips large pages so try to demote first.
- 		 */
- 		if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
- 			page_unlock(pp);
- 			/*
- 			 * XXX should skip the remaining page_t's of this
- 			 * large page.
- 			 */
- 			continue;
- 		}
- 
- 		ASSERT(pp->p_szc == 0);
- 
- 		/*
- 		 * No longer mapped -- we can toss it out. How
- 		 * we do so depends on whether or not it's dirty.
- 		 */
- 		if (hat_ismod(pp) && pp->p_vnode) {
- 			/*
- 			 * We must clean the page before it can be
- 			 * freed. Setting B_FREE will cause pvn_done
- 			 * to free the page when the i/o completes.
- 			 * XXX: This also causes it to be accounted
- 			 * as a pageout instead of a swap: need
- 			 * B_SWAPOUT bit to use instead of B_FREE.
- 			 *
- 			 * Hold the vnode before releasing the page lock
- 			 * to prevent it from being freed and re-used by
- 			 * some other thread.
- 			 */
- 			VN_HOLD(vp);
- 			page_unlock(pp);
- 
- 			/*
- 			 * Queue all i/o requests for the pageout thread
- 			 * to avoid saturating the pageout devices.
- 			 */
- 			if (!queue_io_request(vp, off))
- 				VN_RELE(vp);
- 		} else {
- 			/*
- 			 * The page was clean, free it.
- 			 *
- 			 * XXX: Can we ever encounter modified pages
- 			 * with no associated vnode here?
- 			 */
- 			ASSERT(pp->p_vnode != NULL);
- 			/*LINTED: constant in conditional context*/
- 			VN_DISPOSE(pp, B_FREE, 0, kcred);
- 		}
- 
- 		/*
- 		 * Credit now even if i/o is in progress.
- 		 */
- 		pgcnt++;
- 	}
- 	SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
- 
- 	/*
- 	 * Wakeup pageout to initiate i/o on all queued requests.
- 	 */
- 	cv_signal_pageout();
- 	return (ptob(pgcnt));
- }
- 
- /*
   * Synchronize primary storage cache with real object in virtual memory.
   *
   * XXX - Anonymous pages should not be sync'ed out at all.
   */
  static int
--- 7034,7043 ----
*** 9689,9705 ****
  	}
  
  	return (policy_info);
  }
  
- /*ARGSUSED*/
- static int
- segvn_capable(struct seg *seg, segcapability_t capability)
- {
- 	return (0);
- }
- 
  /*
   * Bind text vnode segment to an amp. If we bind successfully mappings will be
   * established to per vnode mapping per lgroup amp pages instead of to vnode
   * pages. There's one amp per vnode text mapping per lgroup. Many processes
   * may share the same text replication amp. If a suitable amp doesn't already
--- 9470,9479 ----