PVN_GETPAGE_{SZ,NUM} are misnamed and unnecessarily complicated
There is really no reason not to allow 8 pages all the time. With the
current logic, we get the following:
Assuming 4kB pages (x86):
_SZ = ptob(8) /* 32kB */
_NUM = 8
Assuming 8kB pages (sparc):
_SZ = ptob(8) /* 64kB */
_NUM = 8
We'd have to deal with 16kB base pages in order for the _NUM #define to not
be 8 (it'd be 4 in that case). So, in the spirit of simplicity, let's just
always grab 8 pages as there are no interesting systems with 16kB+ base pages.
Finally, the defines are poorly named.
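As a sanity check, here is a small user-space sketch (not kernel code; the
macro values are inlined, and ptob/btop are open-coded as a multiply and a
divide) that evaluates the old conditional for the three page sizes discussed
above:

    #include <stdio.h>

    /* Values from the old, now-deleted macros. */
    #define PVN_MAX_GETPAGE_SZ   0x10000UL  /* 64kB */
    #define PVN_MAX_GETPAGE_NUM  8UL

    int
    main(void)
    {
            unsigned long pagesizes[] = { 4096, 8192, 16384 };

            for (int i = 0; i < 3; i++) {
                    unsigned long ps = pagesizes[i];
                    unsigned long sz, num;

                    /* The old #if, evaluated at run time. */
                    if (PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * ps) {
                            sz = PVN_MAX_GETPAGE_NUM * ps;  /* ptob(_NUM) */
                            num = PVN_MAX_GETPAGE_NUM;
                    } else {
                            sz = PVN_MAX_GETPAGE_SZ;        /* 64kB cap */
                            num = PVN_MAX_GETPAGE_SZ / ps;  /* btop(_SZ) */
                    }
                    printf("%2lukB pages: _SZ = %2lukB, _NUM = %lu\n",
                        ps >> 10, sz >> 10, num);
            }
            return (0);
    }

It prints _NUM = 8 for both 4kB and 8kB pages; only a hypothetical 16kB base
page yields _NUM = 4.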
const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle a NULL capable segop function pointer as shorthand for
"no capabilities supported".
lower-case segops
Rename the SEGOP_* macros to lower-case segop_* equivalents.
use C99 initializers in segment ops structures
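A generic before/after sketch (the example_* names are made up; the actual
segvn_ops conversion appears in the diff below):

    /* Before: positional; every member must appear in declaration order. */
    struct seg_ops example_ops = {
            example_dup,
            example_unmap,
            /* ...and so on for every remaining member... */
    };

    /* After: C99 designated initializers; self-documenting, order-
     * independent, and unmentioned members (e.g. .capable) default to
     * NULL, which dovetails with the NULL-capable shorthand above. */
    const struct seg_ops example_ops = {
            .dup    = example_dup,
            .unmap  = example_unmap,
    };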
remove whole-process swapping
Long before Unix supported paging, it used whole-process swapping to reclaim
memory. The code is still there, and in theory it runs when we get *extremely*
low on memory. In practice it never runs, since the definition of
low-on-memory is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
remove xhat
The xhat infrastructure was added to support hardware such as the Zulu
graphics card - hardware with its own on-board MMU. The VM used the xhat code
to keep the CPU's and Zulu's page tables in sync. Since the only xhat user
was Zulu (which is gone), we can safely remove it, simplifying the whole VM
subsystem.
Assorted notes:
- AS_BUSY flag was used solely by xhat
@@ -76,27 +76,16 @@
#include <sys/zone.h>
#include <sys/shm_impl.h>
/*
* segvn_fault needs a temporary page list array. To avoid calling kmem all
- * the time, it creates a small (PVN_GETPAGE_NUM entry) array and uses it if
- * it can. In the rare case when this page list is not large enough, it
- * goes and gets a large enough array from kmem.
- *
- * This small page list array covers either 8 pages or 64kB worth of pages -
- * whichever is smaller.
- */
-#define PVN_MAX_GETPAGE_SZ 0x10000
-#define PVN_MAX_GETPAGE_NUM 0x8
-
-#if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
-#define PVN_GETPAGE_SZ ptob(PVN_MAX_GETPAGE_NUM)
-#define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
-#else
-#define PVN_GETPAGE_SZ PVN_MAX_GETPAGE_SZ
-#define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
-#endif
+ * the time, it creates a small (FAULT_TMP_PAGES_NUM entry) array and uses
+ * it if it can. In the rare case when this page list is not large enough,
+ * it goes and gets a large enough array from kmem.
+ */
+#define FAULT_TMP_PAGES_NUM 0x8
+#define FAULT_TMP_PAGES_SZ ptob(FAULT_TMP_PAGES_NUM)
/*
* Private seg op routines.
*/
static int segvn_dup(struct seg *seg, struct seg *newseg);
@@ -109,11 +98,10 @@
static int segvn_setprot(struct seg *seg, caddr_t addr,
size_t len, uint_t prot);
static int segvn_checkprot(struct seg *seg, caddr_t addr,
size_t len, uint_t prot);
static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
-static size_t segvn_swapout(struct seg *seg);
static int segvn_sync(struct seg *seg, caddr_t addr, size_t len,
int attr, uint_t flags);
static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len,
char *vec);
static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
@@ -132,38 +120,35 @@
static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
uint_t szc);
static int segvn_getmemid(struct seg *seg, caddr_t addr,
memid_t *memidp);
static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);
-static int segvn_capable(struct seg *seg, segcapability_t capable);
static int segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
-struct seg_ops segvn_ops = {
- segvn_dup,
- segvn_unmap,
- segvn_free,
- segvn_fault,
- segvn_faulta,
- segvn_setprot,
- segvn_checkprot,
- segvn_kluster,
- segvn_swapout,
- segvn_sync,
- segvn_incore,
- segvn_lockop,
- segvn_getprot,
- segvn_getoffset,
- segvn_gettype,
- segvn_getvp,
- segvn_advise,
- segvn_dump,
- segvn_pagelock,
- segvn_setpagesize,
- segvn_getmemid,
- segvn_getpolicy,
- segvn_capable,
- segvn_inherit
+const struct seg_ops segvn_ops = {
+ .dup = segvn_dup,
+ .unmap = segvn_unmap,
+ .free = segvn_free,
+ .fault = segvn_fault,
+ .faulta = segvn_faulta,
+ .setprot = segvn_setprot,
+ .checkprot = segvn_checkprot,
+ .kluster = segvn_kluster,
+ .sync = segvn_sync,
+ .incore = segvn_incore,
+ .lockop = segvn_lockop,
+ .getprot = segvn_getprot,
+ .getoffset = segvn_getoffset,
+ .gettype = segvn_gettype,
+ .getvp = segvn_getvp,
+ .advise = segvn_advise,
+ .dump = segvn_dump,
+ .pagelock = segvn_pagelock,
+ .setpagesize = segvn_setpagesize,
+ .getmemid = segvn_getmemid,
+ .getpolicy = segvn_getpolicy,
+ .inherit = segvn_inherit,
};
/*
* Common zfod structures, provided as a shorthand for others to use.
*/
@@ -3854,11 +3839,10 @@
anon_sync_obj_t an_cookie;
enum seg_rw arw;
int alloc_failed = 0;
int adjszc_chk;
struct vattr va;
- int xhat = 0;
page_t *pplist;
pfn_t pfn;
int physcontig;
int upgrdfail;
int segvn_anypgsz_vnode = 0; /* for now map vnode with 2 page sizes */
@@ -3904,14 +3888,10 @@
} else {
prot = svd->prot;
/* caller has already done segment level protection check. */
}
- if (seg->s_as->a_hat != hat) {
- xhat = 1;
- }
-
if (rw == S_WRITE && segtype == MAP_PRIVATE) {
SEGVN_VMSTAT_FLTVNPAGES(2);
arw = S_READ;
} else {
arw = rw;
@@ -4263,29 +4243,13 @@
if (PP_ISMIGRATE(ppa[0])) {
page_migrate(seg, a, ppa, pages);
}
SEGVN_UPDATE_MODBITS(ppa, pages, rw,
prot, vpprot);
- if (!xhat) {
hat_memload_array_region(hat, a, pgsz,
ppa, prot & vpprot, hat_flag,
svd->rcookie);
- } else {
- /*
- * avoid large xhat mappings to FS
- * pages so that hat_page_demote()
- * doesn't need to check for xhat
- * large mappings.
- * Don't use regions with xhats.
- */
- for (i = 0; i < pages; i++) {
- hat_memload(hat,
- a + (i << PAGESHIFT),
- ppa[i], prot & vpprot,
- hat_flag);
- }
- }
if (!(hat_flag & HAT_LOAD_LOCK)) {
for (i = 0; i < pages; i++) {
page_unlock(ppa[i]);
}
@@ -4335,11 +4299,11 @@
/*
* check if we should use smallest mapping size.
*/
upgrdfail = 0;
- if (szc == 0 || xhat ||
+ if (szc == 0 ||
(pszc >= szc &&
!IS_P2ALIGNED(pfn, pages)) ||
(pszc < szc &&
!segvn_full_szcpages(ppa, szc, &upgrdfail,
&pszc))) {
@@ -4367,11 +4331,11 @@
ANON_LOCK_EXIT(&amp->a_rwlock);
}
ierr = -1;
break;
}
- if (szc != 0 && !xhat && !upgrdfail) {
+ if (szc != 0 && !upgrdfail) {
segvn_faultvnmpss_align_err5++;
}
SEGVN_VMSTAT_FLTVNPAGES(34);
if (pplist != NULL) {
page_free_replacement_page(pplist);
@@ -4948,11 +4912,11 @@
u_offset_t off;
caddr_t a;
struct vpage *vpage;
uint_t vpprot, prot;
int err;
- page_t *pl[PVN_GETPAGE_NUM + 1];
+ page_t *pl[FAULT_TMP_PAGES_NUM + 1];
size_t plsz, pl_alloc_sz;
size_t page;
ulong_t anon_index;
struct anon_map *amp;
int dogetpage = 0;
@@ -5387,11 +5351,11 @@
if (dogetpage) {
enum seg_rw arw;
struct as *as = seg->s_as;
- if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) {
+ if (len > FAULT_TMP_PAGES_SZ) {
/*
* Page list won't fit in local array,
* allocate one of the needed size.
*/
pl_alloc_sz =
@@ -5415,11 +5379,11 @@
} else {
/*
* Ask VOP_GETPAGE to return adjacent pages
* within the segment.
*/
- plsz = MIN((size_t)PVN_GETPAGE_SZ, (size_t)
+ plsz = MIN((size_t)FAULT_TMP_PAGES_SZ, (size_t)
((seg->s_base + seg->s_size) - addr));
ASSERT((addr + plsz) <=
(seg->s_base + seg->s_size));
}
@@ -6082,11 +6046,11 @@
return (0);
}
/*
- * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize,
+ * segvn_setpagesize is called via segop_setpagesize from as_setpagesize,
* to determine if the seg is capable of mapping the requested szc.
*/
static int
segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
@@ -7070,193 +7034,10 @@
return (-1);
return (0);
}
/*
- * Swap the pages of seg out to secondary storage, returning the
- * number of bytes of storage freed.
- *
- * The basic idea is first to unload all translations and then to call
- * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
- * swap device. Pages to which other segments have mappings will remain
- * mapped and won't be swapped. Our caller (as_swapout) has already
- * performed the unloading step.
- *
- * The value returned is intended to correlate well with the process's
- * memory requirements. However, there are some caveats:
- * 1) When given a shared segment as argument, this routine will
- * only succeed in swapping out pages for the last sharer of the
- * segment. (Previous callers will only have decremented mapping
- * reference counts.)
- * 2) We assume that the hat layer maintains a large enough translation
- * cache to capture process reference patterns.
- */
-static size_t
-segvn_swapout(struct seg *seg)
-{
- struct segvn_data *svd = (struct segvn_data *)seg->s_data;
- struct anon_map *amp;
- pgcnt_t pgcnt = 0;
- pgcnt_t npages;
- pgcnt_t page;
- ulong_t anon_index;
-
- ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
-
- SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
- /*
- * Find pages unmapped by our caller and force them
- * out to the virtual swap device.
- */
- if ((amp = svd->amp) != NULL)
- anon_index = svd->anon_index;
- npages = seg->s_size >> PAGESHIFT;
- for (page = 0; page < npages; page++) {
- page_t *pp;
- struct anon *ap;
- struct vnode *vp;
- u_offset_t off;
- anon_sync_obj_t cookie;
-
- /*
- * Obtain <vp, off> pair for the page, then look it up.
- *
- * Note that this code is willing to consider regular
- * pages as well as anon pages. Is this appropriate here?
- */
- ap = NULL;
- if (amp != NULL) {
- ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
- if (anon_array_try_enter(amp, anon_index + page,
- &cookie)) {
- ANON_LOCK_EXIT(&amp->a_rwlock);
- continue;
- }
- ap = anon_get_ptr(amp->ahp, anon_index + page);
- if (ap != NULL) {
- swap_xlate(ap, &vp, &off);
- } else {
- vp = svd->vp;
- off = svd->offset + ptob(page);
- }
- anon_array_exit(&cookie);
- ANON_LOCK_EXIT(&amp->a_rwlock);
- } else {
- vp = svd->vp;
- off = svd->offset + ptob(page);
- }
- if (vp == NULL) { /* untouched zfod page */
- ASSERT(ap == NULL);
- continue;
- }
-
- pp = page_lookup_nowait(vp, off, SE_SHARED);
- if (pp == NULL)
- continue;
-
-
- /*
- * Examine the page to see whether it can be tossed out,
- * keeping track of how many we've found.
- */
- if (!page_tryupgrade(pp)) {
- /*
- * If the page has an i/o lock and no mappings,
- * it's very likely that the page is being
- * written out as a result of klustering.
- * Assume this is so and take credit for it here.
- */
- if (!page_io_trylock(pp)) {
- if (!hat_page_is_mapped(pp))
- pgcnt++;
- } else {
- page_io_unlock(pp);
- }
- page_unlock(pp);
- continue;
- }
- ASSERT(!page_iolock_assert(pp));
-
-
- /*
- * Skip if page is locked or has mappings.
- * We don't need the page_struct_lock to look at lckcnt
- * and cowcnt because the page is exclusive locked.
- */
- if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
- hat_page_is_mapped(pp)) {
- page_unlock(pp);
- continue;
- }
-
- /*
- * dispose skips large pages so try to demote first.
- */
- if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
- page_unlock(pp);
- /*
- * XXX should skip the remaining page_t's of this
- * large page.
- */
- continue;
- }
-
- ASSERT(pp->p_szc == 0);
-
- /*
- * No longer mapped -- we can toss it out. How
- * we do so depends on whether or not it's dirty.
- */
- if (hat_ismod(pp) && pp->p_vnode) {
- /*
- * We must clean the page before it can be
- * freed. Setting B_FREE will cause pvn_done
- * to free the page when the i/o completes.
- * XXX: This also causes it to be accounted
- * as a pageout instead of a swap: need
- * B_SWAPOUT bit to use instead of B_FREE.
- *
- * Hold the vnode before releasing the page lock
- * to prevent it from being freed and re-used by
- * some other thread.
- */
- VN_HOLD(vp);
- page_unlock(pp);
-
- /*
- * Queue all i/o requests for the pageout thread
- * to avoid saturating the pageout devices.
- */
- if (!queue_io_request(vp, off))
- VN_RELE(vp);
- } else {
- /*
- * The page was clean, free it.
- *
- * XXX: Can we ever encounter modified pages
- * with no associated vnode here?
- */
- ASSERT(pp->p_vnode != NULL);
- /*LINTED: constant in conditional context*/
- VN_DISPOSE(pp, B_FREE, 0, kcred);
- }
-
- /*
- * Credit now even if i/o is in progress.
- */
- pgcnt++;
- }
- SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
-
- /*
- * Wakeup pageout to initiate i/o on all queued requests.
- */
- cv_signal_pageout();
- return (ptob(pgcnt));
-}
-
-/*
* Synchronize primary storage cache with real object in virtual memory.
*
* XXX - Anonymous pages should not be sync'ed out at all.
*/
static int
@@ -9689,17 +9470,10 @@
}
return (policy_info);
}
-/*ARGSUSED*/
-static int
-segvn_capable(struct seg *seg, segcapability_t capability)
-{
- return (0);
-}
-
/*
* Bind text vnode segment to an amp. If we bind successfully mappings will be
* established to per vnode mapping per lgroup amp pages instead of to vnode
* pages. There's one amp per vnode text mapping per lgroup. Many processes
* may share the same text replication amp. If a suitable amp doesn't already