71 #include <sys/proc.h>
72 #include <sys/task.h>
73 #include <sys/project.h>
74 #include <sys/zone.h>
75 #include <sys/shm_impl.h>
76 /*
77 * Private seg op routines.
 *
 * Forward declarations for segvn's implementations of the generic
 * segment-driver interface.  All are static: the rest of the kernel
 * reaches them only through the segvn_ops vector defined below, so
 * this list must stay in sync with that table.
78 */
79 static int segvn_dup(struct seg *seg, struct seg *newseg);
80 static int segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
81 static void segvn_free(struct seg *seg);
82 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
83 caddr_t addr, size_t len, enum fault_type type,
84 enum seg_rw rw);
85 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
86 static int segvn_setprot(struct seg *seg, caddr_t addr,
87 size_t len, uint_t prot);
88 static int segvn_checkprot(struct seg *seg, caddr_t addr,
89 size_t len, uint_t prot);
90 static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
91 static size_t segvn_swapout(struct seg *seg);
92 static int segvn_sync(struct seg *seg, caddr_t addr, size_t len,
93 int attr, uint_t flags);
94 static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len,
95 char *vec);
96 static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
97 int attr, int op, ulong_t *lockmap, size_t pos);
98 static int segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
99 uint_t *protv);
100 static u_offset_t segvn_getoffset(struct seg *seg, caddr_t addr);
101 static int segvn_gettype(struct seg *seg, caddr_t addr);
102 static int segvn_getvp(struct seg *seg, caddr_t addr,
103 struct vnode **vpp);
104 static int segvn_advise(struct seg *seg, caddr_t addr, size_t len,
105 uint_t behav);
106 static void segvn_dump(struct seg *seg);
107 static int segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
108 struct page ***ppp, enum lock_type type, enum seg_rw rw);
109 static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
110 uint_t szc);
111 static int segvn_getmemid(struct seg *seg, caddr_t addr,
112 memid_t *memidp);
113 static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);
114 static int segvn_capable(struct seg *seg, segcapability_t capable);
115
116 struct seg_ops segvn_ops = {
117 segvn_dup,
118 segvn_unmap,
119 segvn_free,
120 segvn_fault,
121 segvn_faulta,
122 segvn_setprot,
123 segvn_checkprot,
124 segvn_kluster,
125 segvn_swapout,
126 segvn_sync,
127 segvn_incore,
128 segvn_lockop,
129 segvn_getprot,
130 segvn_getoffset,
131 segvn_gettype,
132 segvn_getvp,
133 segvn_advise,
134 segvn_dump,
135 segvn_pagelock,
136 segvn_setpagesize,
137 segvn_getmemid,
138 segvn_getpolicy,
139 segvn_capable,
140 };
141
142 /*
143 * Common zfod structures, provided as a shorthand for others to use.
144 */
145 static segvn_crargs_t zfod_segvn_crargs =
6980 * see if they happen to be properly allocated.
6981 */
6982
6983 /*
6984 * XXX We cheat here and don't lock the anon slots. We can't because
6985 * we may have been called from the anon layer which might already
6986 * have locked them. We are holding a refcnt on the slots so they
6987 * can't disappear. The worst that will happen is we'll get the wrong
6988 * names (vp, off) for the slots and make a poor klustering decision.
6989 */
6990 swap_xlate(ap, &vp1, &off1);
6991 swap_xlate(oap, &vp2, &off2);
6992
6993
6994 if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
6995 return (-1);
6996 return (0);
6997 }
6998
6999 /*
7000 * Swap the pages of seg out to secondary storage, returning the
7001 * number of bytes of storage freed.
7002 *
7003 * The basic idea is first to unload all translations and then to call
7004 * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
7005 * swap device. Pages to which other segments have mappings will remain
7006 * mapped and won't be swapped. Our caller (as_swapout) has already
7007 * performed the unloading step.
7008 *
7009 * The value returned is intended to correlate well with the process's
7010 * memory requirements. However, there are some caveats:
7011 * 1) When given a shared segment as argument, this routine will
7012 * only succeed in swapping out pages for the last sharer of the
7013 * segment. (Previous callers will only have decremented mapping
7014 * reference counts.)
7015 * 2) We assume that the hat layer maintains a large enough translation
7016 * cache to capture process reference patterns.
7017 */
7018 static size_t
7019 segvn_swapout(struct seg *seg)
7020 {
7021 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7022 struct anon_map *amp;
7023 pgcnt_t pgcnt = 0;
7024 pgcnt_t npages;
7025 pgcnt_t page;
7026 ulong_t anon_index;
7027
7028 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
7029
7030 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7031 /*
7032 * Find pages unmapped by our caller and force them
7033 * out to the virtual swap device.
7034 */
7035 if ((amp = svd->amp) != NULL)
7036 anon_index = svd->anon_index;
7037 npages = seg->s_size >> PAGESHIFT;
7038 for (page = 0; page < npages; page++) {
7039 page_t *pp;
7040 struct anon *ap;
7041 struct vnode *vp;
7042 u_offset_t off;
7043 anon_sync_obj_t cookie;
7044
7045 /*
7046 * Obtain <vp, off> pair for the page, then look it up.
7047 *
7048 * Note that this code is willing to consider regular
7049 * pages as well as anon pages. Is this appropriate here?
7050 */
7051 ap = NULL;
7052 if (amp != NULL) {
7053 ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
7054 if (anon_array_try_enter(amp, anon_index + page,
7055 &cookie)) {
7056 ANON_LOCK_EXIT(&->a_rwlock);
7057 continue;
7058 }
7059 ap = anon_get_ptr(amp->ahp, anon_index + page);
7060 if (ap != NULL) {
7061 swap_xlate(ap, &vp, &off);
7062 } else {
7063 vp = svd->vp;
7064 off = svd->offset + ptob(page);
7065 }
7066 anon_array_exit(&cookie);
7067 ANON_LOCK_EXIT(&->a_rwlock);
7068 } else {
7069 vp = svd->vp;
7070 off = svd->offset + ptob(page);
7071 }
7072 if (vp == NULL) { /* untouched zfod page */
7073 ASSERT(ap == NULL);
7074 continue;
7075 }
7076
7077 pp = page_lookup_nowait(vp, off, SE_SHARED);
7078 if (pp == NULL)
7079 continue;
7080
7081
7082 /*
7083 * Examine the page to see whether it can be tossed out,
7084 * keeping track of how many we've found.
7085 */
7086 if (!page_tryupgrade(pp)) {
7087 /*
7088 * If the page has an i/o lock and no mappings,
7089 * it's very likely that the page is being
7090 * written out as a result of klustering.
7091 * Assume this is so and take credit for it here.
7092 */
7093 if (!page_io_trylock(pp)) {
7094 if (!hat_page_is_mapped(pp))
7095 pgcnt++;
7096 } else {
7097 page_io_unlock(pp);
7098 }
7099 page_unlock(pp);
7100 continue;
7101 }
7102 ASSERT(!page_iolock_assert(pp));
7103
7104
7105 /*
7106 * Skip if page is locked or has mappings.
7107 * We don't need the page_struct_lock to look at lckcnt
7108 * and cowcnt because the page is exclusive locked.
7109 */
7110 if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
7111 hat_page_is_mapped(pp)) {
7112 page_unlock(pp);
7113 continue;
7114 }
7115
7116 /*
7117 * dispose skips large pages so try to demote first.
7118 */
7119 if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
7120 page_unlock(pp);
7121 /*
7122 * XXX should skip the remaining page_t's of this
7123 * large page.
7124 */
7125 continue;
7126 }
7127
7128 ASSERT(pp->p_szc == 0);
7129
7130 /*
7131 * No longer mapped -- we can toss it out. How
7132 * we do so depends on whether or not it's dirty.
7133 */
7134 if (hat_ismod(pp) && pp->p_vnode) {
7135 /*
7136 * We must clean the page before it can be
7137 * freed. Setting B_FREE will cause pvn_done
7138 * to free the page when the i/o completes.
7139 * XXX: This also causes it to be accounted
7140 * as a pageout instead of a swap: need
7141 * B_SWAPOUT bit to use instead of B_FREE.
7142 *
7143 * Hold the vnode before releasing the page lock
7144 * to prevent it from being freed and re-used by
7145 * some other thread.
7146 */
7147 VN_HOLD(vp);
7148 page_unlock(pp);
7149
7150 /*
7151 * Queue all i/o requests for the pageout thread
7152 * to avoid saturating the pageout devices.
7153 */
7154 if (!queue_io_request(vp, off))
7155 VN_RELE(vp);
7156 } else {
7157 /*
7158 * The page was clean, free it.
7159 *
7160 * XXX: Can we ever encounter modified pages
7161 * with no associated vnode here?
7162 */
7163 ASSERT(pp->p_vnode != NULL);
7164 /*LINTED: constant in conditional context*/
7165 VN_DISPOSE(pp, B_FREE, 0, kcred);
7166 }
7167
7168 /*
7169 * Credit now even if i/o is in progress.
7170 */
7171 pgcnt++;
7172 }
7173 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7174
7175 /*
7176 * Wakeup pageout to initiate i/o on all queued requests.
7177 */
7178 cv_signal_pageout();
7179 return (ptob(pgcnt));
7180 }
7181
7182 /*
7183 * Synchronize primary storage cache with real object in virtual memory.
7184 *
7185 * XXX - Anonymous pages should not be sync'ed out at all.
7186 */
7187 static int
7188 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7189 {
7190 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7191 struct vpage *vpp;
7192 page_t *pp;
7193 u_offset_t offset;
7194 struct vnode *vp;
7195 u_offset_t off;
7196 caddr_t eaddr;
7197 int bflags;
7198 int err = 0;
7199 int segtype;
7200 int pageprot;
7201 int prot;
7202 ulong_t anon_index;
|
71 #include <sys/proc.h>
72 #include <sys/task.h>
73 #include <sys/project.h>
74 #include <sys/zone.h>
75 #include <sys/shm_impl.h>
76 /*
77 * Private seg op routines.
 *
 * Forward declarations for segvn's implementations of the generic
 * segment-driver interface.  All are static: the rest of the kernel
 * reaches them only through the segvn_ops vector defined below, so
 * this list must stay in sync with that table.
78 */
79 static int segvn_dup(struct seg *seg, struct seg *newseg);
80 static int segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
81 static void segvn_free(struct seg *seg);
82 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
83 caddr_t addr, size_t len, enum fault_type type,
84 enum seg_rw rw);
85 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
86 static int segvn_setprot(struct seg *seg, caddr_t addr,
87 size_t len, uint_t prot);
88 static int segvn_checkprot(struct seg *seg, caddr_t addr,
89 size_t len, uint_t prot);
90 static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
91 static int segvn_sync(struct seg *seg, caddr_t addr, size_t len,
92 int attr, uint_t flags);
93 static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len,
94 char *vec);
95 static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
96 int attr, int op, ulong_t *lockmap, size_t pos);
97 static int segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
98 uint_t *protv);
99 static u_offset_t segvn_getoffset(struct seg *seg, caddr_t addr);
100 static int segvn_gettype(struct seg *seg, caddr_t addr);
101 static int segvn_getvp(struct seg *seg, caddr_t addr,
102 struct vnode **vpp);
103 static int segvn_advise(struct seg *seg, caddr_t addr, size_t len,
104 uint_t behav);
105 static void segvn_dump(struct seg *seg);
106 static int segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
107 struct page ***ppp, enum lock_type type, enum seg_rw rw);
108 static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
109 uint_t szc);
110 static int segvn_getmemid(struct seg *seg, caddr_t addr,
111 memid_t *memidp);
112 static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);
113 static int segvn_capable(struct seg *seg, segcapability_t capable);
114
115 struct seg_ops segvn_ops = {
116 segvn_dup,
117 segvn_unmap,
118 segvn_free,
119 segvn_fault,
120 segvn_faulta,
121 segvn_setprot,
122 segvn_checkprot,
123 segvn_kluster,
124 segvn_sync,
125 segvn_incore,
126 segvn_lockop,
127 segvn_getprot,
128 segvn_getoffset,
129 segvn_gettype,
130 segvn_getvp,
131 segvn_advise,
132 segvn_dump,
133 segvn_pagelock,
134 segvn_setpagesize,
135 segvn_getmemid,
136 segvn_getpolicy,
137 segvn_capable,
138 };
139
140 /*
141 * Common zfod structures, provided as a shorthand for others to use.
142 */
143 static segvn_crargs_t zfod_segvn_crargs =
6978 * see if they happen to be properly allocated.
6979 */
6980
6981 /*
6982 * XXX We cheat here and don't lock the anon slots. We can't because
6983 * we may have been called from the anon layer which might already
6984 * have locked them. We are holding a refcnt on the slots so they
6985 * can't disappear. The worst that will happen is we'll get the wrong
6986 * names (vp, off) for the slots and make a poor klustering decision.
6987 */
6988 swap_xlate(ap, &vp1, &off1);
6989 swap_xlate(oap, &vp2, &off2);
6990
6991
6992 if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
6993 return (-1);
6994 return (0);
6995 }
6996
6997 /*
6998 * Synchronize primary storage cache with real object in virtual memory.
6999 *
7000 * XXX - Anonymous pages should not be sync'ed out at all.
7001 */
7002 static int
7003 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7004 {
7005 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7006 struct vpage *vpp;
7007 page_t *pp;
7008 u_offset_t offset;
7009 struct vnode *vp;
7010 u_offset_t off;
7011 caddr_t eaddr;
7012 int bflags;
7013 int err = 0;
7014 int segtype;
7015 int pageprot;
7016 int prot;
7017 ulong_t anon_index;
|