61 #include <sys/vm.h>
62 #include <sys/dumphdr.h>
63 #include <sys/lgrp.h>
64
65 #include <vm/hat.h>
66 #include <vm/as.h>
67 #include <vm/seg.h>
68 #include <vm/seg_vn.h>
69 #include <vm/pvn.h>
70 #include <vm/anon.h>
71 #include <vm/page.h>
72 #include <vm/vpage.h>
73 #include <sys/proc.h>
74 #include <sys/task.h>
75 #include <sys/project.h>
76 #include <sys/zone.h>
77 #include <sys/shm_impl.h>
78
79 /*
80 * segvn_fault needs a temporary page list array. To avoid calling kmem all
81 * the time, it creates a small (PVN_GETPAGE_NUM entry) array and uses it if
82 * it can. In the rare case when this page list is not large enough, it
83 * goes and gets a large enough array from kmem.
84 *
85 * This small page list array covers either 8 pages or 64kB worth of pages -
86 * whichever is smaller.
87 */
88 #define PVN_MAX_GETPAGE_SZ 0x10000
89 #define PVN_MAX_GETPAGE_NUM 0x8
90
91 #if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
92 #define PVN_GETPAGE_SZ ptob(PVN_MAX_GETPAGE_NUM)
93 #define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
94 #else
95 #define PVN_GETPAGE_SZ PVN_MAX_GETPAGE_SZ
96 #define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
97 #endif
98
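/*
 * Worked example (illustrative, not from the original source): with 4kB
 * base pages, PVN_MAX_GETPAGE_NUM * PAGESIZE == 0x8000, which is below the
 * 0x10000 byte cap, so the #if branch above yields PVN_GETPAGE_NUM == 8 and
 * PVN_GETPAGE_SZ == ptob(8) == 32kB. With 8kB base pages the two limits
 * meet (8 * 0x2000 == 0x10000), the #else branch applies, and the array
 * still covers 8 pages (64kB). Only for base pages larger than 8kB does
 * the 64kB cap shrink PVN_GETPAGE_NUM below 8.
 */
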
99 /*
100 * Private seg op routines.
101 */
102 static int segvn_dup(struct seg *seg, struct seg *newseg);
103 static int segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
104 static void segvn_free(struct seg *seg);
105 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
106 caddr_t addr, size_t len, enum fault_type type,
107 enum seg_rw rw);
108 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
109 static int segvn_setprot(struct seg *seg, caddr_t addr,
110 size_t len, uint_t prot);
111 static int segvn_checkprot(struct seg *seg, caddr_t addr,
112 size_t len, uint_t prot);
113 static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
114 static size_t segvn_swapout(struct seg *seg);
115 static int segvn_sync(struct seg *seg, caddr_t addr, size_t len,
116 int attr, uint_t flags);
117 static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len,
118 char *vec);
119 static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
120 int attr, int op, ulong_t *lockmap, size_t pos);
121 static int segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
122 uint_t *protv);
123 static u_offset_t segvn_getoffset(struct seg *seg, caddr_t addr);
124 static int segvn_gettype(struct seg *seg, caddr_t addr);
125 static int segvn_getvp(struct seg *seg, caddr_t addr,
126 struct vnode **vpp);
127 static int segvn_advise(struct seg *seg, caddr_t addr, size_t len,
128 uint_t behav);
129 static void segvn_dump(struct seg *seg);
130 static int segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
131 struct page ***ppp, enum lock_type type, enum seg_rw rw);
132 static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
133 uint_t szc);
134 static int segvn_getmemid(struct seg *seg, caddr_t addr,
135 memid_t *memidp);
136 static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);
137 static int segvn_capable(struct seg *seg, segcapability_t capable);
138 static int segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
139
140 struct seg_ops segvn_ops = {
141 segvn_dup,
142 segvn_unmap,
143 segvn_free,
144 segvn_fault,
145 segvn_faulta,
146 segvn_setprot,
147 segvn_checkprot,
148 segvn_kluster,
149 segvn_swapout,
150 segvn_sync,
151 segvn_incore,
152 segvn_lockop,
153 segvn_getprot,
154 segvn_getoffset,
155 segvn_gettype,
156 segvn_getvp,
157 segvn_advise,
158 segvn_dump,
159 segvn_pagelock,
160 segvn_setpagesize,
161 segvn_getmemid,
162 segvn_getpolicy,
163 segvn_capable,
164 segvn_inherit
165 };
166
167 /*
168 * Common zfod structures, provided as a shorthand for others to use.
169 */
170 static segvn_crargs_t zfod_segvn_crargs =
171 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
172 static segvn_crargs_t kzfod_segvn_crargs =
173 SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_USER,
174 PROT_ALL & ~PROT_USER);
175 static segvn_crargs_t stack_noexec_crargs =
176 SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_EXEC, PROT_ALL);
177
178 caddr_t zfod_argsp = (caddr_t)&zfod_segvn_crargs; /* user zfod argsp */
179 caddr_t kzfod_argsp = (caddr_t)&kzfod_segvn_crargs; /* kernel zfod argsp */
180 caddr_t stack_exec_argsp = (caddr_t)&zfod_segvn_crargs; /* executable stack */
181 caddr_t stack_noexec_argsp = (caddr_t)&stack_noexec_crargs; /* noexec stack */
182
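/*
 * Illustrative usage (a sketch, not code from this file): callers create
 * anonymous zero-fill segments by handing one of the argsp pointers above
 * to as_map(), which passes it on to segvn_create(), e.g.
 *
 *	error = as_map(as, addr, len, segvn_create, zfod_argsp);
 */
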
183 #define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */
184
3839 u_offset_t off = svd->offset + (uintptr_t)(a - seg->s_base);
3840 ulong_t aindx = svd->anon_index + seg_page(seg, a);
3841 struct vpage *vpage = (svd->vpage != NULL) ?
3842 &svd->vpage[seg_page(seg, a)] : NULL;
3843 vnode_t *vp = svd->vp;
3844 page_t **ppa;
3845 uint_t pszc;
3846 size_t ppgsz;
3847 pgcnt_t ppages;
3848 faultcode_t err = 0;
3849 int ierr;
3850 int vop_size_err = 0;
3851 uint_t protchk, prot, vpprot;
3852 ulong_t i;
3853 int hat_flag = (type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD;
3854 anon_sync_obj_t an_cookie;
3855 enum seg_rw arw;
3856 int alloc_failed = 0;
3857 int adjszc_chk;
3858 struct vattr va;
3859 int xhat = 0;
3860 page_t *pplist;
3861 pfn_t pfn;
3862 int physcontig;
3863 int upgrdfail;
3864 int segvn_anypgsz_vnode = 0; /* for now map vnode with 2 page sizes */
3865 int tron = (svd->tr_state == SEGVN_TR_ON);
3866
3867 ASSERT(szc != 0);
3868 ASSERT(vp != NULL);
3869 ASSERT(brkcow == 0 || amp != NULL);
3870 ASSERT(tron == 0 || amp != NULL);
3871 ASSERT(enable_mbit_wa == 0); /* no mbit simulations with large pages */
3872 ASSERT(!(svd->flags & MAP_NORESERVE));
3873 ASSERT(type != F_SOFTUNLOCK);
3874 ASSERT(IS_P2ALIGNED(a, maxpgsz));
3875 ASSERT(amp == NULL || IS_P2ALIGNED(aindx, maxpages));
3876 ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
3877 ASSERT(seg->s_szc < NBBY * sizeof (int));
3878 ASSERT(type != F_SOFTLOCK || lpgeaddr - a == maxpgsz);
3879 ASSERT(svd->tr_state != SEGVN_TR_INIT);
3889 switch (rw) {
3890 case S_READ:
3891 protchk = PROT_READ;
3892 break;
3893 case S_WRITE:
3894 protchk = PROT_WRITE;
3895 break;
3896 case S_EXEC:
3897 protchk = PROT_EXEC;
3898 break;
3899 case S_OTHER:
3900 default:
3901 protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
3902 break;
3903 }
3904 } else {
3905 prot = svd->prot;
3906 /* caller has already done segment level protection check. */
3907 }
3908
3909 if (seg->s_as->a_hat != hat) {
3910 xhat = 1;
3911 }
3912
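/*
 * On a write fault against a MAP_PRIVATE mapping the underlying object is
 * only ever read (modified data goes to anon pages), so arw, the rw flag
 * later passed to VOP_GETPAGE, is downgraded to S_READ.
 */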
3913 if (rw == S_WRITE && segtype == MAP_PRIVATE) {
3914 SEGVN_VMSTAT_FLTVNPAGES(2);
3915 arw = S_READ;
3916 } else {
3917 arw = rw;
3918 }
3919
3920 ppa = kmem_alloc(ppasize, KM_SLEEP);
3921
3922 VM_STAT_COND_ADD(amp != NULL, segvnvmstats.fltvnpages[3]);
3923
3924 for (;;) {
3925 adjszc_chk = 0;
3926 for (; a < lpgeaddr; a += pgsz, off += pgsz, aindx += pages) {
3927 if (adjszc_chk) {
3928 while (szc < seg->s_szc) {
3929 uintptr_t e;
3930 uint_t tszc;
3931 tszc = segvn_anypgsz_vnode ? szc + 1 :
3932 seg->s_szc;
4248 off + (i << PAGESHIFT));
4249 }
4250 #endif /* DEBUG */
4251 /*
4253 * All pages are of the szc we need and they are
4254 * all locked, so they can't change szc. Load the
4255 * translations.
4256 *
4257 * If the page got promoted since the last check,
4258 * we don't need pplist.
4258 */
4259 if (pplist != NULL) {
4260 page_free_replacement_page(pplist);
4261 page_create_putback(pages);
4262 }
4263 if (PP_ISMIGRATE(ppa[0])) {
4264 page_migrate(seg, a, ppa, pages);
4265 }
4266 SEGVN_UPDATE_MODBITS(ppa, pages, rw,
4267 prot, vpprot);
4268 if (!xhat) {
4269 hat_memload_array_region(hat, a, pgsz,
4270 ppa, prot & vpprot, hat_flag,
4271 svd->rcookie);
4272 } else {
4273 /*
4274 * avoid large xhat mappings to FS
4275 * pages so that hat_page_demote()
4276 * doesn't need to check for xhat
4277 * large mappings.
4278 * Don't use regions with xhats.
4279 */
4280 for (i = 0; i < pages; i++) {
4281 hat_memload(hat,
4282 a + (i << PAGESHIFT),
4283 ppa[i], prot & vpprot,
4284 hat_flag);
4285 }
4286 }
4287
4288 if (!(hat_flag & HAT_LOAD_LOCK)) {
4289 for (i = 0; i < pages; i++) {
4290 page_unlock(ppa[i]);
4291 }
4292 }
4293 if (amp != NULL) {
4294 anon_array_exit(&an_cookie);
4295 ANON_LOCK_EXIT(&amp->a_rwlock);
4296 }
4297 goto next;
4298 }
4299
4300 /*
4301 * See if upsize is possible.
4302 */
4303 if (pszc > szc && szc < seg->s_szc &&
4304 (segvn_anypgsz_vnode || pszc >= seg->s_szc)) {
4305 pgcnt_t aphase;
4306 uint_t pszc1 = MIN(pszc, seg->s_szc);
4320 page_free_replacement_page(pl);
4321 page_create_putback(pages);
4322 }
4323 for (i = 0; i < pages; i++) {
4324 page_unlock(ppa[i]);
4325 }
4326 if (amp != NULL) {
4327 anon_array_exit(&an_cookie);
4328 ANON_LOCK_EXIT(&amp->a_rwlock);
4329 }
4330 pszc = pszc1;
4331 ierr = -2;
4332 break;
4333 }
4334 }
4335
4336 /*
4337 * Check whether we should use the smallest mapping size.
4338 */
4339 upgrdfail = 0;
4340 if (szc == 0 || xhat ||
4341 (pszc >= szc &&
4342 !IS_P2ALIGNED(pfn, pages)) ||
4343 (pszc < szc &&
4344 !segvn_full_szcpages(ppa, szc, &upgrdfail,
4345 &pszc))) {
4346
4347 if (upgrdfail && type != F_SOFTLOCK) {
4348 /*
4349 * segvn_full_szcpages failed to lock
4350 * all pages EXCL. Size down.
4351 */
4352 ASSERT(pszc < szc);
4353
4354 SEGVN_VMSTAT_FLTVNPAGES(33);
4355
4356 if (pplist != NULL) {
4357 page_t *pl = pplist;
4358 page_free_replacement_page(pl);
4359 page_create_putback(pages);
4360 }
4361
4362 for (i = 0; i < pages; i++) {
4363 page_unlock(ppa[i]);
4364 }
4365 if (amp != NULL) {
4366 anon_array_exit(&an_cookie);
4367 ANON_LOCK_EXIT(&amp->a_rwlock);
4368 }
4369 ierr = -1;
4370 break;
4371 }
4372 if (szc != 0 && !xhat && !upgrdfail) {
4373 segvn_faultvnmpss_align_err5++;
4374 }
4375 SEGVN_VMSTAT_FLTVNPAGES(34);
4376 if (pplist != NULL) {
4377 page_free_replacement_page(pplist);
4378 page_create_putback(pages);
4379 }
4380 SEGVN_UPDATE_MODBITS(ppa, pages, rw,
4381 prot, vpprot);
4382 if (upgrdfail && segvn_anypgsz_vnode) {
4383 /* SOFTLOCK case */
4384 hat_memload_array_region(hat, a, pgsz,
4385 ppa, prot & vpprot, hat_flag,
4386 svd->rcookie);
4387 } else {
4388 for (i = 0; i < pages; i++) {
4389 hat_memload_region(hat,
4390 a + (i << PAGESHIFT),
4391 ppa[i], prot & vpprot,
4392 hat_flag, svd->rcookie);
4933 * Call VOP_GETPAGE over the range of non-anonymous pages
4934 * endif
4935 * Loop over all addresses requested
4936 * Call segvn_faultpage passing in page list
4937 * to load up translations and handle anonymous pages
4938 * endloop
4939 * Load up translation to any additional pages in page list not
4940 * already handled that fit into this segment
4941 */
4942 static faultcode_t
4943 segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
4944 enum fault_type type, enum seg_rw rw)
4945 {
4946 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
4947 page_t **plp, **ppp, *pp;
4948 u_offset_t off;
4949 caddr_t a;
4950 struct vpage *vpage;
4951 uint_t vpprot, prot;
4952 int err;
4953 page_t *pl[PVN_GETPAGE_NUM + 1];
4954 size_t plsz, pl_alloc_sz;
4955 size_t page;
4956 ulong_t anon_index;
4957 struct anon_map *amp;
4958 int dogetpage = 0;
4959 caddr_t lpgaddr, lpgeaddr;
4960 size_t pgsz;
4961 anon_sync_obj_t cookie;
4962 int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
4963
4964 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
4965 ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE);
4966
4967 /*
4968 * First handle the easy stuff
4969 */
4970 if (type == F_SOFTUNLOCK) {
4971 if (rw == S_READ_NOCOW) {
4972 rw = S_READ;
4973 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
5372 * Only acquire reader lock to prevent amp->ahp
5373 * from being changed. It's ok to miss pages,
5374 * hence we don't do anon_array_enter
5375 */
5376 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
5377 ap = anon_get_ptr(amp->ahp, anon_index);
5378
5379 if (len <= PAGESIZE)
5380 /* inline non_anon() */
5381 dogetpage = (ap == NULL);
5382 else
5383 dogetpage = non_anon(amp->ahp, anon_index,
5384 &vp_off, &vp_len);
5385 ANON_LOCK_EXIT(&amp->a_rwlock);
5386 }
5387
5388 if (dogetpage) {
5389 enum seg_rw arw;
5390 struct as *as = seg->s_as;
5391
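/*
 * Note: pl[] holds PVN_GETPAGE_NUM + 1 entries; the extra slot is the
 * NULL terminator of the page list, hence the "- 1" in the capacity
 * check below.
 */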
5392 if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) {
5393 /*
5394 * Page list won't fit in local array,
5395 * allocate one of the needed size.
5396 */
5397 pl_alloc_sz =
5398 (btop(len) + 1) * sizeof (page_t *);
5399 plp = kmem_alloc(pl_alloc_sz, KM_SLEEP);
5400 plp[0] = NULL;
5401 plsz = len;
5402 } else if (rw == S_WRITE && svd->type == MAP_PRIVATE ||
5403 svd->tr_state == SEGVN_TR_ON || rw == S_OTHER ||
5404 (((size_t)(addr + PAGESIZE) <
5405 (size_t)(seg->s_base + seg->s_size)) &&
5406 hat_probe(as->a_hat, addr + PAGESIZE))) {
5407 /*
5408 * Ask VOP_GETPAGE to return the exact number
5409 * of pages if
5410 * (a) this is a COW fault, or
5411 * (b) this is a software fault, or
5412 * (c) next page is already mapped.
5413 */
5414 plsz = len;
5415 } else {
5416 /*
5417 * Ask VOP_GETPAGE to return adjacent pages
5418 * within the segment.
5419 */
5420 plsz = MIN((size_t)PVN_GETPAGE_SZ, (size_t)
5421 ((seg->s_base + seg->s_size) - addr));
5422 ASSERT((addr + plsz) <=
5423 (seg->s_base + seg->s_size));
5424 }
5425
5426 /*
5427 * Need to get some non-anonymous pages.
5428 * We must make only one call to VOP_GETPAGE
5429 * here, to prevent certain deadlocking
5430 * conditions while locking. In this case
5431 * non_anon() should have picked up the smallest
5432 * range that includes all the non-anonymous
5433 * pages in the requested range. We have to
5434 * be careful about which rw flag to pass in,
5435 * because on a private mapping the underlying
5436 * object is never allowed to be written.
5437 */
5438 if (rw == S_WRITE && svd->type == MAP_PRIVATE) {
5439 arw = S_READ;
5440 } else {
6067 * unload any current translations that might exist).
6068 */
6069 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
6070 } else {
6071 /*
6072 * A shared mapping or a private mapping in which write
6073 * protection is going to be denied - just change all the
6074 * protections over the range of addresses in question.
6075 * segvn does not support any attributes other
6076 * than prot, so we can use hat_chgattr.
6077 */
6078 hat_chgattr(seg->s_as->a_hat, addr, len, prot);
6079 }
6080
6081 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6082
6083 return (0);
6084 }
6085
6086 /*
6087 * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize,
6088 * to determine if the seg is capable of mapping the requested szc.
6089 */
6090 static int
6091 segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
6092 {
6093 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6094 struct segvn_data *nsvd;
6095 struct anon_map *amp = svd->amp;
6096 struct seg *nseg;
6097 caddr_t eaddr = addr + len, a;
6098 size_t pgsz = page_get_pagesize(szc);
6099 pgcnt_t pgcnt = page_get_pagecnt(szc);
6100 int err;
6101 u_offset_t off = svd->offset + (uintptr_t)(addr - seg->s_base);
6102
6103 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
6104 ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
6105
6106 if (seg->s_szc == szc || segvn_lpg_disable != 0) {
6107 return (0);
7055 * see if they happen to be properly allocated.
7056 */
7057
7058 /*
7059 * XXX We cheat here and don't lock the anon slots. We can't because
7060 * we may have been called from the anon layer which might already
7061 * have locked them. We are holding a refcnt on the slots so they
7062 * can't disappear. The worst that will happen is we'll get the wrong
7063 * names (vp, off) for the slots and make a poor klustering decision.
7064 */
7065 swap_xlate(ap, &vp1, &off1);
7066 swap_xlate(oap, &vp2, &off2);
7067
7068
7069 if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
7070 return (-1);
7071 return (0);
7072 }
7073
7074 /*
7075 * Swap the pages of seg out to secondary storage, returning the
7076 * number of bytes of storage freed.
7077 *
7078 * The basic idea is first to unload all translations and then to call
7079 * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
7080 * swap device. Pages to which other segments have mappings will remain
7081 * mapped and won't be swapped. Our caller (as_swapout) has already
7082 * performed the unloading step.
7083 *
7084 * The value returned is intended to correlate well with the process's
7085 * memory requirements. However, there are some caveats:
7086 * 1) When given a shared segment as argument, this routine will
7087 * only succeed in swapping out pages for the last sharer of the
7088 * segment. (Previous callers will only have decremented mapping
7089 * reference counts.)
7090 * 2) We assume that the hat layer maintains a large enough translation
7091 * cache to capture process reference patterns.
7092 */
7093 static size_t
7094 segvn_swapout(struct seg *seg)
7095 {
7096 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7097 struct anon_map *amp;
7098 pgcnt_t pgcnt = 0;
7099 pgcnt_t npages;
7100 pgcnt_t page;
7101 ulong_t anon_index;
7102
7103 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
7104
7105 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7106 /*
7107 * Find pages unmapped by our caller and force them
7108 * out to the virtual swap device.
7109 */
7110 if ((amp = svd->amp) != NULL)
7111 anon_index = svd->anon_index;
7112 npages = seg->s_size >> PAGESHIFT;
7113 for (page = 0; page < npages; page++) {
7114 page_t *pp;
7115 struct anon *ap;
7116 struct vnode *vp;
7117 u_offset_t off;
7118 anon_sync_obj_t cookie;
7119
7120 /*
7121 * Obtain <vp, off> pair for the page, then look it up.
7122 *
7123 * Note that this code is willing to consider regular
7124 * pages as well as anon pages. Is this appropriate here?
7125 */
7126 ap = NULL;
7127 if (amp != NULL) {
7128 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
7129 if (anon_array_try_enter(amp, anon_index + page,
7130 &cookie)) {
7131 ANON_LOCK_EXIT(&amp->a_rwlock);
7132 continue;
7133 }
7134 ap = anon_get_ptr(amp->ahp, anon_index + page);
7135 if (ap != NULL) {
7136 swap_xlate(ap, &vp, &off);
7137 } else {
7138 vp = svd->vp;
7139 off = svd->offset + ptob(page);
7140 }
7141 anon_array_exit(&cookie);
7142 ANON_LOCK_EXIT(&amp->a_rwlock);
7143 } else {
7144 vp = svd->vp;
7145 off = svd->offset + ptob(page);
7146 }
7147 if (vp == NULL) { /* untouched zfod page */
7148 ASSERT(ap == NULL);
7149 continue;
7150 }
7151
7152 pp = page_lookup_nowait(vp, off, SE_SHARED);
7153 if (pp == NULL)
7154 continue;
7155
7156
7157 /*
7158 * Examine the page to see whether it can be tossed out,
7159 * keeping track of how many we've found.
7160 */
7161 if (!page_tryupgrade(pp)) {
7162 /*
7163 * If the page has an i/o lock and no mappings,
7164 * it's very likely that the page is being
7165 * written out as a result of klustering.
7166 * Assume this is so and take credit for it here.
7167 */
7168 if (!page_io_trylock(pp)) {
7169 if (!hat_page_is_mapped(pp))
7170 pgcnt++;
7171 } else {
7172 page_io_unlock(pp);
7173 }
7174 page_unlock(pp);
7175 continue;
7176 }
7177 ASSERT(!page_iolock_assert(pp));
7178
7179
7180 /*
7181 * Skip if page is locked or has mappings.
7182 * We don't need the page_struct_lock to look at lckcnt
7183 * and cowcnt because the page is exclusive locked.
7184 */
7185 if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
7186 hat_page_is_mapped(pp)) {
7187 page_unlock(pp);
7188 continue;
7189 }
7190
7191 /*
7192 * VN_DISPOSE() skips large pages, so try to demote the page first.
7193 */
7194 if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
7195 page_unlock(pp);
7196 /*
7197 * XXX should skip the remaining page_t's of this
7198 * large page.
7199 */
7200 continue;
7201 }
7202
7203 ASSERT(pp->p_szc == 0);
7204
7205 /*
7206 * No longer mapped -- we can toss it out. How
7207 * we do so depends on whether or not it's dirty.
7208 */
7209 if (hat_ismod(pp) && pp->p_vnode) {
7210 /*
7211 * We must clean the page before it can be
7212 * freed. Setting B_FREE will cause pvn_done
7213 * to free the page when the i/o completes.
7214 * XXX: This also causes it to be accounted
7215 * as a pageout instead of a swap: need
7216 * B_SWAPOUT bit to use instead of B_FREE.
7217 *
7218 * Hold the vnode before releasing the page lock
7219 * to prevent it from being freed and re-used by
7220 * some other thread.
7221 */
7222 VN_HOLD(vp);
7223 page_unlock(pp);
7224
7225 /*
7226 * Queue all i/o requests for the pageout thread
7227 * to avoid saturating the pageout devices.
7228 */
7229 if (!queue_io_request(vp, off))
7230 VN_RELE(vp);
7231 } else {
7232 /*
7233 * The page was clean, free it.
7234 *
7235 * XXX: Can we ever encounter modified pages
7236 * with no associated vnode here?
7237 */
7238 ASSERT(pp->p_vnode != NULL);
7239 /*LINTED: constant in conditional context*/
7240 VN_DISPOSE(pp, B_FREE, 0, kcred);
7241 }
7242
7243 /*
7244 * Credit now even if i/o is in progress.
7245 */
7246 pgcnt++;
7247 }
7248 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7249
7250 /*
7251 * Wakeup pageout to initiate i/o on all queued requests.
7252 */
7253 cv_signal_pageout();
7254 return (ptob(pgcnt));
7255 }
7256
7257 /*
7258 * Synchronize primary storage cache with real object in virtual memory.
7259 *
7260 * XXX - Anonymous pages should not be sync'ed out at all.
7261 */
7262 static int
7263 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7264 {
7265 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7266 struct vpage *vpp;
7267 page_t *pp;
7268 u_offset_t offset;
7269 struct vnode *vp;
7270 u_offset_t off;
7271 caddr_t eaddr;
7272 int bflags;
7273 int err = 0;
7274 int segtype;
7275 int pageprot;
7276 int prot;
7277 ulong_t anon_index;
9672 /*
9673 * Get policy info for private or shared memory
9674 */
9675 if (svn_data->type != MAP_SHARED) {
9676 if (svn_data->tr_state != SEGVN_TR_ON) {
9677 policy_info = &svn_data->policy_info;
9678 } else {
9679 policy_info = &svn_data->tr_policy_info;
9680 ASSERT(policy_info->mem_policy ==
9681 LGRP_MEM_POLICY_NEXT_SEG);
9682 }
9683 } else {
9684 amp = svn_data->amp;
9685 anon_index = svn_data->anon_index + seg_page(seg, addr);
9686 vp = svn_data->vp;
9687 vn_off = svn_data->offset + (uintptr_t)(addr - seg->s_base);
9688 policy_info = lgrp_shm_policy_get(amp, anon_index, vp, vn_off);
9689 }
9690
9691 return (policy_info);
9692 }
9693
9694 /*ARGSUSED*/
9695 static int
9696 segvn_capable(struct seg *seg, segcapability_t capability)
9697 {
9698 return (0);
9699 }
9700
9701 /*
9702 * Bind a text vnode segment to an amp. If we bind successfully, mappings
9703 * will be established to per-vnode, per-lgroup amp pages instead of to
9704 * vnode pages. There's one amp per vnode text mapping per lgroup. Many
9705 * processes may share the same text replication amp. If a suitable amp
9706 * doesn't already exist in the svntr hash table, create a new one. We may
9707 * fail to bind to an amp if the segment is not eligible for text
9708 * replication. The code below first checks these conditions; if binding
9709 * succeeds, the segment's tr_state is set to on and svd->amp points to the
9710 * amp to use. Otherwise tr_state is set to off and svd->amp remains NULL.
9711 */
9712 static void
9713 segvn_textrepl(struct seg *seg)
9714 {
9715 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
9716 vnode_t *vp = svd->vp;
9717 u_offset_t off = svd->offset;
9718 size_t size = seg->s_size;
61 #include <sys/vm.h>
62 #include <sys/dumphdr.h>
63 #include <sys/lgrp.h>
64
65 #include <vm/hat.h>
66 #include <vm/as.h>
67 #include <vm/seg.h>
68 #include <vm/seg_vn.h>
69 #include <vm/pvn.h>
70 #include <vm/anon.h>
71 #include <vm/page.h>
72 #include <vm/vpage.h>
73 #include <sys/proc.h>
74 #include <sys/task.h>
75 #include <sys/project.h>
76 #include <sys/zone.h>
77 #include <sys/shm_impl.h>
78
79 /*
80 * segvn_fault needs a temporary page list array. To avoid calling kmem all
81 * the time, it creates a small (FAULT_TMP_PAGES_NUM entry) array and uses
82 * it if it can. In the rare case when this page list is not large enough,
83 * it goes and gets a large enough array from kmem.
84 */
85 #define FAULT_TMP_PAGES_NUM 0x8
86 #define FAULT_TMP_PAGES_SZ ptob(FAULT_TMP_PAGES_NUM)
87
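/*
 * Illustrative arithmetic (an assumption about base page size, not from
 * the original source): FAULT_TMP_PAGES_SZ is ptob(FAULT_TMP_PAGES_NUM),
 * i.e. 8 base pages: 32kB with 4kB pages, 64kB with 8kB pages.
 */
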
88 /*
89 * Private seg op routines.
90 */
91 static int segvn_dup(struct seg *seg, struct seg *newseg);
92 static int segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
93 static void segvn_free(struct seg *seg);
94 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
95 caddr_t addr, size_t len, enum fault_type type,
96 enum seg_rw rw);
97 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
98 static int segvn_setprot(struct seg *seg, caddr_t addr,
99 size_t len, uint_t prot);
100 static int segvn_checkprot(struct seg *seg, caddr_t addr,
101 size_t len, uint_t prot);
102 static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
103 static int segvn_sync(struct seg *seg, caddr_t addr, size_t len,
104 int attr, uint_t flags);
105 static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len,
106 char *vec);
107 static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
108 int attr, int op, ulong_t *lockmap, size_t pos);
109 static int segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
110 uint_t *protv);
111 static u_offset_t segvn_getoffset(struct seg *seg, caddr_t addr);
112 static int segvn_gettype(struct seg *seg, caddr_t addr);
113 static int segvn_getvp(struct seg *seg, caddr_t addr,
114 struct vnode **vpp);
115 static int segvn_advise(struct seg *seg, caddr_t addr, size_t len,
116 uint_t behav);
117 static void segvn_dump(struct seg *seg);
118 static int segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
119 struct page ***ppp, enum lock_type type, enum seg_rw rw);
120 static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
121 uint_t szc);
122 static int segvn_getmemid(struct seg *seg, caddr_t addr,
123 memid_t *memidp);
124 static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);
125 static int segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
126
127 const struct seg_ops segvn_ops = {
128 .dup = segvn_dup,
129 .unmap = segvn_unmap,
130 .free = segvn_free,
131 .fault = segvn_fault,
132 .faulta = segvn_faulta,
133 .setprot = segvn_setprot,
134 .checkprot = segvn_checkprot,
135 .kluster = segvn_kluster,
136 .sync = segvn_sync,
137 .incore = segvn_incore,
138 .lockop = segvn_lockop,
139 .getprot = segvn_getprot,
140 .getoffset = segvn_getoffset,
141 .gettype = segvn_gettype,
142 .getvp = segvn_getvp,
143 .advise = segvn_advise,
144 .dump = segvn_dump,
145 .pagelock = segvn_pagelock,
146 .setpagesize = segvn_setpagesize,
147 .getmemid = segvn_getmemid,
148 .getpolicy = segvn_getpolicy,
149 .inherit = segvn_inherit,
150 };
151
152 /*
153 * Common zfod structures, provided as a shorthand for others to use.
154 */
155 static segvn_crargs_t zfod_segvn_crargs =
156 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
157 static segvn_crargs_t kzfod_segvn_crargs =
158 SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_USER,
159 PROT_ALL & ~PROT_USER);
160 static segvn_crargs_t stack_noexec_crargs =
161 SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_EXEC, PROT_ALL);
162
163 caddr_t zfod_argsp = (caddr_t)&zfod_segvn_crargs; /* user zfod argsp */
164 caddr_t kzfod_argsp = (caddr_t)&kzfod_segvn_crargs; /* kernel zfod argsp */
165 caddr_t stack_exec_argsp = (caddr_t)&zfod_segvn_crargs; /* executable stack */
166 caddr_t stack_noexec_argsp = (caddr_t)&stack_noexec_crargs; /* noexec stack */
167
168 #define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */
169
3824 u_offset_t off = svd->offset + (uintptr_t)(a - seg->s_base);
3825 ulong_t aindx = svd->anon_index + seg_page(seg, a);
3826 struct vpage *vpage = (svd->vpage != NULL) ?
3827 &svd->vpage[seg_page(seg, a)] : NULL;
3828 vnode_t *vp = svd->vp;
3829 page_t **ppa;
3830 uint_t pszc;
3831 size_t ppgsz;
3832 pgcnt_t ppages;
3833 faultcode_t err = 0;
3834 int ierr;
3835 int vop_size_err = 0;
3836 uint_t protchk, prot, vpprot;
3837 ulong_t i;
3838 int hat_flag = (type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD;
3839 anon_sync_obj_t an_cookie;
3840 enum seg_rw arw;
3841 int alloc_failed = 0;
3842 int adjszc_chk;
3843 struct vattr va;
3844 page_t *pplist;
3845 pfn_t pfn;
3846 int physcontig;
3847 int upgrdfail;
3848 int segvn_anypgsz_vnode = 0; /* for now map vnode with 2 page sizes */
3849 int tron = (svd->tr_state == SEGVN_TR_ON);
3850
3851 ASSERT(szc != 0);
3852 ASSERT(vp != NULL);
3853 ASSERT(brkcow == 0 || amp != NULL);
3854 ASSERT(tron == 0 || amp != NULL);
3855 ASSERT(enable_mbit_wa == 0); /* no mbit simulations with large pages */
3856 ASSERT(!(svd->flags & MAP_NORESERVE));
3857 ASSERT(type != F_SOFTUNLOCK);
3858 ASSERT(IS_P2ALIGNED(a, maxpgsz));
3859 ASSERT(amp == NULL || IS_P2ALIGNED(aindx, maxpages));
3860 ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
3861 ASSERT(seg->s_szc < NBBY * sizeof (int));
3862 ASSERT(type != F_SOFTLOCK || lpgeaddr - a == maxpgsz);
3863 ASSERT(svd->tr_state != SEGVN_TR_INIT);
3873 switch (rw) {
3874 case S_READ:
3875 protchk = PROT_READ;
3876 break;
3877 case S_WRITE:
3878 protchk = PROT_WRITE;
3879 break;
3880 case S_EXEC:
3881 protchk = PROT_EXEC;
3882 break;
3883 case S_OTHER:
3884 default:
3885 protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
3886 break;
3887 }
3888 } else {
3889 prot = svd->prot;
3890 /* caller has already done segment level protection check. */
3891 }
3892
3893 if (rw == S_WRITE && segtype == MAP_PRIVATE) {
3894 SEGVN_VMSTAT_FLTVNPAGES(2);
3895 arw = S_READ;
3896 } else {
3897 arw = rw;
3898 }
3899
3900 ppa = kmem_alloc(ppasize, KM_SLEEP);
3901
3902 VM_STAT_COND_ADD(amp != NULL, segvnvmstats.fltvnpages[3]);
3903
3904 for (;;) {
3905 adjszc_chk = 0;
3906 for (; a < lpgeaddr; a += pgsz, off += pgsz, aindx += pages) {
3907 if (adjszc_chk) {
3908 while (szc < seg->s_szc) {
3909 uintptr_t e;
3910 uint_t tszc;
3911 tszc = segvn_anypgsz_vnode ? szc + 1 :
3912 seg->s_szc;
4228 off + (i << PAGESHIFT));
4229 }
4230 #endif /* DEBUG */
4231 /*
4232 * All pages are of the szc we need and they are
4233 * all locked, so they can't change szc. Load the
4234 * translations.
4235 *
4236 * If the page got promoted since the last check,
4237 * we don't need pplist.
4238 */
4239 if (pplist != NULL) {
4240 page_free_replacement_page(pplist);
4241 page_create_putback(pages);
4242 }
4243 if (PP_ISMIGRATE(ppa[0])) {
4244 page_migrate(seg, a, ppa, pages);
4245 }
4246 SEGVN_UPDATE_MODBITS(ppa, pages, rw,
4247 prot, vpprot);
4248 hat_memload_array_region(hat, a, pgsz,
4249 ppa, prot & vpprot, hat_flag,
4250 svd->rcookie);
4251
4252 if (!(hat_flag & HAT_LOAD_LOCK)) {
4253 for (i = 0; i < pages; i++) {
4254 page_unlock(ppa[i]);
4255 }
4256 }
4257 if (amp != NULL) {
4258 anon_array_exit(&an_cookie);
4259 ANON_LOCK_EXIT(&amp->a_rwlock);
4260 }
4261 goto next;
4262 }
4263
4264 /*
4265 * See if upsize is possible.
4266 */
4267 if (pszc > szc && szc < seg->s_szc &&
4268 (segvn_anypgsz_vnode || pszc >= seg->s_szc)) {
4269 pgcnt_t aphase;
4270 uint_t pszc1 = MIN(pszc, seg->s_szc);
4284 page_free_replacement_page(pl);
4285 page_create_putback(pages);
4286 }
4287 for (i = 0; i < pages; i++) {
4288 page_unlock(ppa[i]);
4289 }
4290 if (amp != NULL) {
4291 anon_array_exit(&an_cookie);
4292 ANON_LOCK_EXIT(&amp->a_rwlock);
4293 }
4294 pszc = pszc1;
4295 ierr = -2;
4296 break;
4297 }
4298 }
4299
4300 /*
4301 * Check whether we should use the smallest mapping size.
4302 */
4303 upgrdfail = 0;
4304 if (szc == 0 ||
4305 (pszc >= szc &&
4306 !IS_P2ALIGNED(pfn, pages)) ||
4307 (pszc < szc &&
4308 !segvn_full_szcpages(ppa, szc, &upgrdfail,
4309 &pszc))) {
4310
4311 if (upgrdfail && type != F_SOFTLOCK) {
4312 /*
4313 * segvn_full_szcpages failed to lock
4314 * all pages EXCL. Size down.
4315 */
4316 ASSERT(pszc < szc);
4317
4318 SEGVN_VMSTAT_FLTVNPAGES(33);
4319
4320 if (pplist != NULL) {
4321 page_t *pl = pplist;
4322 page_free_replacement_page(pl);
4323 page_create_putback(pages);
4324 }
4325
4326 for (i = 0; i < pages; i++) {
4327 page_unlock(ppa[i]);
4328 }
4329 if (amp != NULL) {
4330 anon_array_exit(&an_cookie);
4331 ANON_LOCK_EXIT(&amp->a_rwlock);
4332 }
4333 ierr = -1;
4334 break;
4335 }
4336 if (szc != 0 && !upgrdfail) {
4337 segvn_faultvnmpss_align_err5++;
4338 }
4339 SEGVN_VMSTAT_FLTVNPAGES(34);
4340 if (pplist != NULL) {
4341 page_free_replacement_page(pplist);
4342 page_create_putback(pages);
4343 }
4344 SEGVN_UPDATE_MODBITS(ppa, pages, rw,
4345 prot, vpprot);
4346 if (upgrdfail && segvn_anypgsz_vnode) {
4347 /* SOFTLOCK case */
4348 hat_memload_array_region(hat, a, pgsz,
4349 ppa, prot & vpprot, hat_flag,
4350 svd->rcookie);
4351 } else {
4352 for (i = 0; i < pages; i++) {
4353 hat_memload_region(hat,
4354 a + (i << PAGESHIFT),
4355 ppa[i], prot & vpprot,
4356 hat_flag, svd->rcookie);
4897 * Call VOP_GETPAGE over the range of non-anonymous pages
4898 * endif
4899 * Loop over all addresses requested
4900 * Call segvn_faultpage passing in page list
4901 * to load up translations and handle anonymous pages
4902 * endloop
4903 * Load up translation to any additional pages in page list not
4904 * already handled that fit into this segment
4905 */
4906 static faultcode_t
4907 segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
4908 enum fault_type type, enum seg_rw rw)
4909 {
4910 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
4911 page_t **plp, **ppp, *pp;
4912 u_offset_t off;
4913 caddr_t a;
4914 struct vpage *vpage;
4915 uint_t vpprot, prot;
4916 int err;
4917 page_t *pl[FAULT_TMP_PAGES_NUM + 1];
4918 size_t plsz, pl_alloc_sz;
4919 size_t page;
4920 ulong_t anon_index;
4921 struct anon_map *amp;
4922 int dogetpage = 0;
4923 caddr_t lpgaddr, lpgeaddr;
4924 size_t pgsz;
4925 anon_sync_obj_t cookie;
4926 int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
4927
4928 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
4929 ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE);
4930
4931 /*
4932 * First handle the easy stuff
4933 */
4934 if (type == F_SOFTUNLOCK) {
4935 if (rw == S_READ_NOCOW) {
4936 rw = S_READ;
4937 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
5336 * Only acquire reader lock to prevent amp->ahp
5337 * from being changed. It's ok to miss pages,
5338 * hence we don't do anon_array_enter
5339 */
5340 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
5341 ap = anon_get_ptr(amp->ahp, anon_index);
5342
5343 if (len <= PAGESIZE)
5344 /* inline non_anon() */
5345 dogetpage = (ap == NULL);
5346 else
5347 dogetpage = non_anon(amp->ahp, anon_index,
5348 &vp_off, &vp_len);
5349 ANON_LOCK_EXIT(&amp->a_rwlock);
5350 }
5351
5352 if (dogetpage) {
5353 enum seg_rw arw;
5354 struct as *as = seg->s_as;
5355
5356 if (len > FAULT_TMP_PAGES_SZ) {
5357 /*
5358 * Page list won't fit in local array,
5359 * allocate one of the needed size.
5360 */
5361 pl_alloc_sz =
5362 (btop(len) + 1) * sizeof (page_t *);
5363 plp = kmem_alloc(pl_alloc_sz, KM_SLEEP);
5364 plp[0] = NULL;
5365 plsz = len;
5366 } else if (rw == S_WRITE && svd->type == MAP_PRIVATE ||
5367 svd->tr_state == SEGVN_TR_ON || rw == S_OTHER ||
5368 (((size_t)(addr + PAGESIZE) <
5369 (size_t)(seg->s_base + seg->s_size)) &&
5370 hat_probe(as->a_hat, addr + PAGESIZE))) {
5371 /*
5372 * Ask VOP_GETPAGE to return the exact number
5373 * of pages if
5374 * (a) this is a COW fault, or
5375 * (b) this is a software fault, or
5376 * (c) next page is already mapped.
5377 */
5378 plsz = len;
5379 } else {
5380 /*
5381 * Ask VOP_GETPAGE to return adjacent pages
5382 * within the segment.
5383 */
5384 plsz = MIN((size_t)FAULT_TMP_PAGES_SZ, (size_t)
5385 ((seg->s_base + seg->s_size) - addr));
5386 ASSERT((addr + plsz) <=
5387 (seg->s_base + seg->s_size));
5388 }
5389
5390 /*
5391 * Need to get some non-anonymous pages.
5392 * We must make only one call to VOP_GETPAGE
5393 * here, to prevent certain deadlocking
5394 * conditions while locking. In this case
5395 * non_anon() should have picked up the smallest
5396 * range that includes all the non-anonymous
5397 * pages in the requested range. We have to
5398 * be careful about which rw flag to pass in,
5399 * because on a private mapping the underlying
5400 * object is never allowed to be written.
5401 */
5402 if (rw == S_WRITE && svd->type == MAP_PRIVATE) {
5403 arw = S_READ;
5404 } else {
6031 * unload any current translations that might exist).
6032 */
6033 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
6034 } else {
6035 /*
6036 * A shared mapping or a private mapping in which write
6037 * protection is going to be denied - just change all the
6038 * protections over the range of addresses in question.
6039 * segvn does not support any attributes other
6040 * than prot, so we can use hat_chgattr.
6041 */
6042 hat_chgattr(seg->s_as->a_hat, addr, len, prot);
6043 }
6044
6045 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6046
6047 return (0);
6048 }
6049
6050 /*
6051 * segvn_setpagesize is called via segop_setpagesize from as_setpagesize,
6052 * to determine if the seg is capable of mapping the requested szc.
6053 */
6054 static int
6055 segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
6056 {
6057 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6058 struct segvn_data *nsvd;
6059 struct anon_map *amp = svd->amp;
6060 struct seg *nseg;
6061 caddr_t eaddr = addr + len, a;
6062 size_t pgsz = page_get_pagesize(szc);
6063 pgcnt_t pgcnt = page_get_pagecnt(szc);
6064 int err;
6065 u_offset_t off = svd->offset + (uintptr_t)(addr - seg->s_base);
6066
6067 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
6068 ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
6069
6070 if (seg->s_szc == szc || segvn_lpg_disable != 0) {
6071 return (0);
7019 * see if they happen to be properly allocated.
7020 */
7021
7022 /*
7023 * XXX We cheat here and don't lock the anon slots. We can't because
7024 * we may have been called from the anon layer which might already
7025 * have locked them. We are holding a refcnt on the slots so they
7026 * can't disappear. The worst that will happen is we'll get the wrong
7027 * names (vp, off) for the slots and make a poor klustering decision.
7028 */
7029 swap_xlate(ap, &vp1, &off1);
7030 swap_xlate(oap, &vp2, &off2);
7031
7032
7033 if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
7034 return (-1);
7035 return (0);
7036 }
7037
7038 /*
7039 * Synchronize primary storage cache with real object in virtual memory.
7040 *
7041 * XXX - Anonymous pages should not be sync'ed out at all.
7042 */
7043 static int
7044 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7045 {
7046 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7047 struct vpage *vpp;
7048 page_t *pp;
7049 u_offset_t offset;
7050 struct vnode *vp;
7051 u_offset_t off;
7052 caddr_t eaddr;
7053 int bflags;
7054 int err = 0;
7055 int segtype;
7056 int pageprot;
7057 int prot;
7058 ulong_t anon_index;
9453 /*
9454 * Get policy info for private or shared memory
9455 */
9456 if (svn_data->type != MAP_SHARED) {
9457 if (svn_data->tr_state != SEGVN_TR_ON) {
9458 policy_info = &svn_data->policy_info;
9459 } else {
9460 policy_info = &svn_data->tr_policy_info;
9461 ASSERT(policy_info->mem_policy ==
9462 LGRP_MEM_POLICY_NEXT_SEG);
9463 }
9464 } else {
9465 amp = svn_data->amp;
9466 anon_index = svn_data->anon_index + seg_page(seg, addr);
9467 vp = svn_data->vp;
9468 vn_off = svn_data->offset + (uintptr_t)(addr - seg->s_base);
9469 policy_info = lgrp_shm_policy_get(amp, anon_index, vp, vn_off);
9470 }
9471
9472 return (policy_info);
9473 }
9474
9475 /*
9476 * Bind a text vnode segment to an amp. If we bind successfully, mappings
9477 * will be established to per-vnode, per-lgroup amp pages instead of to
9478 * vnode pages. There's one amp per vnode text mapping per lgroup. Many
9479 * processes may share the same text replication amp. If a suitable amp
9480 * doesn't already exist in the svntr hash table, create a new one. We may
9481 * fail to bind to an amp if the segment is not eligible for text
9482 * replication. The code below first checks these conditions; if binding
9483 * succeeds, the segment's tr_state is set to on and svd->amp points to the
9484 * amp to use. Otherwise tr_state is set to off and svd->amp remains NULL.
9485 */
9486 static void
9487 segvn_textrepl(struct seg *seg)
9488 {
9489 struct segvn_data *svd = (struct segvn_data *)seg->s_data;
9490 vnode_t *vp = svd->vp;
9491 u_offset_t off = svd->offset;
9492 size_t size = seg->s_size;
|