5255 uts shouldn't open-code ISP2
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 * under license from the Regents of the University of California.
32 */
33
34 /*
35 * VM - generic vnode mapping segment.
36 *
37 * The segmap driver is used only by the kernel to get faster (than seg_vn)
38 * mappings [lower routine overhead; more persistent cache] to random
39 * vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
40 */
41
42 #include <sys/types.h>
43 #include <sys/t_lock.h>
44 #include <sys/param.h>
45 #include <sys/sysmacros.h>
46 #include <sys/buf.h>
47 #include <sys/systm.h>
48 #include <sys/vnode.h>
49 #include <sys/mman.h>
50 #include <sys/errno.h>
51 #include <sys/cred.h>
52 #include <sys/kmem.h>
53 #include <sys/vtrace.h>
54 #include <sys/cmn_err.h>
55 #include <sys/debug.h>
56 #include <sys/thread.h>
57 #include <sys/dumphdr.h>
58 #include <sys/bitmap.h>
59 #include <sys/lgrp.h>
60
61 #include <vm/seg_kmem.h>
62 #include <vm/hat.h>
63 #include <vm/as.h>
64 #include <vm/seg.h>
65 #include <vm/seg_kpm.h>
66 #include <vm/seg_map.h>
67 #include <vm/page.h>
68 #include <vm/pvn.h>
69 #include <vm/rm.h>
70
71 /*
72 * Private seg op routines.
73 */
74 static void segmap_free(struct seg *seg);
75 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
76 size_t len, enum fault_type type, enum seg_rw rw);
77 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
78 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
79 uint_t prot);
80 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
81 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
82 uint_t *protv);
83 static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr);
84 static int segmap_gettype(struct seg *seg, caddr_t addr);
85 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
86 static void segmap_dump(struct seg *seg);
87 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
88 struct page ***ppp, enum lock_type type,
89 enum seg_rw rw);
90 static void segmap_badop(void);
91 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
92 static lgrp_mem_policy_info_t *segmap_getpolicy(struct seg *seg,
93 caddr_t addr);
94 static int segmap_capable(struct seg *seg, segcapability_t capability);
95
96 /* segkpm support */
97 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
98 struct smap *, enum seg_rw);
99 struct smap *get_smap_kpm(caddr_t, page_t **);
100
101 #define SEGMAP_BADOP(t) (t(*)())segmap_badop
102
103 static struct seg_ops segmap_ops = {
104 SEGMAP_BADOP(int), /* dup */
105 SEGMAP_BADOP(int), /* unmap */
106 segmap_free,
107 segmap_fault,
108 segmap_faulta,
109 SEGMAP_BADOP(int), /* setprot */
110 segmap_checkprot,
111 segmap_kluster,
112 SEGMAP_BADOP(size_t), /* swapout */
113 SEGMAP_BADOP(int), /* sync */
114 SEGMAP_BADOP(size_t), /* incore */
115 SEGMAP_BADOP(int), /* lockop */
116 segmap_getprot,
117 segmap_getoffset,
118 segmap_gettype,
119 segmap_getvp,
120 SEGMAP_BADOP(int), /* advise */
121 segmap_dump,
122 segmap_pagelock, /* pagelock */
123 SEGMAP_BADOP(int), /* setpgsz */
124 segmap_getmemid, /* getmemid */
125 segmap_getpolicy, /* getpolicy */
126 segmap_capable, /* capable */
127 };
128
129 /*
130 * Private segmap routines.
131 */
132 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
133 size_t len, enum seg_rw rw, struct smap *smp);
134 static void segmap_smapadd(struct smap *smp);
135 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
136 u_offset_t off, int hashid);
137 static void segmap_hashout(struct smap *smp);
138
139
140 /*
141 * Statistics for segmap operations.
142 *
143 * No explicit locking to protect these stats.
144 */
145 struct segmapcnt segmapcnt = {
146 { "fault", KSTAT_DATA_ULONG },
147 { "faulta", KSTAT_DATA_ULONG },
148 { "getmap", KSTAT_DATA_ULONG },
149 { "get_use", KSTAT_DATA_ULONG },
150 { "get_reclaim", KSTAT_DATA_ULONG },
151 { "get_reuse", KSTAT_DATA_ULONG },
152 { "get_unused", KSTAT_DATA_ULONG },
153 { "get_nofree", KSTAT_DATA_ULONG },
154 { "rel_async", KSTAT_DATA_ULONG },
155 { "rel_write", KSTAT_DATA_ULONG },
156 { "rel_free", KSTAT_DATA_ULONG },
157 { "rel_abort", KSTAT_DATA_ULONG },
158 { "rel_dontneed", KSTAT_DATA_ULONG },
159 { "release", KSTAT_DATA_ULONG },
160 { "pagecreate", KSTAT_DATA_ULONG },
161 { "free_notfree", KSTAT_DATA_ULONG },
162 { "free_dirty", KSTAT_DATA_ULONG },
163 { "free", KSTAT_DATA_ULONG },
164 { "stolen", KSTAT_DATA_ULONG },
165 { "get_nomtx", KSTAT_DATA_ULONG }
166 };
167
168 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
169 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
170
171 /*
172 * Return number of map pages in segment.
173 */
174 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
175
176 /*
177 * Translate addr into smap number within segment.
178 */
179 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
180
181 /*
182 * Translate addr in seg into struct smap pointer.
183 */
184 #define GET_SMAP(seg, addr) \
185 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
186
187 /*
188 * Bit in map (16 bit bitmap).
189 */
190 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
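/*
 * Worked example (illustrative only; values assume MAXBSIZE is 8K, i.e.
 * MAXBSHIFT == 13, and PAGESIZE is 4K, i.e. PAGESHIFT == 12):
 *
 *	addr = seg->s_base + 0x5000
 *	MAP_PAGE(seg, addr)	== 0x5000 >> 13 == 2	(third smap slot)
 *	GET_SMAP(seg, addr)	== &smd_sm[2]
 *	addr & MAXBOFFSET	== 0x1000		(page 1 of that chunk)
 *	SMAP_BIT_MASK(0x1000 >> PAGESHIFT) == 1 << 1	(bit 1 in sm_bitmap)
 */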
191
192 static int smd_colormsk = 0;
193 static int smd_ncolor = 0;
194 static int smd_nfree = 0;
195 static int smd_freemsk = 0;
196 #ifdef DEBUG
197 static int *colors_used;
198 #endif
199 static struct smap *smd_smap;
200 static struct smaphash *smd_hash;
201 #ifdef SEGMAP_HASHSTATS
202 static unsigned int *smd_hash_len;
203 #endif
204 static struct smfree *smd_free;
205 static ulong_t smd_hashmsk = 0;
206
207 #define SEGMAP_MAXCOLOR 2
208 #define SEGMAP_CACHE_PAD 64
209
210 union segmap_cpu {
211 struct {
212 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR];
213 struct smap *scpu_last_smap;
214 ulong_t scpu_getmap;
215 ulong_t scpu_release;
216 ulong_t scpu_get_reclaim;
217 ulong_t scpu_fault;
218 ulong_t scpu_pagecreate;
219 ulong_t scpu_get_reuse;
220 } scpu;
221 char scpu_pad[SEGMAP_CACHE_PAD];
222 };
223 static union segmap_cpu *smd_cpu;
224
225 /*
226 * There are three locks in seg_map:
227 * - per freelist mutexes
228 * - per hashchain mutexes
229 * - per smap mutexes
230 *
231 * The lock ordering is to get the smap mutex to lock down the slot
232 * first then the hash lock (for hash in/out (vp, off) list) or the
233 * freelist lock to put the slot back on the free list.
234 *
235 * The hash search is done while holding only the hashchain lock. When a
236 * wanted slot is found, we drop the hashchain lock and then lock the slot,
237 * so the hashchain and smap locks never overlap. After the slot is
238 * locked, we verify again that the slot is still what we are looking
239 * for.
240 *
241 * Allocation of a free slot is done by holding the freelist lock,
242 * then locking the smap slot at the head of the freelist. This is
243 * in reversed lock order so mutex_tryenter() is used.
244 *
245 * The smap lock protects all fields in smap structure except for
246 * the link fields for hash/free lists which are protected by
247 * hashchain and freelist locks.
248 */
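/*
 * A condensed sketch of the hash-search protocol described above; this is
 * for illustration only, the real code is in segmap_getmapflt() below:
 *
 *	mutex_enter(hashmtx);
 *	for (smp = sh_hash_list; smp != NULL; smp = smp->sm_hash)
 *		if (smp->sm_vp == vp && smp->sm_off == off)
 *			break;
 *	mutex_exit(hashmtx);			(no hash/smap lock overlap)
 *	if (smp != NULL) {
 *		mutex_enter(SMAPMTX(smp));
 *		if (smp->sm_vp != vp || smp->sm_off != off)
 *			goto retry;		(slot was reused, start over)
 *	}
 */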
249
250 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx)
251
252 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk])
253 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk)
254
255 #define SMAPMTX(smp) (&smp->sm_mtx)
256
257 #define SMAP_HASHFUNC(vp, off, hashid) \
258 { \
259 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
260 ((off) >> MAXBSHIFT)) & smd_hashmsk); \
261 }
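/*
 * Note (illustrative arithmetic only): for a given vnode, two offsets that
 * are MAXBSIZE apart differ only in the (off >> MAXBSHIFT) term, so their
 * MAXBSIZE chunks land in adjacent hash buckets modulo the hash size.
 */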
262
263 /*
264 * The most frequently updated kstat counters are kept in the
265 * per cpu array to avoid hot cache blocks. The update function
266 * sums the cpu local counters to update the global counters.
267 */
268
269 /* ARGSUSED */
270 int
271 segmap_kstat_update(kstat_t *ksp, int rw)
272 {
273 int i;
274 ulong_t getmap, release, get_reclaim;
275 ulong_t fault, pagecreate, get_reuse;
276
277 if (rw == KSTAT_WRITE)
278 return (EACCES);
279 getmap = release = get_reclaim = (ulong_t)0;
280 fault = pagecreate = get_reuse = (ulong_t)0;
281 for (i = 0; i < max_ncpus; i++) {
282 getmap += smd_cpu[i].scpu.scpu_getmap;
283 release += smd_cpu[i].scpu.scpu_release;
284 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
285 fault += smd_cpu[i].scpu.scpu_fault;
286 pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
287 get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
288 }
289 segmapcnt.smp_getmap.value.ul = getmap;
290 segmapcnt.smp_release.value.ul = release;
291 segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
292 segmapcnt.smp_fault.value.ul = fault;
293 segmapcnt.smp_pagecreate.value.ul = pagecreate;
294 segmapcnt.smp_get_reuse.value.ul = get_reuse;
295 return (0);
296 }
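/*
 * A minimal sketch (not part of this file; the kstat module/name shown are
 * assumed) of how the update function above is typically wired into the
 * kstat framework elsewhere in the system:
 *
 *	kstat_t *ksp = kstat_create("unix", 0, "segmap", "vm",
 *	    KSTAT_TYPE_NAMED, segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
 *	if (ksp != NULL) {
 *		ksp->ks_data = (void *)segmapcnt_ptr;
 *		ksp->ks_update = segmap_kstat_update;
 *		kstat_install(ksp);
 *	}
 */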
297
298 int
299 segmap_create(struct seg *seg, void *argsp)
300 {
301 struct segmap_data *smd;
302 struct smap *smp;
303 struct smfree *sm;
304 struct segmap_crargs *a = (struct segmap_crargs *)argsp;
305 struct smaphash *shashp;
306 union segmap_cpu *scpu;
307 long i, npages;
308 size_t hashsz;
309 uint_t nfreelist;
310 extern void prefetch_smap_w(void *);
311 extern int max_ncpus;
312
313 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
314
315 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
316 panic("segkmap not MAXBSIZE aligned");
317 /*NOTREACHED*/
318 }
319
320 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
321
322 seg->s_data = (void *)smd;
323 seg->s_ops = &segmap_ops;
324 smd->smd_prot = a->prot;
325
326 /*
327 * Scale the number of smap freelists to be
328 * proportional to max_ncpus * number of virtual colors.
329 * The caller can over-ride this scaling by providing
330 * a non-zero a->nfreelist argument.
331 */
332 nfreelist = a->nfreelist;
333 if (nfreelist == 0)
334 nfreelist = max_ncpus;
335 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
336 cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
337 "%d, using %d", nfreelist, max_ncpus);
338 nfreelist = max_ncpus;
339 }
340 if (!ISP2(nfreelist)) {
341 /* round up nfreelist to the next power of two. */
342 nfreelist = 1 << (highbit(nfreelist));
343 }
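	/*
	 * Worked example (illustrative only): with nfreelist == 6,
	 * highbit(6) returns 3, so nfreelist becomes 1 << 3 == 8. The
	 * ISP2() guard above matters because a value that is already a
	 * power of two, e.g. 8, would otherwise be doubled to 16, since
	 * 1 << highbit(8) == 16.
	 */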
344
345 /*
346 * Get the number of virtual colors - must be a power of 2.
347 */
348 if (a->shmsize)
349 smd_ncolor = a->shmsize >> MAXBSHIFT;
350 else
351 smd_ncolor = 1;
352 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
353 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
354 smd_colormsk = smd_ncolor - 1;
355 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
356 smd_freemsk = smd_nfree - 1;
357
358 /*
359 * Allocate and initialize the freelist headers.
360 * Note that sm_freeq[1] starts out as the release queue. This
361 * is relied upon when the smap structures are initialized below.
362 */
363 smd_free = smd->smd_free =
364 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
365 for (i = 0; i < smd_nfree; i++) {
366 sm = &smd->smd_free[i];
367 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
368 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
369 sm->sm_allocq = &sm->sm_freeq[0];
370 sm->sm_releq = &sm->sm_freeq[1];
371 }
372
373 /*
374 * Allocate and initialize the smap hash chain headers.
375 * Compute hash size rounding down to the next power of two.
376 */
377 npages = MAP_PAGES(seg);
378 smd->smd_npages = npages;
379 hashsz = npages / SMAP_HASHAVELEN;
380 hashsz = 1 << (highbit(hashsz)-1);
381 smd_hashmsk = hashsz - 1;
382 smd_hash = smd->smd_hash =
383 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
384 #ifdef SEGMAP_HASHSTATS
385 smd_hash_len =
386 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
387 #endif
388 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
389 shashp->sh_hash_list = NULL;
390 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
391 }
392
393 /*
394 * Allocate and initialize the smap structures.
395 * Link all slots onto the appropriate freelist.
396 * The smap array is large enough to affect boot time
397 * on large systems, so use memory prefetching and only
398 * go through the array once. Inline an optimized version
399 * of segmap_smapadd to add structures to freelists with
400 * knowledge that no locks are needed here.
401 */
402 smd_smap = smd->smd_sm =
403 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
404
405 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
406 smp >= smd->smd_sm; smp--) {
407 struct smap *smpfreelist;
408 struct sm_freeq *releq;
409
410 prefetch_smap_w((char *)smp);
411
412 smp->sm_vp = NULL;
413 smp->sm_hash = NULL;
414 smp->sm_off = 0;
415 smp->sm_bitmap = 0;
416 smp->sm_refcnt = 0;
417 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
418 smp->sm_free_ndx = SMP2SMF_NDX(smp);
419
420 sm = SMP2SMF(smp);
421 releq = sm->sm_releq;
422
423 smpfreelist = releq->smq_free;
424 if (smpfreelist == 0) {
425 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
426 } else {
427 smp->sm_next = smpfreelist;
428 smp->sm_prev = smpfreelist->sm_prev;
429 smpfreelist->sm_prev = smp;
430 smp->sm_prev->sm_next = smp;
431 releq->smq_free = smp->sm_next;
432 }
433
434 /*
435 * sm_flags = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
436 */
437 smp->sm_flags = 0;
438
439 #ifdef SEGKPM_SUPPORT
440 /*
441 * Due to the fragile prefetch loop no
442 * separate function is used here.
443 */
444 smp->sm_kpme_next = NULL;
445 smp->sm_kpme_prev = NULL;
446 smp->sm_kpme_page = NULL;
447 #endif
448 }
449
450 /*
451 * Allocate the per color indices that distribute allocation
452 * requests over the free lists. Each cpu will have a private
453 * rotor index to spread the allocations evenly across the available
454 * smap freelists. Init the scpu_last_smap field to the first
455 * smap element so there is no need to check for NULL.
456 */
457 smd_cpu =
458 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
459 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
460 int j;
461 for (j = 0; j < smd_ncolor; j++)
462 scpu->scpu.scpu_free_ndx[j] = j;
463 scpu->scpu.scpu_last_smap = smd_smap;
464 }
465
466 vpm_init();
467
468 #ifdef DEBUG
469 /*
470 * Keep track of which colors are used more often.
471 */
472 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
473 #endif /* DEBUG */
474
475 return (0);
476 }
477
478 static void
479 segmap_free(seg)
480 struct seg *seg;
481 {
482 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
483 }
484
485 /*
486 * Do a F_SOFTUNLOCK call over the range requested.
487 * The range must have already been F_SOFTLOCK'ed.
488 */
489 static void
490 segmap_unlock(
491 struct hat *hat,
492 struct seg *seg,
493 caddr_t addr,
494 size_t len,
495 enum seg_rw rw,
496 struct smap *smp)
497 {
498 page_t *pp;
499 caddr_t adr;
500 u_offset_t off;
501 struct vnode *vp;
502 kmutex_t *smtx;
503
504 ASSERT(smp->sm_refcnt > 0);
505
506 #ifdef lint
507 seg = seg;
508 #endif
509
510 if (segmap_kpm && IS_KPM_ADDR(addr)) {
511
512 /*
513 * We're called only from segmap_fault and this was a
514 * NOP in case of a kpm based smap, so dangerous things
515 * must have happened in the meantime. Pages are prefaulted
516 * and locked in segmap_getmapflt and they will not be
517 * unlocked until segmap_release.
518 */
519 panic("segmap_unlock: called with kpm addr %p", (void *)addr);
520 /*NOTREACHED*/
521 }
522
523 vp = smp->sm_vp;
524 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
525
526 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
527 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
528 ushort_t bitmask;
529
530 /*
531 * Use page_find() instead of page_lookup() to
532 * find the page since we know that it has
533 * "shared" lock.
534 */
535 pp = page_find(vp, off);
536 if (pp == NULL) {
537 panic("segmap_unlock: page not found");
538 /*NOTREACHED*/
539 }
540
541 if (rw == S_WRITE) {
542 hat_setrefmod(pp);
543 } else if (rw != S_OTHER) {
544 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
545 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
546 hat_setref(pp);
547 }
548
549 /*
550 * Clear bitmap, if the bit corresponding to "off" is set,
551 * since the page and translation are being unlocked.
552 */
553 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
554
555 /*
556 * Large Files: Following assertion is to verify
557 * the correctness of the cast to (int) above.
558 */
559 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
560 smtx = SMAPMTX(smp);
561 mutex_enter(smtx);
562 if (smp->sm_bitmap & bitmask) {
563 smp->sm_bitmap &= ~bitmask;
564 }
565 mutex_exit(smtx);
566
567 page_unlock(pp);
568 }
569 }
570
571 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
572
573 /*
574 * This routine is called via a machine specific fault handling
575 * routine. It is also called by software routines wishing to
576 * lock or unlock a range of addresses.
577 *
578 * Note that this routine expects a page-aligned "addr".
579 */
580 faultcode_t
581 segmap_fault(
582 struct hat *hat,
583 struct seg *seg,
584 caddr_t addr,
585 size_t len,
586 enum fault_type type,
587 enum seg_rw rw)
588 {
589 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
590 struct smap *smp;
591 page_t *pp, **ppp;
592 struct vnode *vp;
593 u_offset_t off;
594 page_t *pl[MAXPPB + 1];
595 uint_t prot;
596 u_offset_t addroff;
597 caddr_t adr;
598 int err;
599 u_offset_t sm_off;
600 int hat_flag;
601
602 if (segmap_kpm && IS_KPM_ADDR(addr)) {
603 int newpage;
604 kmutex_t *smtx;
605
606 /*
607 * Pages are successfully prefaulted and locked in
608 * segmap_getmapflt and can't be unlocked until
609 * segmap_release. No hat mappings have to be locked
610 * and they also can't be unlocked as long as the
611 * caller owns an active kpm addr.
612 */
613 #ifndef DEBUG
614 if (type != F_SOFTUNLOCK)
615 return (0);
616 #endif
617
618 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
619 panic("segmap_fault: smap not found "
620 "for addr %p", (void *)addr);
621 /*NOTREACHED*/
622 }
623
624 smtx = SMAPMTX(smp);
625 #ifdef DEBUG
626 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
627 if (newpage) {
628 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
629 (void *)smp);
630 }
631
632 if (type != F_SOFTUNLOCK) {
633 mutex_exit(smtx);
634 return (0);
635 }
636 #endif
637 mutex_exit(smtx);
638 vp = smp->sm_vp;
639 sm_off = smp->sm_off;
640
641 if (vp == NULL)
642 return (FC_MAKE_ERR(EIO));
643
644 ASSERT(smp->sm_refcnt > 0);
645
646 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
647 if (addroff + len > MAXBSIZE)
648 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
649 (void *)(addr + len));
650
651 off = sm_off + addroff;
652
653 pp = page_find(vp, off);
654
655 if (pp == NULL)
656 panic("segmap_fault: softunlock page not found");
657
658 /*
659 * Set ref bit also here in case of S_OTHER to avoid the
660 * overhead of supporting other cases than F_SOFTUNLOCK
661 * with segkpm. We can do this because the underlying
662 * pages are locked anyway.
663 */
664 if (rw == S_WRITE) {
665 hat_setrefmod(pp);
666 } else {
667 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
668 "segmap_fault:pp %p vp %p offset %llx",
669 pp, vp, off);
670 hat_setref(pp);
671 }
672
673 return (0);
674 }
675
676 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
677 smp = GET_SMAP(seg, addr);
678 vp = smp->sm_vp;
679 sm_off = smp->sm_off;
680
681 if (vp == NULL)
682 return (FC_MAKE_ERR(EIO));
683
684 ASSERT(smp->sm_refcnt > 0);
685
686 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
687 if (addroff + len > MAXBSIZE) {
688 panic("segmap_fault: endaddr %p "
689 "exceeds MAXBSIZE chunk", (void *)(addr + len));
690 /*NOTREACHED*/
691 }
692 off = sm_off + addroff;
693
694 /*
695 * First handle the easy stuff
696 */
697 if (type == F_SOFTUNLOCK) {
698 segmap_unlock(hat, seg, addr, len, rw, smp);
699 return (0);
700 }
701
702 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
703 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
704 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
705 seg, addr, rw, CRED(), NULL);
706
707 if (err)
708 return (FC_MAKE_ERR(err));
709
710 prot &= smd->smd_prot;
711
712 /*
713 * Handle all pages returned in the pl[] array.
714 * This loop is coded on the assumption that if
715 * there was no error from the VOP_GETPAGE routine,
716 * that the page list returned will contain all the
717 * needed pages for the vp from [off..off + len].
718 */
719 ppp = pl;
720 while ((pp = *ppp++) != NULL) {
721 u_offset_t poff;
722 ASSERT(pp->p_vnode == vp);
723 hat_flag = HAT_LOAD;
724
725 /*
726 * Verify that the pages returned are within the range
727 * of this segmap region. Note that it is theoretically
728 * possible for pages outside this range to be returned,
729 * but it is not very likely. If we cannot use the
730 * page here, just release it and go on to the next one.
731 */
732 if (pp->p_offset < sm_off ||
733 pp->p_offset >= sm_off + MAXBSIZE) {
734 (void) page_release(pp, 1);
735 continue;
736 }
737
738 ASSERT(hat == kas.a_hat);
739 poff = pp->p_offset;
740 adr = addr + (poff - off);
741 if (adr >= addr && adr < addr + len) {
742 hat_setref(pp);
743 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
744 "segmap_fault:pp %p vp %p offset %llx",
745 pp, vp, poff);
746 if (type == F_SOFTLOCK)
747 hat_flag = HAT_LOAD_LOCK;
748 }
749
750 /*
751 * Deal with VMODSORT pages here. If we know this is a write
752 * do the setmod now and allow write protection.
753 * As long as it's modified or not S_OTHER, remove write
754 * protection. With S_OTHER it's up to the FS to deal with this.
755 */
756 if (IS_VMODSORT(vp)) {
757 if (rw == S_WRITE)
758 hat_setmod(pp);
759 else if (rw != S_OTHER && !hat_ismod(pp))
760 prot &= ~PROT_WRITE;
761 }
762
763 hat_memload(hat, adr, pp, prot, hat_flag);
764 if (hat_flag != HAT_LOAD_LOCK)
765 page_unlock(pp);
766 }
767 return (0);
768 }
769
770 /*
771 * This routine is used to start I/O on pages asynchronously.
772 */
773 static faultcode_t
774 segmap_faulta(struct seg *seg, caddr_t addr)
775 {
776 struct smap *smp;
777 struct vnode *vp;
778 u_offset_t off;
779 int err;
780
781 if (segmap_kpm && IS_KPM_ADDR(addr)) {
782 int newpage;
783 kmutex_t *smtx;
784
785 /*
786 * Pages are successfully prefaulted and locked in
787 * segmap_getmapflt and can't be unlocked until
788 * segmap_release. No hat mappings have to be locked
789 * and they also can't be unlocked as long as the
790 * caller owns an active kpm addr.
791 */
792 #ifdef DEBUG
793 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
794 panic("segmap_faulta: smap not found "
795 "for addr %p", (void *)addr);
796 /*NOTREACHED*/
797 }
798
799 smtx = SMAPMTX(smp);
800 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
801 mutex_exit(smtx);
802 if (newpage)
803 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
804 (void *)smp);
805 #endif
806 return (0);
807 }
808
809 segmapcnt.smp_faulta.value.ul++;
810 smp = GET_SMAP(seg, addr);
811
812 ASSERT(smp->sm_refcnt > 0);
813
814 vp = smp->sm_vp;
815 off = smp->sm_off;
816
817 if (vp == NULL) {
818 cmn_err(CE_WARN, "segmap_faulta - no vp");
819 return (FC_MAKE_ERR(EIO));
820 }
821
822 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
823 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
824
825 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
826 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
827 seg, addr, S_READ, CRED(), NULL);
828
829 if (err)
830 return (FC_MAKE_ERR(err));
831 return (0);
832 }
833
834 /*ARGSUSED*/
835 static int
836 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
837 {
838 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
839
840 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
841
842 /*
843 * Need not acquire the segment lock since
844 * "smd_prot" is a read-only field.
845 */
846 return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
847 }
848
849 static int
850 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
851 {
852 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
853 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
854
855 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
856
857 if (pgno != 0) {
858 do {
859 protv[--pgno] = smd->smd_prot;
860 } while (pgno != 0);
861 }
862 return (0);
863 }
864
865 static u_offset_t
866 segmap_getoffset(struct seg *seg, caddr_t addr)
867 {
868 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
869
870 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
871
872 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
873 }
874
875 /*ARGSUSED*/
876 static int
877 segmap_gettype(struct seg *seg, caddr_t addr)
878 {
879 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
880
881 return (MAP_SHARED);
882 }
883
884 /*ARGSUSED*/
885 static int
886 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
887 {
888 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
889
890 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
891
892 /* XXX - This doesn't make any sense */
893 *vpp = smd->smd_sm->sm_vp;
894 return (0);
895 }
896
897 /*
898 * Check to see if it makes sense to do kluster/read ahead to
899 * addr + delta relative to the mapping at addr. We assume here
900 * that delta is a signed PAGESIZE'd multiple (which can be negative).
901 *
902 * For segmap we always "approve" of this action from our standpoint.
903 */
904 /*ARGSUSED*/
905 static int
906 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
907 {
908 return (0);
909 }
910
911 static void
912 segmap_badop()
913 {
914 panic("segmap_badop");
915 /*NOTREACHED*/
916 }
917
918 /*
919 * Special private segmap operations
920 */
921
922 /*
923 * Add smap to the appropriate free list.
924 */
925 static void
926 segmap_smapadd(struct smap *smp)
927 {
928 struct smfree *sm;
929 struct smap *smpfreelist;
930 struct sm_freeq *releq;
931
932 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
933
934 if (smp->sm_refcnt != 0) {
935 panic("segmap_smapadd");
936 /*NOTREACHED*/
937 }
938
939 sm = &smd_free[smp->sm_free_ndx];
940 /*
941 * Add to the tail of the release queue
942 * Note that sm_releq and sm_allocq could toggle
943 * before we get the lock. This does not affect
944 * correctness as the 2 queues are only maintained
945 * to reduce lock pressure.
946 */
947 releq = sm->sm_releq;
948 if (releq == &sm->sm_freeq[0])
949 smp->sm_flags |= SM_QNDX_ZERO;
950 else
951 smp->sm_flags &= ~SM_QNDX_ZERO;
952 mutex_enter(&releq->smq_mtx);
953 smpfreelist = releq->smq_free;
954 if (smpfreelist == 0) {
955 int want;
956
957 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
958 /*
959 * Both queue mutexes held to set sm_want;
960 * snapshot the value before dropping releq mutex.
961 * If sm_want appears after the releq mutex is dropped,
962 * then the smap just freed is already gone.
963 */
964 want = sm->sm_want;
965 mutex_exit(&releq->smq_mtx);
966 /*
967 * See if there was a waiter before dropping the releq mutex
968 * then recheck after obtaining the sm_freeq[0] mutex, as
969 * another thread may have already signaled.
970 */
971 if (want) {
972 mutex_enter(&sm->sm_freeq[0].smq_mtx);
973 if (sm->sm_want)
974 cv_signal(&sm->sm_free_cv);
975 mutex_exit(&sm->sm_freeq[0].smq_mtx);
976 }
977 } else {
978 smp->sm_next = smpfreelist;
979 smp->sm_prev = smpfreelist->sm_prev;
980 smpfreelist->sm_prev = smp;
981 smp->sm_prev->sm_next = smp;
982 mutex_exit(&releq->smq_mtx);
983 }
984 }
985
986
987 static struct smap *
988 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
989 {
990 struct smap **hpp;
991 struct smap *tmp;
992 kmutex_t *hmtx;
993
994 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
995 ASSERT(smp->sm_vp == NULL);
996 ASSERT(smp->sm_hash == NULL);
997 ASSERT(smp->sm_prev == NULL);
998 ASSERT(smp->sm_next == NULL);
999 ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
1000
1001 hmtx = SHASHMTX(hashid);
1002
1003 mutex_enter(hmtx);
1004 /*
1005 * First we need to verify that no one has created a smp
1006 * with (vp,off) as its tag before us.
1007 */
1008 for (tmp = smd_hash[hashid].sh_hash_list;
1009 tmp != NULL; tmp = tmp->sm_hash)
1010 if (tmp->sm_vp == vp && tmp->sm_off == off)
1011 break;
1012
1013 if (tmp == NULL) {
1014 /*
1015 * No one created one yet.
1016 *
1017 * Funniness here - we don't increment the ref count on the
1018 * vnode * even though we have another pointer to it here.
1019 * The reason for this is that we don't want the fact that
1020 * a seg_map entry somewhere refers to a vnode to prevent the
1021 * vnode * itself from going away. This is because this
1022 * reference to the vnode is a "soft one". In the case where
1023 * a mapping is being used by a rdwr [or directory routine?]
1024 * there already has to be a non-zero ref count on the vnode.
1025 * In the case where the vp has been freed and the smap
1026 * structure is on the free list, there are no pages in memory
1027 * that can refer to the vnode. Thus even if we reuse the same
1028 * vnode/smap structure for a vnode which has the same
1029 * address but represents a different object, we are ok.
1030 */
1031 smp->sm_vp = vp;
1032 smp->sm_off = off;
1033
1034 hpp = &smd_hash[hashid].sh_hash_list;
1035 smp->sm_hash = *hpp;
1036 *hpp = smp;
1037 #ifdef SEGMAP_HASHSTATS
1038 smd_hash_len[hashid]++;
1039 #endif
1040 }
1041 mutex_exit(hmtx);
1042
1043 return (tmp);
1044 }
1045
1046 static void
1047 segmap_hashout(struct smap *smp)
1048 {
1049 struct smap **hpp, *hp;
1050 struct vnode *vp;
1051 kmutex_t *mtx;
1052 int hashid;
1053 u_offset_t off;
1054
1055 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1056
1057 vp = smp->sm_vp;
1058 off = smp->sm_off;
1059
1060 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1061 mtx = SHASHMTX(hashid);
1062 mutex_enter(mtx);
1063
1064 hpp = &smd_hash[hashid].sh_hash_list;
1065 for (;;) {
1066 hp = *hpp;
1067 if (hp == NULL) {
1068 panic("segmap_hashout");
1069 /*NOTREACHED*/
1070 }
1071 if (hp == smp)
1072 break;
1073 hpp = &hp->sm_hash;
1074 }
1075
1076 *hpp = smp->sm_hash;
1077 smp->sm_hash = NULL;
1078 #ifdef SEGMAP_HASHSTATS
1079 smd_hash_len[hashid]--;
1080 #endif
1081 mutex_exit(mtx);
1082
1083 smp->sm_vp = NULL;
1084 smp->sm_off = (u_offset_t)0;
1085
1086 }
1087
1088 /*
1089 * Attempt to free unmodified, unmapped, and non locked segmap
1090 * pages.
1091 */
1092 void
1093 segmap_pagefree(struct vnode *vp, u_offset_t off)
1094 {
1095 u_offset_t pgoff;
1096 page_t *pp;
1097
1098 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
1099
1100 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
1101 continue;
1102
1103 switch (page_release(pp, 1)) {
1104 case PGREL_NOTREL:
1105 segmapcnt.smp_free_notfree.value.ul++;
1106 break;
1107 case PGREL_MOD:
1108 segmapcnt.smp_free_dirty.value.ul++;
1109 break;
1110 case PGREL_CLEAN:
1111 segmapcnt.smp_free.value.ul++;
1112 break;
1113 }
1114 }
1115 }
1116
1117 /*
1118 * Locks held on entry: smap lock
1119 * Locks held on exit : smap lock.
1120 */
1121
1122 static void
1123 grab_smp(struct smap *smp, page_t *pp)
1124 {
1125 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1126 ASSERT(smp->sm_refcnt == 0);
1127
1128 if (smp->sm_vp != (struct vnode *)NULL) {
1129 struct vnode *vp = smp->sm_vp;
1130 u_offset_t off = smp->sm_off;
1131 /*
1132 * Destroy old vnode association and
1133 * unload any hardware translations to
1134 * the old object.
1135 */
1136 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
1137 segmap_hashout(smp);
1138
1139 /*
1140 * This node is off freelist and hashlist,
1141 * so there is no reason to drop/reacquire sm_mtx
1142 * across calls to hat_unload.
1143 */
1144 if (segmap_kpm) {
1145 caddr_t vaddr;
1146 int hat_unload_needed = 0;
1147
1148 /*
1149 * unload kpm mapping
1150 */
1151 if (pp != NULL) {
1152 vaddr = hat_kpm_page2va(pp, 1);
1153 hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
1154 page_unlock(pp);
1155 }
1156
1157 /*
1158 * Check if we have (also) the rare case of a
1159 * non kpm mapping.
1160 */
1161 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
1162 hat_unload_needed = 1;
1163 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1164 }
1165
1166 if (hat_unload_needed) {
1167 hat_unload(kas.a_hat, segkmap->s_base +
1168 ((smp - smd_smap) * MAXBSIZE),
1169 MAXBSIZE, HAT_UNLOAD);
1170 }
1171
1172 } else {
1173 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
1174 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1175 hat_unload(kas.a_hat, segkmap->s_base +
1176 ((smp - smd_smap) * MAXBSIZE),
1177 MAXBSIZE, HAT_UNLOAD);
1178 }
1179 segmap_pagefree(vp, off);
1180 }
1181 }
1182
1183 static struct smap *
1184 get_free_smp(int free_ndx)
1185 {
1186 struct smfree *sm;
1187 kmutex_t *smtx;
1188 struct smap *smp, *first;
1189 struct sm_freeq *allocq, *releq;
1190 struct kpme *kpme;
1191 page_t *pp = NULL;
1192 int end_ndx, page_locked = 0;
1193
1194 end_ndx = free_ndx;
1195 sm = &smd_free[free_ndx];
1196
1197 retry_queue:
1198 allocq = sm->sm_allocq;
1199 mutex_enter(&allocq->smq_mtx);
1200
1201 if ((smp = allocq->smq_free) == NULL) {
1202
1203 skip_queue:
1204 /*
1205 * The alloc list is empty or this queue is being skipped;
1206 * first see if the allocq toggled.
1207 */
1208 if (sm->sm_allocq != allocq) {
1209 /* queue changed */
1210 mutex_exit(&allocq->smq_mtx);
1211 goto retry_queue;
1212 }
1213 releq = sm->sm_releq;
1214 if (!mutex_tryenter(&releq->smq_mtx)) {
1215 /* cannot get releq; a free smp may be there now */
1216 mutex_exit(&allocq->smq_mtx);
1217
1218 /*
1219 * This loop could spin forever if this thread has
1220 * higher priority than the thread that is holding
1221 * releq->smq_mtx. In order to force the other thread
1222 * to run, we'll lock/unlock the mutex which is safe
1223 * since we just unlocked the allocq mutex.
1224 */
1225 mutex_enter(&releq->smq_mtx);
1226 mutex_exit(&releq->smq_mtx);
1227 goto retry_queue;
1228 }
1229 if (releq->smq_free == NULL) {
1230 /*
1231 * This freelist is empty.
1232 * This should not happen unless clients
1233 * are failing to release the segmap
1234 * window after accessing the data.
1235 * Before resorting to sleeping, try
1236 * the next list of the same color.
1237 */
1238 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
1239 if (free_ndx != end_ndx) {
1240 mutex_exit(&releq->smq_mtx);
1241 mutex_exit(&allocq->smq_mtx);
1242 sm = &smd_free[free_ndx];
1243 goto retry_queue;
1244 }
1245 /*
1246 * Tried all freelists of the same color once,
1247 * wait on this list and hope something gets freed.
1248 */
1249 segmapcnt.smp_get_nofree.value.ul++;
1250 sm->sm_want++;
1251 mutex_exit(&sm->sm_freeq[1].smq_mtx);
1252 cv_wait(&sm->sm_free_cv,
1253 &sm->sm_freeq[0].smq_mtx);
1254 sm->sm_want--;
1255 mutex_exit(&sm->sm_freeq[0].smq_mtx);
1256 sm = &smd_free[free_ndx];
1257 goto retry_queue;
1258 } else {
1259 /*
1260 * Something on the rele queue; flip the alloc
1261 * and rele queues and retry.
1262 */
1263 sm->sm_allocq = releq;
1264 sm->sm_releq = allocq;
1265 mutex_exit(&allocq->smq_mtx);
1266 mutex_exit(&releq->smq_mtx);
1267 if (page_locked) {
1268 delay(hz >> 2);
1269 page_locked = 0;
1270 }
1271 goto retry_queue;
1272 }
1273 } else {
1274 /*
1275 * Fastpath the case we get the smap mutex
1276 * on the first try.
1277 */
1278 first = smp;
1279 next_smap:
1280 smtx = SMAPMTX(smp);
1281 if (!mutex_tryenter(smtx)) {
1282 /*
1283 * Another thread is trying to reclaim this slot.
1284 * Skip to the next queue or smap.
1285 */
1286 if ((smp = smp->sm_next) == first) {
1287 goto skip_queue;
1288 } else {
1289 goto next_smap;
1290 }
1291 } else {
1292 /*
1293 * if kpme exists, get shared lock on the page
1294 */
1295 if (segmap_kpm && smp->sm_vp != NULL) {
1296
1297 kpme = GET_KPME(smp);
1298 pp = kpme->kpe_page;
1299
1300 if (pp != NULL) {
1301 if (!page_trylock(pp, SE_SHARED)) {
1302 smp = smp->sm_next;
1303 mutex_exit(smtx);
1304 page_locked = 1;
1305
1306 pp = NULL;
1307
1308 if (smp == first) {
1309 goto skip_queue;
1310 } else {
1311 goto next_smap;
1312 }
1313 } else {
1314 if (kpme->kpe_page == NULL) {
1315 page_unlock(pp);
1316 pp = NULL;
1317 }
1318 }
1319 }
1320 }
1321
1322 /*
1323 * At this point, we've selected smp. Remove smp
1324 * from its freelist. If smp is the first one in
1325 * the freelist, update the head of the freelist.
1326 */
1327 if (first == smp) {
1328 ASSERT(first == allocq->smq_free);
1329 allocq->smq_free = smp->sm_next;
1330 }
1331
1332 /*
1333 * if the head of the freelist still points to smp,
1334 * then there are no more free smaps in that list.
1335 */
1336 if (allocq->smq_free == smp)
1337 /*
1338 * Took the last one
1339 */
1340 allocq->smq_free = NULL;
1341 else {
1342 smp->sm_prev->sm_next = smp->sm_next;
1343 smp->sm_next->sm_prev = smp->sm_prev;
1344 }
1345 mutex_exit(&allocq->smq_mtx);
1346 smp->sm_prev = smp->sm_next = NULL;
1347
1348 /*
1349 * if pp != NULL, pp must have been locked;
1350 * grab_smp() unlocks pp.
1351 */
1352 ASSERT((pp == NULL) || PAGE_LOCKED(pp));
1353 grab_smp(smp, pp);
1354 /* return smp locked. */
1355 ASSERT(SMAPMTX(smp) == smtx);
1356 ASSERT(MUTEX_HELD(smtx));
1357 return (smp);
1358 }
1359 }
1360 }
1361
1362 /*
1363 * Special public segmap operations
1364 */
1365
1366 /*
1367 * Create pages (without using VOP_GETPAGE) and load up translations to them.
1368 * If softlock is TRUE, then set things up so that it looks like a call
1369 * to segmap_fault with F_SOFTLOCK.
1370 *
1371 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
1372 *
1373 * All fields in the generic segment (struct seg) are considered to be
1374 * read-only for "segmap" even though the kernel address space (kas) may
1375 * not be locked, hence no lock is needed to access them.
1376 */
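/*
 * Illustrative caller-side sketch only (variable names and error handling
 * are assumed, not taken from any particular filesystem): a writer that
 * knows it will overwrite whole pages suppresses the prefault in
 * segmap_getmapflt() and creates the pages itself:
 *
 *	newpage = 0;
 *	mapon = off & MAXBOFFSET;
 *	base = segmap_getmapflt(segkmap, vp, off, n, !pagecreate, S_WRITE);
 *	if (pagecreate)
 *		newpage = segmap_pagecreate(segkmap, base + mapon, n, 0);
 *	error = uiomove(base + mapon, n, UIO_WRITE, uio);
 *	if (newpage)
 *		segmap_pageunlock(segkmap, base + mapon, n, S_WRITE);
 *	if (error)
 *		(void) segmap_release(segkmap, base, 0);
 *	else
 *		error = segmap_release(segkmap, base, SM_WRITE);
 */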
1377 int
1378 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
1379 {
1380 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
1381 page_t *pp;
1382 u_offset_t off;
1383 struct smap *smp;
1384 struct vnode *vp;
1385 caddr_t eaddr;
1386 int newpage = 0;
1387 uint_t prot;
1388 kmutex_t *smtx;
1389 int hat_flag;
1390
1391 ASSERT(seg->s_as == &kas);
1392
1393 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1394 /*
1395 * Pages are successfully prefaulted and locked in
1396 * segmap_getmapflt and can't be unlocked until
1397 * segmap_release. The SM_KPM_NEWPAGE flag is set
1398 * in segmap_pagecreate_kpm when new pages are created,
1399 * and it is returned as the "newpage" indication here.
1400 */
1401 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1402 panic("segmap_pagecreate: smap not found "
1403 "for addr %p", (void *)addr);
1404 /*NOTREACHED*/
1405 }
1406
1407 smtx = SMAPMTX(smp);
1408 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
1409 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1410 mutex_exit(smtx);
1411
1412 return (newpage);
1413 }
1414
1415 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
1416
1417 eaddr = addr + len;
1418 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1419
1420 smp = GET_SMAP(seg, addr);
1421
1422 /*
1423 * We don't grab smp mutex here since we assume the smp
1424 * has a refcnt set already which prevents the slot from
1425 * changing its id.
1426 */
1427 ASSERT(smp->sm_refcnt > 0);
1428
1429 vp = smp->sm_vp;
1430 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1431 prot = smd->smd_prot;
1432
1433 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1434 hat_flag = HAT_LOAD;
1435 pp = page_lookup(vp, off, SE_SHARED);
1436 if (pp == NULL) {
1437 ushort_t bitindex;
1438
1439 if ((pp = page_create_va(vp, off,
1440 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1441 panic("segmap_pagecreate: page_create failed");
1442 /*NOTREACHED*/
1443 }
1444 newpage = 1;
1445 page_io_unlock(pp);
1446
1447 /*
1448 * Since pages created here do not contain valid
1449 * data until the caller writes into them, the
1450 * "exclusive" lock will not be dropped to prevent
1451 * other users from accessing the page. We also
1452 * have to lock the translation to prevent a fault
1453 * from occurring when the virtual address mapped by
1454 * this page is written into. This is necessary to
1455 * avoid a deadlock since we haven't dropped the
1456 * "exclusive" lock.
1457 */
1458 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
1459
1460 /*
1461 * Large Files: The following assertion is to
1462 * verify the cast above.
1463 */
1464 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1465 smtx = SMAPMTX(smp);
1466 mutex_enter(smtx);
1467 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
1468 mutex_exit(smtx);
1469
1470 hat_flag = HAT_LOAD_LOCK;
1471 } else if (softlock) {
1472 hat_flag = HAT_LOAD_LOCK;
1473 }
1474
1475 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
1476 hat_setmod(pp);
1477
1478 hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
1479
1480 if (hat_flag != HAT_LOAD_LOCK)
1481 page_unlock(pp);
1482
1483 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
1484 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1485 seg, addr, pp, vp, off);
1486 }
1487
1488 return (newpage);
1489 }
1490
1491 void
1492 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
1493 {
1494 struct smap *smp;
1495 ushort_t bitmask;
1496 page_t *pp;
1497 struct vnode *vp;
1498 u_offset_t off;
1499 caddr_t eaddr;
1500 kmutex_t *smtx;
1501
1502 ASSERT(seg->s_as == &kas);
1503
1504 eaddr = addr + len;
1505 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1506
1507 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1508 /*
1509 * Pages are successfully prefaulted and locked in
1510 * segmap_getmapflt and can't be unlocked until
1511 * segmap_release, so no pages or hat mappings have
1512 * to be unlocked at this point.
1513 */
1514 #ifdef DEBUG
1515 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1516 panic("segmap_pageunlock: smap not found "
1517 "for addr %p", (void *)addr);
1518 /*NOTREACHED*/
1519 }
1520
1521 ASSERT(smp->sm_refcnt > 0);
1522 mutex_exit(SMAPMTX(smp));
1523 #endif
1524 return;
1525 }
1526
1527 smp = GET_SMAP(seg, addr);
1528 smtx = SMAPMTX(smp);
1529
1530 ASSERT(smp->sm_refcnt > 0);
1531
1532 vp = smp->sm_vp;
1533 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1534
1535 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1536 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
1537
1538 /*
1539 * Large Files: Following assertion is to verify
1540 * the correctness of the cast to (int) above.
1541 */
1542 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1543
1544 /*
1545 * If the bit corresponding to "off" is set,
1546 * clear this bit in the bitmap, unlock translations,
1547 * and release the "exclusive" lock on the page.
1548 */
1549 if (smp->sm_bitmap & bitmask) {
1550 mutex_enter(smtx);
1551 smp->sm_bitmap &= ~bitmask;
1552 mutex_exit(smtx);
1553
1554 hat_unlock(kas.a_hat, addr, PAGESIZE);
1555
1556 /*
1557 * Use page_find() instead of page_lookup() to
1558 * find the page since we know that it has
1559 * "exclusive" lock.
1560 */
1561 pp = page_find(vp, off);
1562 if (pp == NULL) {
1563 panic("segmap_pageunlock: page not found");
1564 /*NOTREACHED*/
1565 }
1566 if (rw == S_WRITE) {
1567 hat_setrefmod(pp);
1568 } else if (rw != S_OTHER) {
1569 hat_setref(pp);
1570 }
1571
1572 page_unlock(pp);
1573 }
1574 }
1575 }
1576
1577 caddr_t
1578 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
1579 {
1580 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
1581 }
1582
1583 /*
1584 * This is the magic virtual address that offset 0 of an ELF
1585 * file gets mapped to in user space. This is used to pick
1586 * the vac color on the freelist.
1587 */
1588 #define ELF_OFFZERO_VA (0x10000)
1589 /*
1590 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
1591 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
1592 * The return address is always MAXBSIZE aligned.
1593 *
1594 * If forcefault is nonzero and the MMU translations haven't yet been created,
1595 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1596 */
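/*
 * Illustrative read-side sketch only (uoff, n and flags are assumed names;
 * the real consumers are filesystem VOP_READ/VOP_WRITE routines):
 *
 *	mapon = uoff & MAXBOFFSET;
 *	n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 *	base = segmap_getmapflt(segkmap, vp, uoff, n, 1, S_READ);
 *	error = uiomove(base + mapon, n, UIO_READ, uio);
 *	if (error)
 *		(void) segmap_release(segkmap, base, 0);
 *	else
 *		error = segmap_release(segkmap, base, flags);
 */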
1597 caddr_t
1598 segmap_getmapflt(
1599 struct seg *seg,
1600 struct vnode *vp,
1601 u_offset_t off,
1602 size_t len,
1603 int forcefault,
1604 enum seg_rw rw)
1605 {
1606 struct smap *smp, *nsmp;
1607 extern struct vnode *common_specvp();
1608 caddr_t baseaddr; /* MAXBSIZE aligned */
1609 u_offset_t baseoff;
1610 int newslot;
1611 caddr_t vaddr;
1612 int color, hashid;
1613 kmutex_t *hashmtx, *smapmtx;
1614 struct smfree *sm;
1615 page_t *pp;
1616 struct kpme *kpme;
1617 uint_t prot;
1618 caddr_t base;
1619 page_t *pl[MAXPPB + 1];
1620 int error;
1621 int is_kpm = 1;
1622
1623 ASSERT(seg->s_as == &kas);
1624 ASSERT(seg == segkmap);
1625
1626 baseoff = off & (offset_t)MAXBMASK;
1627 if (off + len > baseoff + MAXBSIZE) {
1628 panic("segmap_getmap bad len");
1629 /*NOTREACHED*/
1630 }
1631
1632 /*
1633 * If this is a block device we have to be sure to use the
1634 * "common" block device vnode for the mapping.
1635 */
1636 if (vp->v_type == VBLK)
1637 vp = common_specvp(vp);
1638
1639 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
1640
1641 if (segmap_kpm == 0 ||
1642 (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
1643 is_kpm = 0;
1644 }
1645
1646 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1647 hashmtx = SHASHMTX(hashid);
1648
1649 retry_hash:
1650 mutex_enter(hashmtx);
1651 for (smp = smd_hash[hashid].sh_hash_list;
1652 smp != NULL; smp = smp->sm_hash)
1653 if (smp->sm_vp == vp && smp->sm_off == baseoff)
1654 break;
1655 mutex_exit(hashmtx);
1656
1657 vrfy_smp:
1658 if (smp != NULL) {
1659
1660 ASSERT(vp->v_count != 0);
1661
1662 /*
1663 * Get smap lock and recheck its tag. The hash lock
1664 * is dropped since the hash is based on (vp, off)
1665 * and (vp, off) won't change when we have smap mtx.
1666 */
1667 smapmtx = SMAPMTX(smp);
1668 mutex_enter(smapmtx);
1669 if (smp->sm_vp != vp || smp->sm_off != baseoff) {
1670 mutex_exit(smapmtx);
1671 goto retry_hash;
1672 }
1673
1674 if (smp->sm_refcnt == 0) {
1675
1676 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
1677
1678 /*
1679 * Could still be on the free list. However, this
1680 * could also be an smp that is transitioning from
1681 * the free list when we have too much contention
1682 * for the smapmtx's. In this case, we have an
1683 * unlocked smp that is not on the free list any
1684 * longer, but still has a 0 refcnt. The only way
1685 * to be sure is to check the freelist pointers.
1686 * Since we now have the smapmtx, we are guaranteed
1687 * that the (vp, off) won't change, so we are safe
1688 * to reclaim it. get_free_smp() knows that this
1689 * can happen, and it will check the refcnt.
1690 */
1691
1692 if ((smp->sm_next != NULL)) {
1693 struct sm_freeq *freeq;
1694
1695 ASSERT(smp->sm_prev != NULL);
1696 sm = &smd_free[smp->sm_free_ndx];
1697
1698 if (smp->sm_flags & SM_QNDX_ZERO)
1699 freeq = &sm->sm_freeq[0];
1700 else
1701 freeq = &sm->sm_freeq[1];
1702
1703 mutex_enter(&freeq->smq_mtx);
1704 if (freeq->smq_free != smp) {
1705 /*
1706 * fastpath normal case
1707 */
1708 smp->sm_prev->sm_next = smp->sm_next;
1709 smp->sm_next->sm_prev = smp->sm_prev;
1710 } else if (smp == smp->sm_next) {
1711 /*
1712 * Taking the last smap on freelist
1713 */
1714 freeq->smq_free = NULL;
1715 } else {
1716 /*
1717 * Reclaiming 1st smap on list
1718 */
1719 freeq->smq_free = smp->sm_next;
1720 smp->sm_prev->sm_next = smp->sm_next;
1721 smp->sm_next->sm_prev = smp->sm_prev;
1722 }
1723 mutex_exit(&freeq->smq_mtx);
1724 smp->sm_prev = smp->sm_next = NULL;
1725 } else {
1726 ASSERT(smp->sm_prev == NULL);
1727 segmapcnt.smp_stolen.value.ul++;
1728 }
1729
1730 } else {
1731 segmapcnt.smp_get_use.value.ul++;
1732 }
1733 smp->sm_refcnt++; /* another user */
1734
1735 /*
1736 * We don't invoke segmap_fault via TLB miss, so we set ref
1737 * and mod bits in advance. For S_OTHER we set them in
1738 * segmap_fault F_SOFTUNLOCK.
1739 */
1740 if (is_kpm) {
1741 if (rw == S_WRITE) {
1742 smp->sm_flags |= SM_WRITE_DATA;
1743 } else if (rw == S_READ) {
1744 smp->sm_flags |= SM_READ_DATA;
1745 }
1746 }
1747 mutex_exit(smapmtx);
1748
1749 newslot = 0;
1750 } else {
1751
1752 uint32_t free_ndx, *free_ndxp;
1753 union segmap_cpu *scpu;
1754
1755 /*
1756 * On a PAC machine or a machine with anti-alias
1757 * hardware, smd_colormsk will be zero.
1758 *
1759 * On a VAC machine- pick color by offset in the file
1760 * so we won't get VAC conflicts on elf files.
1761 * On data files, color does not matter but we
1762 * don't know what kind of file it is so we always
1763 * pick color by offset. This causes color
1764 * corresponding to file offset zero to be used more
1765 * heavily.
1766 */
1767 color = (baseoff >> MAXBSHIFT) & smd_colormsk;
1768 scpu = smd_cpu+CPU->cpu_seqid;
1769 free_ndxp = &scpu->scpu.scpu_free_ndx[color];
1770 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
1771 #ifdef DEBUG
1772 colors_used[free_ndx]++;
1773 #endif /* DEBUG */
1774
1775 /*
1776 * Get a locked smp slot from the free list.
1777 */
1778 smp = get_free_smp(free_ndx);
1779 smapmtx = SMAPMTX(smp);
1780
1781 ASSERT(smp->sm_vp == NULL);
1782
1783 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
1784 /*
1785 * Failed to hashin, there exists one now.
1786 * Return the smp we just allocated.
1787 */
1788 segmap_smapadd(smp);
1789 mutex_exit(smapmtx);
1790
1791 smp = nsmp;
1792 goto vrfy_smp;
1793 }
1794 smp->sm_refcnt++; /* another user */
1795
1796 /*
1797 * We don't invoke segmap_fault via TLB miss, so we set ref
1798 * and mod bits in advance. For S_OTHER we set them in
1799 * segmap_fault F_SOFTUNLOCK.
1800 */
1801 if (is_kpm) {
1802 if (rw == S_WRITE) {
1803 smp->sm_flags |= SM_WRITE_DATA;
1804 } else if (rw == S_READ) {
1805 smp->sm_flags |= SM_READ_DATA;
1806 }
1807 }
1808 mutex_exit(smapmtx);
1809
1810 newslot = 1;
1811 }
1812
1813 if (!is_kpm)
1814 goto use_segmap_range;
1815
1816 /*
1817 * Use segkpm
1818 */
1819 /* Lint directive required until 6746211 is fixed */
1820 /*CONSTCOND*/
1821 ASSERT(PAGESIZE == MAXBSIZE);
1822
1823 /*
1824 * remember the last smp faulted on this cpu.
1825 */
1826 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
1827
1828 if (forcefault == SM_PAGECREATE) {
1829 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
1830 return (baseaddr);
1831 }
1832
1833 if (newslot == 0 &&
1834 (pp = GET_KPME(smp)->kpe_page) != NULL) {
1835
1836 /* fastpath */
1837 switch (rw) {
1838 case S_READ:
1839 case S_WRITE:
1840 if (page_trylock(pp, SE_SHARED)) {
1841 if (PP_ISFREE(pp) ||
1842 !(pp->p_vnode == vp &&
1843 pp->p_offset == baseoff)) {
1844 page_unlock(pp);
1845 pp = page_lookup(vp, baseoff,
1846 SE_SHARED);
1847 }
1848 } else {
1849 pp = page_lookup(vp, baseoff, SE_SHARED);
1850 }
1851
1852 if (pp == NULL) {
1853 ASSERT(GET_KPME(smp)->kpe_page == NULL);
1854 break;
1855 }
1856
1857 if (rw == S_WRITE &&
1858 hat_page_getattr(pp, P_MOD | P_REF) !=
1859 (P_MOD | P_REF)) {
1860 page_unlock(pp);
1861 break;
1862 }
1863
1864 /*
1865 * We have the p_selock as reader, grab_smp
1866 * can't hit us, we have bumped the smap
1867 * refcnt and hat_pageunload needs the
1868 * p_selock exclusive.
1869 */
1870 kpme = GET_KPME(smp);
1871 if (kpme->kpe_page == pp) {
1872 baseaddr = hat_kpm_page2va(pp, 0);
1873 } else if (kpme->kpe_page == NULL) {
1874 baseaddr = hat_kpm_mapin(pp, kpme);
1875 } else {
1876 panic("segmap_getmapflt: stale "
1877 "kpme page, kpme %p", (void *)kpme);
1878 /*NOTREACHED*/
1879 }
1880
1881 /*
1882 * We don't invoke segmap_fault via TLB miss,
1883 * so we set ref and mod bits in advance.
1884 * For S_OTHER we set them in segmap_fault
1885 * F_SOFTUNLOCK.
1886 */
1887 if (rw == S_READ && !hat_isref(pp))
1888 hat_setref(pp);
1889
1890 return (baseaddr);
1891 default:
1892 break;
1893 }
1894 }
1895
1896 base = segkpm_create_va(baseoff);
1897 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1898 seg, base, rw, CRED(), NULL);
1899
1900 pp = pl[0];
1901 if (error || pp == NULL) {
1902 /*
1903 * Use segmap address slot and let segmap_fault deal
1904 * with the error cases. There is no error return
1905 * possible here.
1906 */
1907 goto use_segmap_range;
1908 }
1909
1910 ASSERT(pl[1] == NULL);
1911
1912 /*
1913 * When prot is not returned w/ PROT_ALL the returned pages
1914 * are not backed by fs blocks. For most of the segmap users
1915 * this is no problem, they don't write to the pages in the
1916 * same request and therefore don't rely on a following
1917 * trap driven segmap_fault. With SM_LOCKPROTO users it
1918 * is safer to use segkmap addresses to allow protection
1919 * faults to be handled by segmap_fault.
1920 */
1921 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
1922 /*
1923 * Use segmap address slot and let segmap_fault
1924 * do the error return.
1925 */
1926 ASSERT(rw != S_WRITE);
1927 ASSERT(PAGE_LOCKED(pp));
1928 page_unlock(pp);
1929 forcefault = 0;
1930 goto use_segmap_range;
1931 }
1932
1933 /*
1934 * We have the p_selock as reader, grab_smp can't hit us, we
1935 * have bumped the smap refcnt and hat_pageunload needs the
1936 * p_selock exclusive.
1937 */
1938 kpme = GET_KPME(smp);
1939 if (kpme->kpe_page == pp) {
1940 baseaddr = hat_kpm_page2va(pp, 0);
1941 } else if (kpme->kpe_page == NULL) {
1942 baseaddr = hat_kpm_mapin(pp, kpme);
1943 } else {
1944 panic("segmap_getmapflt: stale kpme page after "
1945 "VOP_GETPAGE, kpme %p", (void *)kpme);
1946 /*NOTREACHED*/
1947 }
1948
1949 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
1950
1951 return (baseaddr);
1952
1953
1954 use_segmap_range:
1955 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
1956 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
1957 "segmap_getmap:seg %p addr %p vp %p offset %llx",
1958 seg, baseaddr, vp, baseoff);
1959
1960 /*
1961 * Prefault the translations
1962 */
1963 vaddr = baseaddr + (off - baseoff);
1964 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
1965
1966 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
1967 (uintptr_t)PAGEMASK);
1968
1969 (void) segmap_fault(kas.a_hat, seg, pgaddr,
1970 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
1971 F_INVAL, rw);
1972 }
1973
1974 return (baseaddr);
1975 }
1976
1977 int
1978 segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
1979 {
1980 struct smap *smp;
1981 int error;
1982 int bflags = 0;
1983 struct vnode *vp;
1984 u_offset_t offset;
1985 kmutex_t *smtx;
1986 int is_kpm = 0;
1987 page_t *pp;
1988
1989 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1990
1991 if (((uintptr_t)addr & MAXBOFFSET) != 0) {
1992 panic("segmap_release: addr %p not "
1993 "MAXBSIZE aligned", (void *)addr);
1994 /*NOTREACHED*/
1995 }
1996
1997 if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
1998 panic("segmap_release: smap not found "
1999 "for addr %p", (void *)addr);
2000 /*NOTREACHED*/
2001 }
2002
2003 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2004 "segmap_relmap:seg %p addr %p smp %p",
2005 seg, addr, smp);
2006
2007 smtx = SMAPMTX(smp);
2008
2009 /*
2010 * For compatibility reasons segmap_pagecreate_kpm sets this
2011 * flag to allow a following segmap_pagecreate to return
2012 * this as "newpage" flag. When segmap_pagecreate is not
2013 * called at all we clear it now.
2014 */
2015 smp->sm_flags &= ~SM_KPM_NEWPAGE;
2016 is_kpm = 1;
2017 if (smp->sm_flags & SM_WRITE_DATA) {
2018 hat_setrefmod(pp);
2019 } else if (smp->sm_flags & SM_READ_DATA) {
2020 hat_setref(pp);
2021 }
2022 } else {
2023 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
2024 ((uintptr_t)addr & MAXBOFFSET) != 0) {
2025 panic("segmap_release: bad addr %p", (void *)addr);
2026 /*NOTREACHED*/
2027 }
2028 smp = GET_SMAP(seg, addr);
2029
2030 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2031 "segmap_relmap:seg %p addr %p smp %p",
2032 seg, addr, smp);
2033
2034 smtx = SMAPMTX(smp);
2035 mutex_enter(smtx);
2036 smp->sm_flags |= SM_NOTKPM_RELEASED;
2037 }
2038
2039 ASSERT(smp->sm_refcnt > 0);
2040
2041 /*
2042 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2043 * are set.
2044 */
2045 if ((flags & ~SM_DONTNEED) != 0) {
2046 if (flags & SM_WRITE)
2047 segmapcnt.smp_rel_write.value.ul++;
2048 if (flags & SM_ASYNC) {
2049 bflags |= B_ASYNC;
2050 segmapcnt.smp_rel_async.value.ul++;
2051 }
2052 if (flags & SM_INVAL) {
2053 bflags |= B_INVAL;
2054 segmapcnt.smp_rel_abort.value.ul++;
2055 }
2056 if (flags & SM_DESTROY) {
2057 bflags |= (B_INVAL|B_TRUNC);
2058 segmapcnt.smp_rel_abort.value.ul++;
2059 }
2060 if (smp->sm_refcnt == 1) {
2061 /*
2062 * We only bother doing the FREE and DONTNEED flags
2063 * if no one else is still referencing this mapping.
2064 */
2065 if (flags & SM_FREE) {
2066 bflags |= B_FREE;
2067 segmapcnt.smp_rel_free.value.ul++;
2068 }
2069 if (flags & SM_DONTNEED) {
2070 bflags |= B_DONTNEED;
2071 segmapcnt.smp_rel_dontneed.value.ul++;
2072 }
2073 }
2074 } else {
2075 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
2076 }
2077
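	/*
	 * Capture the mapping's identity before dropping the final
	 * reference; once the smap is back on the free list it can be
	 * reused, but VOP_PUTPAGE() below still needs (vp, offset).
	 */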
2078 vp = smp->sm_vp;
2079 offset = smp->sm_off;
2080
2081 if (--smp->sm_refcnt == 0) {
2082
2083 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2084
2085 if (flags & (SM_INVAL|SM_DESTROY)) {
2086 segmap_hashout(smp); /* remove map info */
2087 if (is_kpm) {
2088 hat_kpm_mapout(pp, GET_KPME(smp), addr);
2089 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
2090 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2091 hat_unload(kas.a_hat, segkmap->s_base +
2092 ((smp - smd_smap) * MAXBSIZE),
2093 MAXBSIZE, HAT_UNLOAD);
2094 }
2095
2096 } else {
2097 if (segmap_kpm)
2098 segkpm_mapout_validkpme(GET_KPME(smp));
2099
2100 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2101 hat_unload(kas.a_hat, addr, MAXBSIZE,
2102 HAT_UNLOAD);
2103 }
2104 }
2105 segmap_smapadd(smp); /* add to free list */
2106 }
2107
2108 mutex_exit(smtx);
2109
2110 if (is_kpm)
2111 page_unlock(pp);
2112 /*
2113 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2114 * are set.
2115 */
2116 if ((flags & ~SM_DONTNEED) != 0) {
2117 error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
2118 bflags, CRED(), NULL);
2119 } else {
2120 error = 0;
2121 }
2122
2123 return (error);
2124 }
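
/*
 * A minimal sketch of the caller side of segmap_release(), not part of
 * the original file: after dirtying a mapping a writer typically pushes
 * the window asynchronously, while an error path discards it. The name
 * example_segmap_write_done is illustrative; real consumers add their
 * own policy (for instance SM_DONTNEED for sequential access).
 */
static int
example_segmap_write_done(caddr_t base, int error)
{
	uint_t flags;

	if (error != 0) {
		/* Throw the data away: B_INVAL|B_TRUNC in VOP_PUTPAGE(). */
		flags = SM_DESTROY;
	} else {
		/* Asynchronous write-behind: B_ASYNC in VOP_PUTPAGE(). */
		flags = SM_WRITE | SM_ASYNC;
	}

	return (segmap_release(segkmap, base, flags));
}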
2125
2126 /*
2127 * Dump the pages belonging to this segmap segment.
2128 */
2129 static void
2130 segmap_dump(struct seg *seg)
2131 {
2132 struct segmap_data *smd;
2133 struct smap *smp, *smp_end;
2134 page_t *pp;
2135 pfn_t pfn;
2136 u_offset_t off;
2137 caddr_t addr;
2138
2139 smd = (struct segmap_data *)seg->s_data;
2140 addr = seg->s_base;
2141 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
2142 smp < smp_end; smp++) {
2143
2144 if (smp->sm_refcnt) {
2145 for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
2146 int we_own_it = 0;
2147
2148 /*
2149 				 * If page_lookup_nowait() returns NULL,
2150 				 * the page either does not exist or is
2151 				 * exclusively locked, so fall back to
2152 				 * page_exists() to see if it is there.
2153 */
2154 if ((pp = page_lookup_nowait(smp->sm_vp,
2155 smp->sm_off + off, SE_SHARED)))
2156 we_own_it = 1;
2157 else
2158 pp = page_exists(smp->sm_vp,
2159 smp->sm_off + off);
2160
2161 if (pp) {
2162 pfn = page_pptonum(pp);
2163 dump_addpage(seg->s_as,
2164 addr + off, pfn);
2165 if (we_own_it)
2166 page_unlock(pp);
2167 }
2168 dump_timeleft = dump_timeout;
2169 }
2170 }
2171 addr += MAXBSIZE;
2172 }
2173 }
2174
2175 /*ARGSUSED*/
2176 static int
2177 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
2178 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2179 {
2180 return (ENOTSUP);
2181 }
2182
2183 static int
2184 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2185 {
2186 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
2187
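	/*
	 * A memid identifies the backing object of an address; for segmap
	 * that is a (vnode, vnode offset) pair derived from the segment's
	 * smap array.
	 */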
2188 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
2189 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
2190 return (0);
2191 }
2192
2193 /*ARGSUSED*/
2194 static lgrp_mem_policy_info_t *
2195 segmap_getpolicy(struct seg *seg, caddr_t addr)
2196 {
2197 return (NULL);
2198 }
2199
2200 /*ARGSUSED*/
2201 static int
2202 segmap_capable(struct seg *seg, segcapability_t capability)
2203 {
2204 return (0);
2205 }
2206
2207
2208 #ifdef SEGKPM_SUPPORT
2209
2210 /*
2211 * segkpm support routines
2212 */
2213
2214 static caddr_t
2215 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2216 struct smap *smp, enum seg_rw rw)
2217 {
2218 caddr_t base;
2219 page_t *pp;
2220 int newpage = 0;
2221 struct kpme *kpme;
2222
2223 ASSERT(smp->sm_refcnt > 0);
2224
2225 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
2226 kmutex_t *smtx;
2227
2228 base = segkpm_create_va(off);
2229
2230 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
2231 seg, base)) == NULL) {
2232 panic("segmap_pagecreate_kpm: "
2233 "page_create failed");
2234 /*NOTREACHED*/
2235 }
2236
2237 newpage = 1;
2238 page_io_unlock(pp);
2239 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
2240
2241 /*
2242 		 * Mark the smap here; the flag is consumed by a following
2243 		 * segmap_pagecreate or cleared by segmap_release.
2244 */
2245 smtx = SMAPMTX(smp);
2246 mutex_enter(smtx);
2247 smp->sm_flags |= SM_KPM_NEWPAGE;
2248 mutex_exit(smtx);
2249 }
2250
2251 kpme = GET_KPME(smp);
2252 if (!newpage && kpme->kpe_page == pp)
2253 base = hat_kpm_page2va(pp, 0);
2254 else
2255 base = hat_kpm_mapin(pp, kpme);
2256
2257 /*
2258 * FS code may decide not to call segmap_pagecreate and we
2259 * don't invoke segmap_fault via TLB miss, so we have to set
2260 * ref and mod bits in advance.
2261 */
2262 if (rw == S_WRITE) {
2263 hat_setrefmod(pp);
2264 } else {
2265 ASSERT(rw == S_READ);
2266 hat_setref(pp);
2267 }
2268
2269 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
2270
2271 return (base);
2272 }
2273
2274 /*
2275 * Find the smap structure corresponding to the
2276 * KPM addr and return it locked.
2277 */
2278 struct smap *
2279 get_smap_kpm(caddr_t addr, page_t **ppp)
2280 {
2281 struct smap *smp;
2282 struct vnode *vp;
2283 u_offset_t offset;
2284 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
2285 int hashid;
2286 kmutex_t *hashmtx;
2287 page_t *pp;
2288 union segmap_cpu *scpu;
2289
2290 pp = hat_kpm_vaddr2page(baseaddr);
2291
2292 ASSERT(pp && !PP_ISFREE(pp));
2293 ASSERT(PAGE_LOCKED(pp));
2294 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
2295
2296 vp = pp->p_vnode;
2297 offset = pp->p_offset;
2298 ASSERT(vp != NULL);
2299
2300 /*
2301 * Assume the last smap used on this cpu is the one needed.
2302 */
2303 scpu = smd_cpu+CPU->cpu_seqid;
2304 smp = scpu->scpu.scpu_last_smap;
2305 mutex_enter(&smp->sm_mtx);
2306 if (smp->sm_vp == vp && smp->sm_off == offset) {
2307 ASSERT(smp->sm_refcnt > 0);
2308 } else {
2309 /*
2310 * Assumption wrong, find the smap on the hash chain.
2311 */
2312 mutex_exit(&smp->sm_mtx);
2313 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
2314 hashmtx = SHASHMTX(hashid);
2315
2316 mutex_enter(hashmtx);
2317 smp = smd_hash[hashid].sh_hash_list;
2318 for (; smp != NULL; smp = smp->sm_hash) {
2319 if (smp->sm_vp == vp && smp->sm_off == offset)
2320 break;
2321 }
2322 mutex_exit(hashmtx);
2323 if (smp) {
2324 mutex_enter(&smp->sm_mtx);
2325 ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
2326 }
2327 }
2328
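	/*
	 * smp is NULL here if no smap currently maps (vp, offset); callers
	 * such as segmap_release treat that as fatal. Hand the page back
	 * only when a locked smap is actually being returned.
	 */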
2329 if (ppp)
2330 *ppp = smp ? pp : NULL;
2331
2332 return (smp);
2333 }
2334
2335 #else /* SEGKPM_SUPPORT */
2336
2337 /* segkpm stubs */
2338
2339 /*ARGSUSED*/
2340 static caddr_t
2341 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2342 struct smap *smp, enum seg_rw rw)
2343 {
2344 return (NULL);
2345 }
2346
2347 /*ARGSUSED*/
2348 struct smap *
2349 get_smap_kpm(caddr_t addr, page_t **ppp)
2350 {
2351 return (NULL);
2352 }
2353
2354 #endif /* SEGKPM_SUPPORT */
--- EOF ---