/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
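/*
 * Editorial sketch (not part of this file's interfaces): a file system
 * read path typically consumes segmap by mapping one MAXBSIZE window at
 * a time and copying through it, roughly:
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uio);
 *	error = segmap_release(segkmap, base, 0);
 *
 * where n must not cross the MAXBSIZE window containing off.  The exact
 * flags and error handling vary by file system; this is only meant to
 * orient the reader before the implementation below.
 */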
/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
			caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

static struct seg_ops segmap_ops = {
	.free		= segmap_free,
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.checkprot	= segmap_checkprot,
	.kluster	= segmap_kluster,
	.getprot	= segmap_getprot,
	.getoffset	= segmap_getoffset,
	.gettype	= segmap_gettype,
	.getvp		= segmap_getvp,
	.dump		= segmap_dump,
	.pagelock	= segmap_pagelock,
	.getmemid	= segmap_getmemid,
	.getpolicy	= segmap_getpolicy,
	.capable	= segmap_capable,
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
			u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)
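/*
 * Illustrative arithmetic (an editorial note, not from the original
 * source): with MAXBSIZE of 8K (MAXBSHIFT == 13), a 256MB segkmap
 * segment provides 32K smap slots, and an addr three chunks past
 * s_base yields MAP_PAGE(seg, addr) == 3 below.
 */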
/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;

#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for the hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by holding only the hashchain lock; when a
 * wanted slot is found, we drop the hashchain lock and then lock the
 * slot, so there is no overlapping of hashchain and smap locks.  After
 * the slot is locked, we verify again whether the slot is still what
 * we are looking for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist.  This is
 * in reversed lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for the hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp) (&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks.  The update function
 * sums the cpu local counters to update the global counters.
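 *
 * Editorial observability note (an assumption, not stated in this file):
 * these named counters are typically exported as a named kstat, so on a
 * live system something like `kstat -n segmap' would display the summed
 * values produced below.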
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release  += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault  += smd_cpu[i].scpu.scpu_fault;
		pagecreate  += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can override this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (!ISP2(nfreelist)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}
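	/*
	 * Illustrative arithmetic (an editorial sketch, not from the
	 * original source): with max_ncpus == 6 and no explicit
	 * a->nfreelist, nfreelist is rounded up to 8 above; with
	 * smd_ncolor == 2 that yields smd_nfree == 16 freelists and
	 * smd_freemsk == 0xf.
	 */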
	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array once.  Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations even across the available
	 * smap freelists. Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	vpm_init();

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(seg)
	struct seg *seg;
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
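 *
 * Illustrative pairing (an editorial sketch, not code from this file):
 * a caller that soft-locks a range is expected to undo it symmetrically,
 * roughly:
 *
 *	(void) segmap_fault(kas.a_hat, seg, addr, len, F_SOFTLOCK, rw);
 *	... access the mapping ...
 *	(void) segmap_fault(kas.a_hat, seg, addr, len, F_SOFTUNLOCK, rw);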
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime. Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has a
		 * "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear the bit corresponding to "off" in the bitmap, if
		 * it is set, since the page and translation are being
		 * unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm. We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ?
	    EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue.
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining sm_freeq[0] mutex, as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp, off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;

}

/*
 * Attempt to free unmodified, unmapped, and non-locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t  *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit : smap lock.
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode *vp = smp->sm_vp;
		u_offset_t off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once;
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skip_queue;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;

						pp = NULL;

						if (smp == first) {
							goto skip_queue;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp.  Remove smp
			 * from its freelist.  If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}

/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1 if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
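 *
 * Illustrative write-path pairing (an editorial sketch, not code from
 * this file): a file system that overwrites a whole window typically
 * avoids reading from backing store by creating the pages directly,
 * roughly:
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, SM_PAGECREATE, S_WRITE);
 *	newpage = segmap_pagecreate(segkmap, base, n, 0);
 *	error = uiomove(base, n, UIO_WRITE, uio);
 *	if (newpage)
 *		segmap_pageunlock(segkmap, base, n, S_WRITE);
 *	error = segmap_release(segkmap, base, SM_WRITE);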
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as the "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}

void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap	*smp;
	ushort_t	bitmask;
	page_t		*pp;
	struct	vnode	*vp;
	u_offset_t	off;
	caddr_t		eaddr;
	kmutex_t	*smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has the
			 * "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)
/*
 * segmap_getmap allocates a MAXBSIZE-sized slot to map the vnode vp
 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
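 *
 * Illustrative example (an editorial note, not from the original source):
 * a call such as segmap_getmapflt(segkmap, vp, 0x1234, 512, 1, S_READ)
 * returns the MAXBSIZE-aligned window covering file offsets
 * [0, MAXBSIZE), and the caller addresses byte 0x1234 at
 * (return address + 0x1234).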
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
	struct seg *seg,
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int forcefault,
	enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	u_offset_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t	*pp;
	struct kpme *kpme;
	uint_t	prot;
	caddr_t base;
	page_t	*pl[MAXPPB + 1];
	int	error;
	int	is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag. The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list. However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's. In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt.  The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it.  get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */

			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine, pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset.  This causes the color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
		colors_used[free_ndx]++;
#endif /* DEBUG */

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hash in; one now exists.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm
	 */
	/* Lint directive required until 6746211 is fixed */
	/*CONSTCOND*/
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
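	 * get_smap_kpm() consults this as a one-entry per-cpu cache to
	 * avoid the hash lookup on the common segmap_release path.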
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(vp, baseoff,
					    SE_SHARED);
				}
			} else {
				pp = page_lookup(vp, baseoff, SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
				/*NOTREACHED*/
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases. There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned with PROT_ALL the returned pages
	 * are not backed by fs blocks. For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault. With SM_LOCKPROTO users it
	 * is more secure to use segkmap addresses to allow
	 * protection segmap_fault's.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "VOP_GETPAGE, kpme %p", (void *)kpme);
		/*NOTREACHED*/
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
	    seg, baseaddr, vp, baseoff);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
		    F_INVAL, rw);
	}

	return (baseaddr);
}

int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap	*smp;
	int		error;
	int		bflags = 0;
	struct vnode	*vp;
	u_offset_t	offset;
	kmutex_t	*smtx;
	int		is_kpm = 0;
	page_t		*pp;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
			/*NOTREACHED*/
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as the "newpage" flag. When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
			/*NOTREACHED*/
		}
		smp = GET_SMAP(seg, addr);

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
			 */
			if (flags & SM_FREE) {
				bflags |= B_FREE;
				segmapcnt.smp_rel_free.value.ul++;
			}
			if (flags & SM_DONTNEED) {
				bflags |= B_DONTNEED;
				segmapcnt.smp_rel_dontneed.value.ul++;
			}
		}
	} else {
		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
	}

	vp = smp->sm_vp;
	offset = smp->sm_off;

	if (--smp->sm_refcnt == 0) {

		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

		if (flags & (SM_INVAL|SM_DESTROY)) {
			segmap_hashout(smp);	/* remove map info */
			if (is_kpm) {
				hat_kpm_mapout(pp, GET_KPME(smp), addr);
				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
					hat_unload(kas.a_hat, segkmap->s_base +
					    ((smp - smd_smap) * MAXBSIZE),
					    MAXBSIZE, HAT_UNLOAD);
				}

			} else {
				if (segmap_kpm)
					segkpm_mapout_validkpme(GET_KPME(smp));

				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
				hat_unload(kas.a_hat, addr, MAXBSIZE,
				    HAT_UNLOAD);
			}
		}
		segmap_smapadd(smp);	/* add to free list */
	}

	mutex_exit(smtx);

	if (is_kpm)
		page_unlock(pp);
	/*
	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
		    bflags, CRED(), NULL);
	} else {
		error = 0;
	}

	return (error);
}

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}

/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}


#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	caddr_t	base;
	page_t	*pp;
	int	newpage = 0;
	struct kpme	*kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap	*smp;
	struct vnode	*vp;
	u_offset_t	offset;
	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int		hashid;
	kmutex_t	*hashmtx;
	page_t		*pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
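	 * (scpu_last_smap is maintained by segmap_getmapflt; it is a
	 * one-entry cache, so a miss simply falls back to the hash below.)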
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */