/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap *get_smap_kpm(caddr_t, page_t **);

static const struct seg_ops segmap_ops = {
	.free		= segmap_free,
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.checkprot	= segmap_checkprot,
	.kluster	= segmap_kluster,
	.getprot	= segmap_getprot,
	.getoffset	= segmap_getoffset,
	.gettype	= segmap_gettype,
	.getvp		= segmap_getvp,
	.dump		= segmap_dump,
	.pagelock	= segmap_pagelock,
	.getmemid	= segmap_getmemid,
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
			u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)	((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by holding only the hashchain lock; when a wanted
 * slot is found, we drop the hashchain lock and then lock the slot so there
 * is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again if the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp) (&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can override this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (!ISP2(nfreelist)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array 1 time. Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations even across the available
	 * smap freelists. Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	vpm_init();

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime. Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has
		 * "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear bitmap, if the bit corresponding to "off" is set,
		 * since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm. We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * that the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ?
	    EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the sm_freeq[0] mutex, as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp, off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;

}

/*
 * Attempt to free unmodified, unmapped, and non locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t  *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit : smap lock.
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode *vp = smp->sm_vp;
		u_offset_t off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once,
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skip_queue;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;

						pp = NULL;

						if (smp == first) {
							goto skip_queue;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp.  Remove smp
			 * from its freelist.  If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}

/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as the "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}

void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap	*smp;
	ushort_t	bitmask;
	page_t		*pp;
	struct vnode	*vp;
	u_offset_t	off;
	caddr_t		eaddr;
	kmutex_t	*smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has
			 * "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)
/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
	struct seg *seg,
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int forcefault,
	enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	u_offset_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t	*pp;
	struct kpme *kpme;
	uint_t	prot;
	caddr_t base;
	page_t	*pl[MAXPPB + 1];
	int	error;
	int	is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag. The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list. However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's. In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt.  The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it.  get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */

			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine- pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset. This causes color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
		colors_used[free_ndx]++;
#endif /* DEBUG */

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hashin, there exists one now.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm
	 */
	/* Lint directive required until 6746211 is fixed */
	/*CONSTCOND*/
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(vp, baseoff,
					    SE_SHARED);
				}
			} else {
				pp = page_lookup(vp, baseoff, SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
				/*NOTREACHED*/
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases. There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks. For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault. With SM_LOCKPROTO users it
	 * is more secure to use segkmap addresses to allow
	 * protection faults via segmap_fault.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "VOP_GETPAGE, kpme %p", (void *)kpme);
		/*NOTREACHED*/
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
	    seg, baseaddr, vp, baseoff);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
		    F_INVAL, rw);
	}

	return (baseaddr);
}

int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap	*smp;
	int		error;
	int		bflags = 0;
	struct vnode	*vp;
	u_offset_t	offset;
	kmutex_t	*smtx;
	int		is_kpm = 0;
	page_t		*pp;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
			/*NOTREACHED*/
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as "newpage" flag. When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
			/*NOTREACHED*/
		}
		smp = GET_SMAP(seg, addr);

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
			 */
			if (flags & SM_FREE) {
				bflags |= B_FREE;
				segmapcnt.smp_rel_free.value.ul++;
			}
			if (flags & SM_DONTNEED) {
				bflags |= B_DONTNEED;
				segmapcnt.smp_rel_dontneed.value.ul++;
			}
		}
	} else {
		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
	}

	vp = smp->sm_vp;
	offset = smp->sm_off;

	if (--smp->sm_refcnt == 0) {

		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

		if (flags & (SM_INVAL|SM_DESTROY)) {
			segmap_hashout(smp);	/* remove map info */
			if (is_kpm) {
				hat_kpm_mapout(pp, GET_KPME(smp), addr);
				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
					hat_unload(kas.a_hat, segkmap->s_base +
					    ((smp - smd_smap) * MAXBSIZE),
					    MAXBSIZE, HAT_UNLOAD);
				}

			} else {
				if (segmap_kpm)
					segkpm_mapout_validkpme(GET_KPME(smp));

				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
				hat_unload(kas.a_hat, addr, MAXBSIZE,
				    HAT_UNLOAD);
			}
		}
		segmap_smapadd(smp);	/* add to free list */
	}

	mutex_exit(smtx);

	if (is_kpm)
		page_unlock(pp);
	/*
	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
		    bflags, CRED(), NULL);
	} else {
		error = 0;
	}

	return (error);
}

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}


#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	caddr_t	base;
	page_t	*pp;
	int	newpage = 0;
	struct kpme	*kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap	*smp;
	struct vnode	*vp;
	u_offset_t	offset;
	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int		hashid;
	kmutex_t	*hashmtx;
	page_t		*pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */
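
/*
 * Illustrative sketch (not compiled, hence the #if 0 guard) of the typical
 * segmap consumer pattern used by file system read paths: map a MAXBSIZE
 * window of the vnode with segmap_getmapflt(), copy it with uiomove(), and
 * drop the window with segmap_release().  The function and variable names
 * below are hypothetical; only the segmap/segkmap interfaces are taken
 * from this file.
 */
#if 0
static int
example_segmap_read(struct vnode *vp, uio_t *uio, ssize_t n)
{
	u_offset_t off = uio->uio_loffset;
	size_t mapon = off & MAXBOFFSET;	/* offset within the window */
	size_t cnt = MIN(n, MAXBSIZE - mapon);	/* stay inside one window */
	caddr_t base;
	int error;

	/* Map <vp, off> and prefault the translations (forcefault != 0). */
	base = segmap_getmapflt(segkmap, vp, off & (offset_t)MAXBMASK,
	    cnt, 1, S_READ);

	/* Copy the data out to the caller's buffer described by the uio. */
	error = uiomove(base + mapon, cnt, UIO_READ, uio);

	/* Release the window; with no flags set, no VOP_PUTPAGE is done. */
	if (error == 0)
		error = segmap_release(segkmap, base, 0);
	else
		(void) segmap_release(segkmap, base, 0);

	return (error);
}
#endif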