/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
    size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
    uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
    uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
    struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

static struct seg_ops segmap_ops = {
	.free		= segmap_free,
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.checkprot	= segmap_checkprot,
	.kluster	= segmap_kluster,
	.getprot	= segmap_getprot,
	.getoffset	= segmap_getoffset,
	.gettype	= segmap_gettype,
	.getvp		= segmap_getvp,
	.dump		= segmap_dump,
	.pagelock	= segmap_pagelock,
	.getmemid	= segmap_getmemid,
	.capable	= segmap_capable,
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
    size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
    u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))
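
/*
 * Illustrative example of the translation macros above (a sketch, not
 * part of the driver): assuming MAXBSIZE = 8192 (MAXBSHIFT = 13), an
 * address 3 * MAXBSIZE + 100 bytes past seg->s_base falls in slot 3:
 *
 *	MAP_PAGE(seg, seg->s_base + 3 * MAXBSIZE + 100) == 3
 *	GET_SMAP(seg, addr) == &smd->smd_sm[3]
 *
 * and SMAP_BIT_MASK(3) == 0x8 selects the per-PAGESIZE-page bit within
 * that slot's 16-bit sm_bitmap.
 */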
static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by only holding the hashchain lock, when a wanted
 * slot is found, we drop the hashchain lock then lock the slot so there
 * is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again if the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in smap structure except for
 * the link fields for hash/free lists which are protected by
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp)	(&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	    ((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}
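
/*
 * Sketch (illustrative only): these counters are exported through the
 * standard kstat(9F) framework by binding segmapcnt to a virtual named
 * kstat with segmap_kstat_update as its update routine. The actual
 * registration lives elsewhere in the kernel; the module/instance/class
 * names below are assumptions:
 *
 *	kstat_t *ksp = kstat_create("unix", 0, "segmap", "vm",
 *	    KSTAT_TYPE_NAMED, segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
 *	if (ksp != NULL) {
 *		ksp->ks_data = (void *)segmapcnt_ptr;
 *		ksp->ks_update = segmap_kstat_update;
 *		kstat_install(ksp);
 *	}
 */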
int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can over-ride this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (!ISP2(nfreelist)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array 1 time. Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations even across the available
	 * smap freelists. Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	vpm_init();

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
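
/*
 * Sketch (illustrative, not part of this file): platform startup code
 * creates the kernel segkmap segment and hands it to segmap_create()
 * roughly as follows; the exact shmsize/nfreelist values are
 * platform-dependent assumptions here:
 *
 *	struct segmap_crargs a;
 *
 *	a.prot = PROT_READ | PROT_WRITE;
 *	a.shmsize = 0;		// VAC machines pass their cache alignment
 *	a.nfreelist = 0;	// 0 lets segmap_create scale by max_ncpus
 *	(void) segmap_create(segkmap, &a);
 */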
/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime. Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has
		 * "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear bitmap, if the bit corresponding to "off" is set,
		 * since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine. It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm. We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * that the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}
static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex
		 * then recheck after obtaining sm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp,off) as its tag before we do.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;
}

/*
 * Attempt to free unmodified, unmapped, and non locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit: smap lock.
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode *vp = smp->sm_vp;
		u_offset_t off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once,
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skip_queue;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;

						pp = NULL;

						if (smp == first) {
							goto skip_queue;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp.  Remove smp
			 * from its freelist.  If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}

/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as the "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}
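
/*
 * Illustrative sketch of the pagecreate protocol (not part of this
 * file): a file system overwriting a whole MAXBSIZE window can avoid
 * the read-before-write that a fault would do by pairing
 * segmap_pagecreate with segmap_pageunlock; names and error handling
 * are simplified assumptions:
 *
 *	base = segmap_getmap(segkmap, vp, off & (offset_t)MAXBMASK);
 *	newpage = segmap_pagecreate(segkmap, base, MAXBSIZE, 1);
 *	error = uiomove(base, MAXBSIZE, UIO_WRITE, uio);
 *	if (newpage)
 *		segmap_pageunlock(segkmap, base, MAXBSIZE, S_WRITE);
 *	(void) segmap_release(segkmap, base, error ? SM_INVAL : SM_WRITE);
 */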
void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap *smp;
	ushort_t bitmask;
	page_t *pp;
	struct vnode *vp;
	u_offset_t off;
	caddr_t eaddr;
	kmutex_t *smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has
			 * "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)
/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
	struct seg *seg,
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int forcefault,
	enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	u_offset_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t *pp;
	struct kpme *kpme;
	uint_t prot;
	caddr_t base;
	page_t *pl[MAXPPB + 1];
	int error;
	int is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag. The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list. However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's. In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt.  The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it.  get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */
			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine, pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset. This causes color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
		colors_used[free_ndx]++;
#endif /* DEBUG */

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hashin, there exists one now.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm
	 */
	/* Lint directive required until 6746211 is fixed */
	/*CONSTCOND*/
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(vp, baseoff,
					    SE_SHARED);
				}
			} else {
				pp = page_lookup(vp, baseoff, SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
				/*NOTREACHED*/
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases. There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks. For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault. With SM_LOCKPROTO users it
	 * is safer to use segkmap addresses so that protection
	 * faults can be handled by segmap_fault.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "VOP_GETPAGE, kpme %p", (void *)kpme);
		/*NOTREACHED*/
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
	    seg, baseaddr, vp, baseoff);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) &
		    (uintptr_t)PAGEMASK, F_INVAL, rw);
	}

	return (baseaddr);
}
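
/*
 * Illustrative sketch of the common consumer pattern (not part of this
 * file): a file system read path maps a window over (vp, off), copies
 * out of it, and releases the slot; "n" and the error handling are
 * simplified assumptions:
 *
 *	caddr_t base;
 *	size_t mapon = off & MAXBOFFSET;
 *	size_t n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base + mapon, n, UIO_READ, uio);
 *	if (error)
 *		(void) segmap_release(segkmap, base, 0);
 *	else
 *		error = segmap_release(segkmap, base, 0);
 */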
int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap *smp;
	int error;
	int bflags = 0;
	struct vnode *vp;
	u_offset_t offset;
	kmutex_t *smtx;
	int is_kpm = 0;
	page_t *pp;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
			/*NOTREACHED*/
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as "newpage" flag. When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
			/*NOTREACHED*/
		}
		smp = GET_SMAP(seg, addr);

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
			 */
			if (flags & SM_FREE) {
				bflags |= B_FREE;
				segmapcnt.smp_rel_free.value.ul++;
			}
			if (flags & SM_DONTNEED) {
				bflags |= B_DONTNEED;
				segmapcnt.smp_rel_dontneed.value.ul++;
			}
		}
	} else {
		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
	}

	vp = smp->sm_vp;
	offset = smp->sm_off;

	if (--smp->sm_refcnt == 0) {

		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

		if (flags & (SM_INVAL|SM_DESTROY)) {
			segmap_hashout(smp);	/* remove map info */
			if (is_kpm) {
				hat_kpm_mapout(pp, GET_KPME(smp), addr);
				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
					hat_unload(kas.a_hat, segkmap->s_base +
					    ((smp - smd_smap) * MAXBSIZE),
					    MAXBSIZE, HAT_UNLOAD);
				}

			} else {
				if (segmap_kpm)
					segkpm_mapout_validkpme(GET_KPME(smp));

				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
				hat_unload(kas.a_hat, addr, MAXBSIZE,
				    HAT_UNLOAD);
			}
		}
		segmap_smapadd(smp);	/* add to free list */
	}

	mutex_exit(smtx);

	if (is_kpm)
		page_unlock(pp);
	/*
	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
		    bflags, CRED(), NULL);
	} else {
		error = 0;
	}

	return (error);
}
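
/*
 * Illustrative examples of release flags (a sketch; the exact policy
 * is up to the file system):
 *
 *	(void) segmap_release(segkmap, base, 0);
 *		read path: drop the slot, keep the pages cached
 *
 *	error = segmap_release(segkmap, base, SM_WRITE | SM_ASYNC);
 *		write path: start asynchronous writeback via VOP_PUTPAGE
 *
 *	error = segmap_release(segkmap, base, SM_INVAL);
 *		invalidate the cached pages, e.g. when the data written
 *		into the window must not be reused
 */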
/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}

/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}


#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	caddr_t base;
	page_t *pp;
	int newpage = 0;
	struct kpme *kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t offset;
	caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int hashid;
	kmutex_t *hashmtx;
	page_t *pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */