/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t	segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
		    struct page ***ppp, enum lock_type type,
		    enum seg_rw rw);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
    caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
		    struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

static struct seg_ops segmap_ops = {
	.free		= segmap_free,
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.checkprot	= segmap_checkprot,
	.kluster	= segmap_kluster,
	.getprot	= segmap_getprot,
	.getoffset	= segmap_getoffset,
	.gettype	= segmap_gettype,
	.getvp		= segmap_getvp,
	.dump		= segmap_dump,
	.pagelock	= segmap_pagelock,
	.getmemid	= segmap_getmemid,
	.getpolicy	= segmap_getpolicy,
	.capable	= segmap_capable,
	.inherit	= seg_inherit_notsup,
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
		    u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)	((seg)->s_size >> MAXBSHIFT)
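
/*
 * Illustrative example (comment only, not compiled): every smap slot
 * covers one MAXBSIZE window of the segment, so with MAXBSIZE = 8192
 * (MAXBSHIFT = 13) an address of seg->s_base + 3 * MAXBSIZE + 100
 * yields MAP_PAGE() == 3, and the GET_SMAP() macro below resolves to
 * &smd_sm[3].
 */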

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done holding only the hashchain lock; when a
 * wanted slot is found, we drop the hashchain lock and then lock the
 * slot, so there is no overlap of hashchain and smap locks.  After
 * the slot is locked, we verify again that the slot is still what we
 * are looking for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist.  This is
 * the reverse of the lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for the hash/free lists, which are protected by
 * the hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp)	(&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	    ((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
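
/*
 * Illustrative sketch of the lookup protocol described above (comment
 * only, not compiled): the hashchain lock is held only for the walk,
 * and the (vp, off) tag is re-verified under the smap mutex.
 *
 *	SMAP_HASHFUNC(vp, off, hashid);
 *	mutex_enter(SHASHMTX(hashid));
 *	for (smp = smd_hash[hashid].sh_hash_list; smp != NULL;
 *	    smp = smp->sm_hash)
 *		if (smp->sm_vp == vp && smp->sm_off == off)
 *			break;
 *	mutex_exit(SHASHMTX(hashid));
 *	if (smp != NULL) {
 *		mutex_enter(SMAPMTX(smp));
 *		if (smp->sm_vp != vp || smp->sm_off != off)
 *			... slot was reused; retry the hash walk ...
 *	}
 *
 * segmap_getmapflt() below implements exactly this retry loop.
 */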

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks.  The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can override this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (!ISP2(nfreelist)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue.  This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}
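
	/*
	 * Worked example of the freelist scaling above (illustrative
	 * only): with max_ncpus = 6 and a->nfreelist == 0, nfreelist
	 * becomes 6 and is rounded up to the power of two 8
	 * (highbit(6) == 3, 1 << 3 == 8).  With two virtual colors
	 * (smd_ncolor == 2) that yields smd_nfree == 16 freelists and
	 * smd_freemsk == 0xf, so SMP2SMF() can pick a freelist with a
	 * mask instead of a modulo.
	 */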

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz) - 1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array 1 time.  Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists.  Each cpu will have a private
	 * rotor index to spread the allocations evenly across the
	 * available smap freelists.  Init the scpu_last_smap field to
	 * the first smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	vpm_init();

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
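
/*
 * Illustrative sketch (comment only, not compiled) of the
 * F_SOFTLOCK/F_SOFTUNLOCK pairing that segmap_unlock() below completes:
 * a caller that must not fault while copying locks the window down,
 * copies, then unlocks the same range.
 *
 *	if (segmap_fault(kas.a_hat, segkmap, addr, len,
 *	    F_SOFTLOCK, S_READ) == 0) {
 *		... copy through the window; no page faults possible ...
 *		(void) segmap_fault(kas.a_hat, segkmap, addr, len,
 *		    F_SOFTUNLOCK, S_READ);
 *	}
 */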

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime.  Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has
		 * "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear bitmap, if the bit corresponding to "off" is set,
		 * since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm.  We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * that the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here.  If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection.  With S_OTHER it's up to the FS to deal with
		 * this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ?
	    EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue.
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock.  This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the sm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp,off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode * even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode * itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;
}
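
/*
 * Illustrative note on the segmap_hashin() contract (comment only,
 * not compiled): a NULL return means smp now owns the (vp, off) tag;
 * a non-NULL return means another thread hashed in a slot for the
 * same tag first, and the caller must discard its own slot and use
 * the winner.  segmap_getmapflt() handles the race like this:
 *
 *	if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
 *		segmap_smapadd(smp);	... put our slot back ...
 *		mutex_exit(smapmtx);
 *		smp = nsmp;		... use the existing slot ...
 *		goto vrfy_smp;
 *	}
 */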

/*
 * Attempt to free unmodified, unmapped, and non-locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit: smap lock
 */
static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode *vp = smp->sm_vp;
		u_offset_t off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx.  In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once,
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skip_queue;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;

						pp = NULL;

						if (smp == first) {
							goto skip_queue;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp.  Remove smp
			 * from its freelist.  If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}
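
/*
 * Design note on the two-queue freelist used by get_free_smp() above
 * (illustrative summary): allocations take slots only from sm_allocq
 * and frees append only to sm_releq, so the two mutexes are rarely
 * contended against each other.  Only when the alloc queue drains does
 * get_free_smp() take both locks and swap the queues:
 *
 *	sm->sm_allocq = releq;		... refill from released slots ...
 *	sm->sm_releq = allocq;		... empty queue collects frees ...
 *
 * This is also why segmap_create() can start sm_freeq[1] out as the
 * release queue: which physical queue plays which role is transient.
 */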

/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as the "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}

void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap	*smp;
	ushort_t	bitmask;
	page_t		*pp;
	struct vnode	*vp;
	u_offset_t	off;
	caddr_t		eaddr;
	kmutex_t	*smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has
			 * "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space.  This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)
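
/*
 * Illustrative sketch of the canonical segmap consumer (comment only,
 * not compiled; the variable names are hypothetical).  A filesystem
 * read path maps a window, copies through it, and releases it:
 *
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off, bytes, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), bytes, UIO_READ, uio);
 *	(void) segmap_release(segkmap, base, error ? 0 : SM_FREE);
 *
 * forcefault == 1 asks segmap_getmapflt() to prefault the MMU
 * translations so the uiomove() copy does not have to fault them in.
 */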

/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len).  off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
	struct seg *seg,
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int forcefault,
	enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	u_offset_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t	*pp;
	struct kpme *kpme;
	uint_t	prot;
	caddr_t base;
	page_t	*pl[MAXPPB + 1];
	int	error;
	int	is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag.  The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list.  However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's.  In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt.  The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it.  get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */

			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance.  For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine, pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset.  This causes the color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
		colors_used[free_ndx]++;
#endif /* DEBUG */

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hashin, there exists one now.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance.  For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm
	 */
	/* Lint directive required until 6746211 is fixed */
	/*CONSTCOND*/
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(vp, baseoff,
					    SE_SHARED);
				}
			} else {
				pp = page_lookup(vp, baseoff, SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
				/*NOTREACHED*/
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases.  There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks.  For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault.  With SM_LOCKPROTO users it
	 * is more secure to use segkmap addresses to allow
	 * protection segmap_faults.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "VOP_GETPAGE, kpme %p", (void *)kpme);
		/*NOTREACHED*/
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
	    seg, baseaddr, vp, baseoff);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
		    F_INVAL, rw);
	}

	return (baseaddr);
}

int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap	*smp;
	int		error;
	int		bflags = 0;
	struct vnode	*vp;
	u_offset_t	offset;
	kmutex_t	*smtx;
	int		is_kpm = 0;
	page_t		*pp;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
			/*NOTREACHED*/
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as "newpage" flag.  When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
			/*NOTREACHED*/
		}
		smp = GET_SMAP(seg, addr);

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
			 */
			if (flags & SM_FREE) {
				bflags |= B_FREE;
				segmapcnt.smp_rel_free.value.ul++;
			}
			if (flags & SM_DONTNEED) {
				bflags |= B_DONTNEED;
				segmapcnt.smp_rel_dontneed.value.ul++;
			}
		}
	} else {
		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
	}

	vp = smp->sm_vp;
	offset = smp->sm_off;

	if (--smp->sm_refcnt == 0) {

		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

		if (flags & (SM_INVAL|SM_DESTROY)) {
			segmap_hashout(smp);	/* remove map info */
			if (is_kpm) {
				hat_kpm_mapout(pp, GET_KPME(smp), addr);
				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
					hat_unload(kas.a_hat, segkmap->s_base +
					    ((smp - smd_smap) * MAXBSIZE),
					    MAXBSIZE, HAT_UNLOAD);
				}

			} else {
				if (segmap_kpm)
					segkpm_mapout_validkpme(GET_KPME(smp));

				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
				hat_unload(kas.a_hat, addr, MAXBSIZE,
				    HAT_UNLOAD);
			}
		}
		segmap_smapadd(smp);	/* add to free list */
	}

	mutex_exit(smtx);

	if (is_kpm)
		page_unlock(pp);
	/*
	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
		    bflags, CRED(), NULL);
	} else {
		error = 0;
	}

	return (error);
}
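
/*
 * Illustrative sketch of the write-side release protocol (comment
 * only, not compiled; names are hypothetical): after a copy that
 * dirtied the window, the caller passes SM_WRITE so segmap_release()
 * invokes VOP_PUTPAGE(), optionally asynchronously:
 *
 *	base = segmap_getmapflt(segkmap, vp, off, bytes, n, S_WRITE);
 *	error = uiomove(base + (off & MAXBOFFSET), bytes, UIO_WRITE, uio);
 *	error = segmap_release(segkmap, base, SM_WRITE | SM_ASYNC);
 *
 * With no flags at all, segmap_release() just drops the reference and
 * puts the slot back on its freelist; the cached translation survives
 * for the next segmap_getmap() hit on the same (vp, off) window.
 */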

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}

/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}


#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	caddr_t	base;
	page_t	*pp;
	int	newpage = 0;
	struct kpme	*kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap	*smp;
	struct vnode	*vp;
	u_offset_t	offset;
	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int		hashid;
	kmutex_t	*hashmtx;
	page_t		*pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */