/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 *
 * Note that segmap_fault() is the one seg op that is not declared static.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
    caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

/*
 * Place holder for seg ops that segmap does not implement: segmap_badop()
 * cast to a function returning type t.  segmap_badop() panics, so reaching
 * one of these slots is fatal.
 */
#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

/*
 * The seg ops vector for segmap.  The initializer is positional, so the
 * order of the entries must not be changed.
 */
static struct seg_ops segmap_ops = {
	SEGMAP_BADOP(int),	/* dup */
	SEGMAP_BADOP(int),	/* unmap */
	segmap_free,
	segmap_fault,
	segmap_faulta,
	SEGMAP_BADOP(int),	/* setprot */
	segmap_checkprot,
	segmap_kluster,
	SEGMAP_BADOP(size_t),	/* swapout */
	SEGMAP_BADOP(int),	/* sync */
	SEGMAP_BADOP(size_t),	/* incore */
	SEGMAP_BADOP(int),	/* lockop */
	segmap_getprot,
	segmap_getoffset,
	segmap_gettype,
	segmap_getvp,
	SEGMAP_BADOP(int),	/* advise */
	segmap_dump,
	segmap_pagelock,	/* pagelock */
	SEGMAP_BADOP(int),	/* setpgsz */
	segmap_getmemid,	/* getmemid */
	segmap_getpolicy,	/* getpolicy */
	segmap_capable,		/* capable */
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
			u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
144 */ 145 struct segmapcnt segmapcnt = { 146 { "fault", KSTAT_DATA_ULONG }, 147 { "faulta", KSTAT_DATA_ULONG }, 148 { "getmap", KSTAT_DATA_ULONG }, 149 { "get_use", KSTAT_DATA_ULONG }, 150 { "get_reclaim", KSTAT_DATA_ULONG }, 151 { "get_reuse", KSTAT_DATA_ULONG }, 152 { "get_unused", KSTAT_DATA_ULONG }, 153 { "get_nofree", KSTAT_DATA_ULONG }, 154 { "rel_async", KSTAT_DATA_ULONG }, 155 { "rel_write", KSTAT_DATA_ULONG }, 156 { "rel_free", KSTAT_DATA_ULONG }, 157 { "rel_abort", KSTAT_DATA_ULONG }, 158 { "rel_dontneed", KSTAT_DATA_ULONG }, 159 { "release", KSTAT_DATA_ULONG }, 160 { "pagecreate", KSTAT_DATA_ULONG }, 161 { "free_notfree", KSTAT_DATA_ULONG }, 162 { "free_dirty", KSTAT_DATA_ULONG }, 163 { "free", KSTAT_DATA_ULONG }, 164 { "stolen", KSTAT_DATA_ULONG }, 165 { "get_nomtx", KSTAT_DATA_ULONG } 166 }; 167 168 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt; 169 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t); 170 171 /* 172 * Return number of map pages in segment. 173 */ 174 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT) 175 176 /* 177 * Translate addr into smap number within segment. 178 */ 179 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT) 180 181 /* 182 * Translate addr in seg into struct smap pointer. 183 */ 184 #define GET_SMAP(seg, addr) \ 185 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)]) 186 187 /* 188 * Bit in map (16 bit bitmap). 
*/
/* Only the low four bits of bitindex are used, i.e. the index is taken mod 16 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

/*
 * File-scope copies of the segmap configuration.  All of these are
 * assigned in segmap_create().
 */
static int smd_colormsk = 0;		/* smd_ncolor - 1 */
static int smd_ncolor = 0;		/* number of virtual colors */
static int smd_nfree = 0;		/* number of smfree freelists */
static int smd_freemsk = 0;		/* smd_nfree - 1 */
#ifdef DEBUG
static int *colors_used;		/* smd_nfree counters, DEBUG only */
#endif
static struct smap *smd_smap;		/* base of the smap array */
static struct smaphash *smd_hash;	/* hash chain headers */
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;	/* per chain length counters */
#endif
static struct smfree *smd_free;		/* freelist headers */
static ulong_t smd_hashmsk = 0;		/* hash size - 1 */

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

/*
 * Per cpu segmap state: a freelist rotor index for each color, the last
 * smap used and the most frequently updated statistics counters.  The
 * scpu_pad member makes each element at least SEGMAP_CACHE_PAD bytes;
 * NOTE(review): presumably this keeps the cpus from sharing a cache line,
 * confirm against the platform cache line size.
 */
union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
/* Array of max_ncpus elements, allocated in segmap_create() */
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by only holding the hashchain lock, when a wanted
 * slot is found, we drop the hashchain lock then lock the slot so there
 * is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again if the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in smap structure except for
 * the link fields for hash/free lists which are protected by
 * hashchain and freelist locks.
248 */ 249 250 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx) 251 252 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk]) 253 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk) 254 255 #define SMAPMTX(smp) (&smp->sm_mtx) 256 257 #define SMAP_HASHFUNC(vp, off, hashid) \ 258 { \ 259 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \ 260 ((off) >> MAXBSHIFT)) & smd_hashmsk); \ 261 } 262 263 /* 264 * The most frequently updated kstat counters are kept in the 265 * per cpu array to avoid hot cache blocks. The update function 266 * sums the cpu local counters to update the global counters. 267 */ 268 269 /* ARGSUSED */ 270 int 271 segmap_kstat_update(kstat_t *ksp, int rw) 272 { 273 int i; 274 ulong_t getmap, release, get_reclaim; 275 ulong_t fault, pagecreate, get_reuse; 276 277 if (rw == KSTAT_WRITE) 278 return (EACCES); 279 getmap = release = get_reclaim = (ulong_t)0; 280 fault = pagecreate = get_reuse = (ulong_t)0; 281 for (i = 0; i < max_ncpus; i++) { 282 getmap += smd_cpu[i].scpu.scpu_getmap; 283 release += smd_cpu[i].scpu.scpu_release; 284 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim; 285 fault += smd_cpu[i].scpu.scpu_fault; 286 pagecreate += smd_cpu[i].scpu.scpu_pagecreate; 287 get_reuse += smd_cpu[i].scpu.scpu_get_reuse; 288 } 289 segmapcnt.smp_getmap.value.ul = getmap; 290 segmapcnt.smp_release.value.ul = release; 291 segmapcnt.smp_get_reclaim.value.ul = get_reclaim; 292 segmapcnt.smp_fault.value.ul = fault; 293 segmapcnt.smp_pagecreate.value.ul = pagecreate; 294 segmapcnt.smp_get_reuse.value.ul = get_reuse; 295 return (0); 296 } 297 298 int 299 segmap_create(struct seg *seg, void *argsp) 300 { 301 struct segmap_data *smd; 302 struct smap *smp; 303 struct smfree *sm; 304 struct segmap_crargs *a = (struct segmap_crargs *)argsp; 305 struct smaphash *shashp; 306 union segmap_cpu *scpu; 307 long i, npages; 308 size_t hashsz; 309 uint_t nfreelist; 310 extern void prefetch_smap_w(void *); 311 extern int max_ncpus; 312 
313 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 314 315 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) { 316 panic("segkmap not MAXBSIZE aligned"); 317 /*NOTREACHED*/ 318 } 319 320 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP); 321 322 seg->s_data = (void *)smd; 323 seg->s_ops = &segmap_ops; 324 smd->smd_prot = a->prot; 325 326 /* 327 * Scale the number of smap freelists to be 328 * proportional to max_ncpus * number of virtual colors. 329 * The caller can over-ride this scaling by providing 330 * a non-zero a->nfreelist argument. 331 */ 332 nfreelist = a->nfreelist; 333 if (nfreelist == 0) 334 nfreelist = max_ncpus; 335 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) { 336 cmn_err(CE_WARN, "segmap_create: nfreelist out of range " 337 "%d, using %d", nfreelist, max_ncpus); 338 nfreelist = max_ncpus; 339 } 340 if (!ISP2(nfreelist)) { 341 /* round up nfreelist to the next power of two. */ 342 nfreelist = 1 << (highbit(nfreelist)); 343 } 344 345 /* 346 * Get the number of virtual colors - must be a power of 2. 347 */ 348 if (a->shmsize) 349 smd_ncolor = a->shmsize >> MAXBSHIFT; 350 else 351 smd_ncolor = 1; 352 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0); 353 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR); 354 smd_colormsk = smd_ncolor - 1; 355 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist; 356 smd_freemsk = smd_nfree - 1; 357 358 /* 359 * Allocate and initialize the freelist headers. 360 * Note that sm_freeq[1] starts out as the release queue. This 361 * is known when the smap structures are initialized below. 
362 */ 363 smd_free = smd->smd_free = 364 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP); 365 for (i = 0; i < smd_nfree; i++) { 366 sm = &smd->smd_free[i]; 367 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 368 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 369 sm->sm_allocq = &sm->sm_freeq[0]; 370 sm->sm_releq = &sm->sm_freeq[1]; 371 } 372 373 /* 374 * Allocate and initialize the smap hash chain headers. 375 * Compute hash size rounding down to the next power of two. 376 */ 377 npages = MAP_PAGES(seg); 378 smd->smd_npages = npages; 379 hashsz = npages / SMAP_HASHAVELEN; 380 hashsz = 1 << (highbit(hashsz)-1); 381 smd_hashmsk = hashsz - 1; 382 smd_hash = smd->smd_hash = 383 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP); 384 #ifdef SEGMAP_HASHSTATS 385 smd_hash_len = 386 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP); 387 #endif 388 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) { 389 shashp->sh_hash_list = NULL; 390 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL); 391 } 392 393 /* 394 * Allocate and initialize the smap structures. 395 * Link all slots onto the appropriate freelist. 396 * The smap array is large enough to affect boot time 397 * on large systems, so use memory prefetching and only 398 * go through the array 1 time. Inline a optimized version 399 * of segmap_smapadd to add structures to freelists with 400 * knowledge that no locks are needed here. 
401 */ 402 smd_smap = smd->smd_sm = 403 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP); 404 405 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1]; 406 smp >= smd->smd_sm; smp--) { 407 struct smap *smpfreelist; 408 struct sm_freeq *releq; 409 410 prefetch_smap_w((char *)smp); 411 412 smp->sm_vp = NULL; 413 smp->sm_hash = NULL; 414 smp->sm_off = 0; 415 smp->sm_bitmap = 0; 416 smp->sm_refcnt = 0; 417 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL); 418 smp->sm_free_ndx = SMP2SMF_NDX(smp); 419 420 sm = SMP2SMF(smp); 421 releq = sm->sm_releq; 422 423 smpfreelist = releq->smq_free; 424 if (smpfreelist == 0) { 425 releq->smq_free = smp->sm_next = smp->sm_prev = smp; 426 } else { 427 smp->sm_next = smpfreelist; 428 smp->sm_prev = smpfreelist->sm_prev; 429 smpfreelist->sm_prev = smp; 430 smp->sm_prev->sm_next = smp; 431 releq->smq_free = smp->sm_next; 432 } 433 434 /* 435 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1] 436 */ 437 smp->sm_flags = 0; 438 439 #ifdef SEGKPM_SUPPORT 440 /* 441 * Due to the fragile prefetch loop no 442 * separate function is used here. 443 */ 444 smp->sm_kpme_next = NULL; 445 smp->sm_kpme_prev = NULL; 446 smp->sm_kpme_page = NULL; 447 #endif 448 } 449 450 /* 451 * Allocate the per color indices that distribute allocation 452 * requests over the free lists. Each cpu will have a private 453 * rotor index to spread the allocations even across the available 454 * smap freelists. Init the scpu_last_smap field to the first 455 * smap element so there is no need to check for NULL. 456 */ 457 smd_cpu = 458 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP); 459 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) { 460 int j; 461 for (j = 0; j < smd_ncolor; j++) 462 scpu->scpu.scpu_free_ndx[j] = j; 463 scpu->scpu.scpu_last_smap = smd_smap; 464 } 465 466 vpm_init(); 467 468 #ifdef DEBUG 469 /* 470 * Keep track of which colors are used more often. 
471 */ 472 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP); 473 #endif /* DEBUG */ 474 475 return (0); 476 } 477 478 static void 479 segmap_free(seg) 480 struct seg *seg; 481 { 482 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 483 } 484 485 /* 486 * Do a F_SOFTUNLOCK call over the range requested. 487 * The range must have already been F_SOFTLOCK'ed. 488 */ 489 static void 490 segmap_unlock( 491 struct hat *hat, 492 struct seg *seg, 493 caddr_t addr, 494 size_t len, 495 enum seg_rw rw, 496 struct smap *smp) 497 { 498 page_t *pp; 499 caddr_t adr; 500 u_offset_t off; 501 struct vnode *vp; 502 kmutex_t *smtx; 503 504 ASSERT(smp->sm_refcnt > 0); 505 506 #ifdef lint 507 seg = seg; 508 #endif 509 510 if (segmap_kpm && IS_KPM_ADDR(addr)) { 511 512 /* 513 * We're called only from segmap_fault and this was a 514 * NOP in case of a kpm based smap, so dangerous things 515 * must have happened in the meantime. Pages are prefaulted 516 * and locked in segmap_getmapflt and they will not be 517 * unlocked until segmap_release. 518 */ 519 panic("segmap_unlock: called with kpm addr %p", (void *)addr); 520 /*NOTREACHED*/ 521 } 522 523 vp = smp->sm_vp; 524 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 525 526 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE)); 527 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) { 528 ushort_t bitmask; 529 530 /* 531 * Use page_find() instead of page_lookup() to 532 * find the page since we know that it has 533 * "shared" lock. 534 */ 535 pp = page_find(vp, off); 536 if (pp == NULL) { 537 panic("segmap_unlock: page not found"); 538 /*NOTREACHED*/ 539 } 540 541 if (rw == S_WRITE) { 542 hat_setrefmod(pp); 543 } else if (rw != S_OTHER) { 544 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 545 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off); 546 hat_setref(pp); 547 } 548 549 /* 550 * Clear bitmap, if the bit corresponding to "off" is set, 551 * since the page and translation are being unlocked. 
552 */ 553 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT); 554 555 /* 556 * Large Files: Following assertion is to verify 557 * the correctness of the cast to (int) above. 558 */ 559 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 560 smtx = SMAPMTX(smp); 561 mutex_enter(smtx); 562 if (smp->sm_bitmap & bitmask) { 563 smp->sm_bitmap &= ~bitmask; 564 } 565 mutex_exit(smtx); 566 567 page_unlock(pp); 568 } 569 } 570 571 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */ 572 573 /* 574 * This routine is called via a machine specific fault handling 575 * routine. It is also called by software routines wishing to 576 * lock or unlock a range of addresses. 577 * 578 * Note that this routine expects a page-aligned "addr". 579 */ 580 faultcode_t 581 segmap_fault( 582 struct hat *hat, 583 struct seg *seg, 584 caddr_t addr, 585 size_t len, 586 enum fault_type type, 587 enum seg_rw rw) 588 { 589 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 590 struct smap *smp; 591 page_t *pp, **ppp; 592 struct vnode *vp; 593 u_offset_t off; 594 page_t *pl[MAXPPB + 1]; 595 uint_t prot; 596 u_offset_t addroff; 597 caddr_t adr; 598 int err; 599 u_offset_t sm_off; 600 int hat_flag; 601 602 if (segmap_kpm && IS_KPM_ADDR(addr)) { 603 int newpage; 604 kmutex_t *smtx; 605 606 /* 607 * Pages are successfully prefaulted and locked in 608 * segmap_getmapflt and can't be unlocked until 609 * segmap_release. No hat mappings have to be locked 610 * and they also can't be unlocked as long as the 611 * caller owns an active kpm addr. 612 */ 613 #ifndef DEBUG 614 if (type != F_SOFTUNLOCK) 615 return (0); 616 #endif 617 618 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 619 panic("segmap_fault: smap not found " 620 "for addr %p", (void *)addr); 621 /*NOTREACHED*/ 622 } 623 624 smtx = SMAPMTX(smp); 625 #ifdef DEBUG 626 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 627 if (newpage) { 628 cmn_err(CE_WARN, "segmap_fault: newpage? 
smp %p", 629 (void *)smp); 630 } 631 632 if (type != F_SOFTUNLOCK) { 633 mutex_exit(smtx); 634 return (0); 635 } 636 #endif 637 mutex_exit(smtx); 638 vp = smp->sm_vp; 639 sm_off = smp->sm_off; 640 641 if (vp == NULL) 642 return (FC_MAKE_ERR(EIO)); 643 644 ASSERT(smp->sm_refcnt > 0); 645 646 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 647 if (addroff + len > MAXBSIZE) 648 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk", 649 (void *)(addr + len)); 650 651 off = sm_off + addroff; 652 653 pp = page_find(vp, off); 654 655 if (pp == NULL) 656 panic("segmap_fault: softunlock page not found"); 657 658 /* 659 * Set ref bit also here in case of S_OTHER to avoid the 660 * overhead of supporting other cases than F_SOFTUNLOCK 661 * with segkpm. We can do this because the underlying 662 * pages are locked anyway. 663 */ 664 if (rw == S_WRITE) { 665 hat_setrefmod(pp); 666 } else { 667 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 668 "segmap_fault:pp %p vp %p offset %llx", 669 pp, vp, off); 670 hat_setref(pp); 671 } 672 673 return (0); 674 } 675 676 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 677 smp = GET_SMAP(seg, addr); 678 vp = smp->sm_vp; 679 sm_off = smp->sm_off; 680 681 if (vp == NULL) 682 return (FC_MAKE_ERR(EIO)); 683 684 ASSERT(smp->sm_refcnt > 0); 685 686 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 687 if (addroff + len > MAXBSIZE) { 688 panic("segmap_fault: endaddr %p " 689 "exceeds MAXBSIZE chunk", (void *)(addr + len)); 690 /*NOTREACHED*/ 691 } 692 off = sm_off + addroff; 693 694 /* 695 * First handle the easy stuff 696 */ 697 if (type == F_SOFTUNLOCK) { 698 segmap_unlock(hat, seg, addr, len, rw, smp); 699 return (0); 700 } 701 702 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 703 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 704 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE, 705 seg, addr, rw, CRED(), NULL); 706 707 if (err) 708 return (FC_MAKE_ERR(err)); 709 710 prot &= smd->smd_prot; 711 712 /* 713 * Handle all pages returned in 
the pl[] array. 714 * This loop is coded on the assumption that if 715 * there was no error from the VOP_GETPAGE routine, 716 * that the page list returned will contain all the 717 * needed pages for the vp from [off..off + len]. 718 */ 719 ppp = pl; 720 while ((pp = *ppp++) != NULL) { 721 u_offset_t poff; 722 ASSERT(pp->p_vnode == vp); 723 hat_flag = HAT_LOAD; 724 725 /* 726 * Verify that the pages returned are within the range 727 * of this segmap region. Note that it is theoretically 728 * possible for pages outside this range to be returned, 729 * but it is not very likely. If we cannot use the 730 * page here, just release it and go on to the next one. 731 */ 732 if (pp->p_offset < sm_off || 733 pp->p_offset >= sm_off + MAXBSIZE) { 734 (void) page_release(pp, 1); 735 continue; 736 } 737 738 ASSERT(hat == kas.a_hat); 739 poff = pp->p_offset; 740 adr = addr + (poff - off); 741 if (adr >= addr && adr < addr + len) { 742 hat_setref(pp); 743 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 744 "segmap_fault:pp %p vp %p offset %llx", 745 pp, vp, poff); 746 if (type == F_SOFTLOCK) 747 hat_flag = HAT_LOAD_LOCK; 748 } 749 750 /* 751 * Deal with VMODSORT pages here. If we know this is a write 752 * do the setmod now and allow write protection. 753 * As long as it's modified or not S_OTHER, remove write 754 * protection. With S_OTHER it's up to the FS to deal with this. 755 */ 756 if (IS_VMODSORT(vp)) { 757 if (rw == S_WRITE) 758 hat_setmod(pp); 759 else if (rw != S_OTHER && !hat_ismod(pp)) 760 prot &= ~PROT_WRITE; 761 } 762 763 hat_memload(hat, adr, pp, prot, hat_flag); 764 if (hat_flag != HAT_LOAD_LOCK) 765 page_unlock(pp); 766 } 767 return (0); 768 } 769 770 /* 771 * This routine is used to start I/O on pages asynchronously. 
772 */ 773 static faultcode_t 774 segmap_faulta(struct seg *seg, caddr_t addr) 775 { 776 struct smap *smp; 777 struct vnode *vp; 778 u_offset_t off; 779 int err; 780 781 if (segmap_kpm && IS_KPM_ADDR(addr)) { 782 int newpage; 783 kmutex_t *smtx; 784 785 /* 786 * Pages are successfully prefaulted and locked in 787 * segmap_getmapflt and can't be unlocked until 788 * segmap_release. No hat mappings have to be locked 789 * and they also can't be unlocked as long as the 790 * caller owns an active kpm addr. 791 */ 792 #ifdef DEBUG 793 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 794 panic("segmap_faulta: smap not found " 795 "for addr %p", (void *)addr); 796 /*NOTREACHED*/ 797 } 798 799 smtx = SMAPMTX(smp); 800 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 801 mutex_exit(smtx); 802 if (newpage) 803 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p", 804 (void *)smp); 805 #endif 806 return (0); 807 } 808 809 segmapcnt.smp_faulta.value.ul++; 810 smp = GET_SMAP(seg, addr); 811 812 ASSERT(smp->sm_refcnt > 0); 813 814 vp = smp->sm_vp; 815 off = smp->sm_off; 816 817 if (vp == NULL) { 818 cmn_err(CE_WARN, "segmap_faulta - no vp"); 819 return (FC_MAKE_ERR(EIO)); 820 } 821 822 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 823 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 824 825 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr 826 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0, 827 seg, addr, S_READ, CRED(), NULL); 828 829 if (err) 830 return (FC_MAKE_ERR(err)); 831 return (0); 832 } 833 834 /*ARGSUSED*/ 835 static int 836 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 837 { 838 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 839 840 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); 841 842 /* 843 * Need not acquire the segment lock since 844 * "smd_prot" is a read-only field. 845 */ 846 return (((smd->smd_prot & prot) != prot) ? 
EACCES : 0); 847 } 848 849 static int 850 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 851 { 852 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 853 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 854 855 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 856 857 if (pgno != 0) { 858 do { 859 protv[--pgno] = smd->smd_prot; 860 } while (pgno != 0); 861 } 862 return (0); 863 } 864 865 static u_offset_t 866 segmap_getoffset(struct seg *seg, caddr_t addr) 867 { 868 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 869 870 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 871 872 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base)); 873 } 874 875 /*ARGSUSED*/ 876 static int 877 segmap_gettype(struct seg *seg, caddr_t addr) 878 { 879 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 880 881 return (MAP_SHARED); 882 } 883 884 /*ARGSUSED*/ 885 static int 886 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 887 { 888 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 889 890 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 891 892 /* XXX - This doesn't make any sense */ 893 *vpp = smd->smd_sm->sm_vp; 894 return (0); 895 } 896 897 /* 898 * Check to see if it makes sense to do kluster/read ahead to 899 * addr + delta relative to the mapping at addr. We assume here 900 * that delta is a signed PAGESIZE'd multiple (which can be negative). 901 * 902 * For segmap we always "approve" of this action from our standpoint. 903 */ 904 /*ARGSUSED*/ 905 static int 906 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta) 907 { 908 return (0); 909 } 910 911 static void 912 segmap_badop() 913 { 914 panic("segmap_badop"); 915 /*NOTREACHED*/ 916 } 917 918 /* 919 * Special private segmap operations 920 */ 921 922 /* 923 * Add smap to the appropriate free list. 
*/
/*
 * The caller must hold the smap mutex and the slot must have a zero
 * reference count (panics otherwise).
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	/* Record in sm_flags which of the two queues the slot goes on */
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		/* Queue was empty: smp becomes a one element circular list */
		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex
		 * then recheck after obtaining sm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		/* Link smp in just before the head, i.e. at the tail */
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


/*
 * Enter smp on hash chain "hashid" with the tag (vp, off), unless a slot
 * with that tag is already on the chain.
 *
 * The caller must hold the smap mutex, and smp must carry no vnode and be
 * on neither a hash chain nor a free list.
 *
 * Returns NULL if smp was hashed in, otherwise the existing slot tagged
 * (vp, off); in that case smp is left untouched.
 */
static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp,off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		/* Push smp on the front of the chain */
		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

/*
 * Remove smp from the hash chain selected by its current (sm_vp, sm_off)
 * tag and clear the tag.  The caller must hold the smap mutex.  Panics if
 * smp is not found on that chain.
 */
static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	/* Walk the chain keeping a pointer to the link that points at hp */
	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;

}

/*
 * Attempt to free unmodified, unmapped, and non locked segmap
 * pages.
*/
/*
 * Walks the MAXBSIZE range starting at (vp, off) one PAGESIZE page at a
 * time.  Pages that cannot be locked SE_EXCL without waiting are skipped;
 * the others are handed to page_release() and the outcome is counted in
 * segmapcnt.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit : smap lock.
 *
 * Strip the old identity from an unreferenced smap slot.  If the slot
 * still carries a vnode it is hashed out, its translations are unloaded
 * and an attempt is made to free the pages of the old (vp, off) range.
 * "pp", when not NULL, is the page whose kpm mapping is torn down and
 * which is unlocked here.
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		/* saved here because segmap_hashout() clears the tag */
		struct vnode	*vp = smp->sm_vp;
		u_offset_t 	off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
1232 * This should not happen unless clients 1233 * are failing to release the segmap 1234 * window after accessing the data. 1235 * Before resorting to sleeping, try 1236 * the next list of the same color. 1237 */ 1238 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk; 1239 if (free_ndx != end_ndx) { 1240 mutex_exit(&releq->smq_mtx); 1241 mutex_exit(&allocq->smq_mtx); 1242 sm = &smd_free[free_ndx]; 1243 goto retry_queue; 1244 } 1245 /* 1246 * Tried all freelists of the same color once, 1247 * wait on this list and hope something gets freed. 1248 */ 1249 segmapcnt.smp_get_nofree.value.ul++; 1250 sm->sm_want++; 1251 mutex_exit(&sm->sm_freeq[1].smq_mtx); 1252 cv_wait(&sm->sm_free_cv, 1253 &sm->sm_freeq[0].smq_mtx); 1254 sm->sm_want--; 1255 mutex_exit(&sm->sm_freeq[0].smq_mtx); 1256 sm = &smd_free[free_ndx]; 1257 goto retry_queue; 1258 } else { 1259 /* 1260 * Something on the rele queue; flip the alloc 1261 * and rele queues and retry. 1262 */ 1263 sm->sm_allocq = releq; 1264 sm->sm_releq = allocq; 1265 mutex_exit(&allocq->smq_mtx); 1266 mutex_exit(&releq->smq_mtx); 1267 if (page_locked) { 1268 delay(hz >> 2); 1269 page_locked = 0; 1270 } 1271 goto retry_queue; 1272 } 1273 } else { 1274 /* 1275 * Fastpath the case we get the smap mutex 1276 * on the first try. 1277 */ 1278 first = smp; 1279 next_smap: 1280 smtx = SMAPMTX(smp); 1281 if (!mutex_tryenter(smtx)) { 1282 /* 1283 * Another thread is trying to reclaim this slot. 1284 * Skip to the next queue or smap. 
1285 */ 1286 if ((smp = smp->sm_next) == first) { 1287 goto skip_queue; 1288 } else { 1289 goto next_smap; 1290 } 1291 } else { 1292 /* 1293 * if kpme exists, get shared lock on the page 1294 */ 1295 if (segmap_kpm && smp->sm_vp != NULL) { 1296 1297 kpme = GET_KPME(smp); 1298 pp = kpme->kpe_page; 1299 1300 if (pp != NULL) { 1301 if (!page_trylock(pp, SE_SHARED)) { 1302 smp = smp->sm_next; 1303 mutex_exit(smtx); 1304 page_locked = 1; 1305 1306 pp = NULL; 1307 1308 if (smp == first) { 1309 goto skip_queue; 1310 } else { 1311 goto next_smap; 1312 } 1313 } else { 1314 if (kpme->kpe_page == NULL) { 1315 page_unlock(pp); 1316 pp = NULL; 1317 } 1318 } 1319 } 1320 } 1321 1322 /* 1323 * At this point, we've selected smp. Remove smp 1324 * from its freelist. If smp is the first one in 1325 * the freelist, update the head of the freelist. 1326 */ 1327 if (first == smp) { 1328 ASSERT(first == allocq->smq_free); 1329 allocq->smq_free = smp->sm_next; 1330 } 1331 1332 /* 1333 * if the head of the freelist still points to smp, 1334 * then there are no more free smaps in that list. 1335 */ 1336 if (allocq->smq_free == smp) 1337 /* 1338 * Took the last one 1339 */ 1340 allocq->smq_free = NULL; 1341 else { 1342 smp->sm_prev->sm_next = smp->sm_next; 1343 smp->sm_next->sm_prev = smp->sm_prev; 1344 } 1345 mutex_exit(&allocq->smq_mtx); 1346 smp->sm_prev = smp->sm_next = NULL; 1347 1348 /* 1349 * if pp != NULL, pp must have been locked; 1350 * grab_smp() unlocks pp. 1351 */ 1352 ASSERT((pp == NULL) || PAGE_LOCKED(pp)); 1353 grab_smp(smp, pp); 1354 /* return smp locked. */ 1355 ASSERT(SMAPMTX(smp) == smtx); 1356 ASSERT(MUTEX_HELD(smtx)); 1357 return (smp); 1358 } 1359 } 1360 } 1361 1362 /* 1363 * Special public segmap operations 1364 */ 1365 1366 /* 1367 * Create pages (without using VOP_GETPAGE) and load up translations to them. 1368 * If softlock is TRUE, then set things up so that it looks like a call 1369 * to segmap_fault with F_SOFTLOCK. 
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		/* get_smap_kpm() returned smp with its mutex held. */
		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	/* Compute the end first, then round addr down to a page boundary. */
	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			/*
			 * Record the page in sm_bitmap so that
			 * segmap_pageunlock() knows it has to be unlocked.
			 */
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		/* Locked translations keep their page lock as well. */
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}

/*
 * Undo the page and translation locking that segmap_pagecreate() set up
 * for the pages in [addr, addr + len) that are marked in the slot's
 * sm_bitmap. Depending on rw the ref (and, for S_WRITE, mod) bits are set
 * on each such page before its lock is dropped. For kpm addresses nothing
 * is unlocked here.
 */
void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap *smp;
	ushort_t bitmask;
	page_t *pp;
	struct vnode *vp;
	u_offset_t off;
	caddr_t eaddr;
	kmutex_t *smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has
			 * "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

/*
 * Convenience wrapper: map the MAXBSIZE window containing off without
 * forcing a fault (forcefault 0, rw S_OTHER).
 */
caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
1587 */ 1588 #define ELF_OFFZERO_VA (0x10000) 1589 /* 1590 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp 1591 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned. 1592 * The return address is always MAXBSIZE aligned. 1593 * 1594 * If forcefault is nonzero and the MMU translations haven't yet been created, 1595 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them. 1596 */ 1597 caddr_t 1598 segmap_getmapflt( 1599 struct seg *seg, 1600 struct vnode *vp, 1601 u_offset_t off, 1602 size_t len, 1603 int forcefault, 1604 enum seg_rw rw) 1605 { 1606 struct smap *smp, *nsmp; 1607 extern struct vnode *common_specvp(); 1608 caddr_t baseaddr; /* MAXBSIZE aligned */ 1609 u_offset_t baseoff; 1610 int newslot; 1611 caddr_t vaddr; 1612 int color, hashid; 1613 kmutex_t *hashmtx, *smapmtx; 1614 struct smfree *sm; 1615 page_t *pp; 1616 struct kpme *kpme; 1617 uint_t prot; 1618 caddr_t base; 1619 page_t *pl[MAXPPB + 1]; 1620 int error; 1621 int is_kpm = 1; 1622 1623 ASSERT(seg->s_as == &kas); 1624 ASSERT(seg == segkmap); 1625 1626 baseoff = off & (offset_t)MAXBMASK; 1627 if (off + len > baseoff + MAXBSIZE) { 1628 panic("segmap_getmap bad len"); 1629 /*NOTREACHED*/ 1630 } 1631 1632 /* 1633 * If this is a block device we have to be sure to use the 1634 * "common" block device vnode for the mapping. 
1635 */ 1636 if (vp->v_type == VBLK) 1637 vp = common_specvp(vp); 1638 1639 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++; 1640 1641 if (segmap_kpm == 0 || 1642 (forcefault == SM_PAGECREATE && rw != S_WRITE)) { 1643 is_kpm = 0; 1644 } 1645 1646 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1647 hashmtx = SHASHMTX(hashid); 1648 1649 retry_hash: 1650 mutex_enter(hashmtx); 1651 for (smp = smd_hash[hashid].sh_hash_list; 1652 smp != NULL; smp = smp->sm_hash) 1653 if (smp->sm_vp == vp && smp->sm_off == baseoff) 1654 break; 1655 mutex_exit(hashmtx); 1656 1657 vrfy_smp: 1658 if (smp != NULL) { 1659 1660 ASSERT(vp->v_count != 0); 1661 1662 /* 1663 * Get smap lock and recheck its tag. The hash lock 1664 * is dropped since the hash is based on (vp, off) 1665 * and (vp, off) won't change when we have smap mtx. 1666 */ 1667 smapmtx = SMAPMTX(smp); 1668 mutex_enter(smapmtx); 1669 if (smp->sm_vp != vp || smp->sm_off != baseoff) { 1670 mutex_exit(smapmtx); 1671 goto retry_hash; 1672 } 1673 1674 if (smp->sm_refcnt == 0) { 1675 1676 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++; 1677 1678 /* 1679 * Could still be on the free list. However, this 1680 * could also be an smp that is transitioning from 1681 * the free list when we have too much contention 1682 * for the smapmtx's. In this case, we have an 1683 * unlocked smp that is not on the free list any 1684 * longer, but still has a 0 refcnt. The only way 1685 * to be sure is to check the freelist pointers. 1686 * Since we now have the smapmtx, we are guaranteed 1687 * that the (vp, off) won't change, so we are safe 1688 * to reclaim it. get_free_smp() knows that this 1689 * can happen, and it will check the refcnt. 
1690 */ 1691 1692 if ((smp->sm_next != NULL)) { 1693 struct sm_freeq *freeq; 1694 1695 ASSERT(smp->sm_prev != NULL); 1696 sm = &smd_free[smp->sm_free_ndx]; 1697 1698 if (smp->sm_flags & SM_QNDX_ZERO) 1699 freeq = &sm->sm_freeq[0]; 1700 else 1701 freeq = &sm->sm_freeq[1]; 1702 1703 mutex_enter(&freeq->smq_mtx); 1704 if (freeq->smq_free != smp) { 1705 /* 1706 * fastpath normal case 1707 */ 1708 smp->sm_prev->sm_next = smp->sm_next; 1709 smp->sm_next->sm_prev = smp->sm_prev; 1710 } else if (smp == smp->sm_next) { 1711 /* 1712 * Taking the last smap on freelist 1713 */ 1714 freeq->smq_free = NULL; 1715 } else { 1716 /* 1717 * Reclaiming 1st smap on list 1718 */ 1719 freeq->smq_free = smp->sm_next; 1720 smp->sm_prev->sm_next = smp->sm_next; 1721 smp->sm_next->sm_prev = smp->sm_prev; 1722 } 1723 mutex_exit(&freeq->smq_mtx); 1724 smp->sm_prev = smp->sm_next = NULL; 1725 } else { 1726 ASSERT(smp->sm_prev == NULL); 1727 segmapcnt.smp_stolen.value.ul++; 1728 } 1729 1730 } else { 1731 segmapcnt.smp_get_use.value.ul++; 1732 } 1733 smp->sm_refcnt++; /* another user */ 1734 1735 /* 1736 * We don't invoke segmap_fault via TLB miss, so we set ref 1737 * and mod bits in advance. For S_OTHER we set them in 1738 * segmap_fault F_SOFTUNLOCK. 1739 */ 1740 if (is_kpm) { 1741 if (rw == S_WRITE) { 1742 smp->sm_flags |= SM_WRITE_DATA; 1743 } else if (rw == S_READ) { 1744 smp->sm_flags |= SM_READ_DATA; 1745 } 1746 } 1747 mutex_exit(smapmtx); 1748 1749 newslot = 0; 1750 } else { 1751 1752 uint32_t free_ndx, *free_ndxp; 1753 union segmap_cpu *scpu; 1754 1755 /* 1756 * On a PAC machine or a machine with anti-alias 1757 * hardware, smd_colormsk will be zero. 1758 * 1759 * On a VAC machine- pick color by offset in the file 1760 * so we won't get VAC conflicts on elf files. 1761 * On data files, color does not matter but we 1762 * don't know what kind of file it is so we always 1763 * pick color by offset. This causes color 1764 * corresponding to file offset zero to be used more 1765 * heavily. 
1766 */ 1767 color = (baseoff >> MAXBSHIFT) & smd_colormsk; 1768 scpu = smd_cpu+CPU->cpu_seqid; 1769 free_ndxp = &scpu->scpu.scpu_free_ndx[color]; 1770 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk; 1771 #ifdef DEBUG 1772 colors_used[free_ndx]++; 1773 #endif /* DEBUG */ 1774 1775 /* 1776 * Get a locked smp slot from the free list. 1777 */ 1778 smp = get_free_smp(free_ndx); 1779 smapmtx = SMAPMTX(smp); 1780 1781 ASSERT(smp->sm_vp == NULL); 1782 1783 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) { 1784 /* 1785 * Failed to hashin, there exists one now. 1786 * Return the smp we just allocated. 1787 */ 1788 segmap_smapadd(smp); 1789 mutex_exit(smapmtx); 1790 1791 smp = nsmp; 1792 goto vrfy_smp; 1793 } 1794 smp->sm_refcnt++; /* another user */ 1795 1796 /* 1797 * We don't invoke segmap_fault via TLB miss, so we set ref 1798 * and mod bits in advance. For S_OTHER we set them in 1799 * segmap_fault F_SOFTUNLOCK. 1800 */ 1801 if (is_kpm) { 1802 if (rw == S_WRITE) { 1803 smp->sm_flags |= SM_WRITE_DATA; 1804 } else if (rw == S_READ) { 1805 smp->sm_flags |= SM_READ_DATA; 1806 } 1807 } 1808 mutex_exit(smapmtx); 1809 1810 newslot = 1; 1811 } 1812 1813 if (!is_kpm) 1814 goto use_segmap_range; 1815 1816 /* 1817 * Use segkpm 1818 */ 1819 /* Lint directive required until 6746211 is fixed */ 1820 /*CONSTCOND*/ 1821 ASSERT(PAGESIZE == MAXBSIZE); 1822 1823 /* 1824 * remember the last smp faulted on this cpu. 
1825 */ 1826 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp; 1827 1828 if (forcefault == SM_PAGECREATE) { 1829 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw); 1830 return (baseaddr); 1831 } 1832 1833 if (newslot == 0 && 1834 (pp = GET_KPME(smp)->kpe_page) != NULL) { 1835 1836 /* fastpath */ 1837 switch (rw) { 1838 case S_READ: 1839 case S_WRITE: 1840 if (page_trylock(pp, SE_SHARED)) { 1841 if (PP_ISFREE(pp) || 1842 !(pp->p_vnode == vp && 1843 pp->p_offset == baseoff)) { 1844 page_unlock(pp); 1845 pp = page_lookup(vp, baseoff, 1846 SE_SHARED); 1847 } 1848 } else { 1849 pp = page_lookup(vp, baseoff, SE_SHARED); 1850 } 1851 1852 if (pp == NULL) { 1853 ASSERT(GET_KPME(smp)->kpe_page == NULL); 1854 break; 1855 } 1856 1857 if (rw == S_WRITE && 1858 hat_page_getattr(pp, P_MOD | P_REF) != 1859 (P_MOD | P_REF)) { 1860 page_unlock(pp); 1861 break; 1862 } 1863 1864 /* 1865 * We have the p_selock as reader, grab_smp 1866 * can't hit us, we have bumped the smap 1867 * refcnt and hat_pageunload needs the 1868 * p_selock exclusive. 1869 */ 1870 kpme = GET_KPME(smp); 1871 if (kpme->kpe_page == pp) { 1872 baseaddr = hat_kpm_page2va(pp, 0); 1873 } else if (kpme->kpe_page == NULL) { 1874 baseaddr = hat_kpm_mapin(pp, kpme); 1875 } else { 1876 panic("segmap_getmapflt: stale " 1877 "kpme page, kpme %p", (void *)kpme); 1878 /*NOTREACHED*/ 1879 } 1880 1881 /* 1882 * We don't invoke segmap_fault via TLB miss, 1883 * so we set ref and mod bits in advance. 1884 * For S_OTHER and we set them in segmap_fault 1885 * F_SOFTUNLOCK. 1886 */ 1887 if (rw == S_READ && !hat_isref(pp)) 1888 hat_setref(pp); 1889 1890 return (baseaddr); 1891 default: 1892 break; 1893 } 1894 } 1895 1896 base = segkpm_create_va(baseoff); 1897 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE, 1898 seg, base, rw, CRED(), NULL); 1899 1900 pp = pl[0]; 1901 if (error || pp == NULL) { 1902 /* 1903 * Use segmap address slot and let segmap_fault deal 1904 * with the error cases. 
There is no error return 1905 * possible here. 1906 */ 1907 goto use_segmap_range; 1908 } 1909 1910 ASSERT(pl[1] == NULL); 1911 1912 /* 1913 * When prot is not returned w/ PROT_ALL the returned pages 1914 * are not backed by fs blocks. For most of the segmap users 1915 * this is no problem, they don't write to the pages in the 1916 * same request and therefore don't rely on a following 1917 * trap driven segmap_fault. With SM_LOCKPROTO users it 1918 * is more secure to use segkmap adresses to allow 1919 * protection segmap_fault's. 1920 */ 1921 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) { 1922 /* 1923 * Use segmap address slot and let segmap_fault 1924 * do the error return. 1925 */ 1926 ASSERT(rw != S_WRITE); 1927 ASSERT(PAGE_LOCKED(pp)); 1928 page_unlock(pp); 1929 forcefault = 0; 1930 goto use_segmap_range; 1931 } 1932 1933 /* 1934 * We have the p_selock as reader, grab_smp can't hit us, we 1935 * have bumped the smap refcnt and hat_pageunload needs the 1936 * p_selock exclusive. 
1937 */ 1938 kpme = GET_KPME(smp); 1939 if (kpme->kpe_page == pp) { 1940 baseaddr = hat_kpm_page2va(pp, 0); 1941 } else if (kpme->kpe_page == NULL) { 1942 baseaddr = hat_kpm_mapin(pp, kpme); 1943 } else { 1944 panic("segmap_getmapflt: stale kpme page after " 1945 "VOP_GETPAGE, kpme %p", (void *)kpme); 1946 /*NOTREACHED*/ 1947 } 1948 1949 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 1950 1951 return (baseaddr); 1952 1953 1954 use_segmap_range: 1955 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE); 1956 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP, 1957 "segmap_getmap:seg %p addr %p vp %p offset %llx", 1958 seg, baseaddr, vp, baseoff); 1959 1960 /* 1961 * Prefault the translations 1962 */ 1963 vaddr = baseaddr + (off - baseoff); 1964 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) { 1965 1966 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr & 1967 (uintptr_t)PAGEMASK); 1968 1969 (void) segmap_fault(kas.a_hat, seg, pgaddr, 1970 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK, 1971 F_INVAL, rw); 1972 } 1973 1974 return (baseaddr); 1975 } 1976 1977 int 1978 segmap_release(struct seg *seg, caddr_t addr, uint_t flags) 1979 { 1980 struct smap *smp; 1981 int error; 1982 int bflags = 0; 1983 struct vnode *vp; 1984 u_offset_t offset; 1985 kmutex_t *smtx; 1986 int is_kpm = 0; 1987 page_t *pp; 1988 1989 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1990 1991 if (((uintptr_t)addr & MAXBOFFSET) != 0) { 1992 panic("segmap_release: addr %p not " 1993 "MAXBSIZE aligned", (void *)addr); 1994 /*NOTREACHED*/ 1995 } 1996 1997 if ((smp = get_smap_kpm(addr, &pp)) == NULL) { 1998 panic("segmap_release: smap not found " 1999 "for addr %p", (void *)addr); 2000 /*NOTREACHED*/ 2001 } 2002 2003 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2004 "segmap_relmap:seg %p addr %p smp %p", 2005 seg, addr, smp); 2006 2007 smtx = SMAPMTX(smp); 2008 2009 /* 2010 * For compatibility reasons segmap_pagecreate_kpm sets this 2011 * flag to allow a following segmap_pagecreate to return 2012 * this as "newpage" 
flag. When segmap_pagecreate is not 2013 * called at all we clear it now. 2014 */ 2015 smp->sm_flags &= ~SM_KPM_NEWPAGE; 2016 is_kpm = 1; 2017 if (smp->sm_flags & SM_WRITE_DATA) { 2018 hat_setrefmod(pp); 2019 } else if (smp->sm_flags & SM_READ_DATA) { 2020 hat_setref(pp); 2021 } 2022 } else { 2023 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size || 2024 ((uintptr_t)addr & MAXBOFFSET) != 0) { 2025 panic("segmap_release: bad addr %p", (void *)addr); 2026 /*NOTREACHED*/ 2027 } 2028 smp = GET_SMAP(seg, addr); 2029 2030 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2031 "segmap_relmap:seg %p addr %p smp %p", 2032 seg, addr, smp); 2033 2034 smtx = SMAPMTX(smp); 2035 mutex_enter(smtx); 2036 smp->sm_flags |= SM_NOTKPM_RELEASED; 2037 } 2038 2039 ASSERT(smp->sm_refcnt > 0); 2040 2041 /* 2042 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2043 * are set. 2044 */ 2045 if ((flags & ~SM_DONTNEED) != 0) { 2046 if (flags & SM_WRITE) 2047 segmapcnt.smp_rel_write.value.ul++; 2048 if (flags & SM_ASYNC) { 2049 bflags |= B_ASYNC; 2050 segmapcnt.smp_rel_async.value.ul++; 2051 } 2052 if (flags & SM_INVAL) { 2053 bflags |= B_INVAL; 2054 segmapcnt.smp_rel_abort.value.ul++; 2055 } 2056 if (flags & SM_DESTROY) { 2057 bflags |= (B_INVAL|B_TRUNC); 2058 segmapcnt.smp_rel_abort.value.ul++; 2059 } 2060 if (smp->sm_refcnt == 1) { 2061 /* 2062 * We only bother doing the FREE and DONTNEED flags 2063 * if no one else is still referencing this mapping. 
2064 */ 2065 if (flags & SM_FREE) { 2066 bflags |= B_FREE; 2067 segmapcnt.smp_rel_free.value.ul++; 2068 } 2069 if (flags & SM_DONTNEED) { 2070 bflags |= B_DONTNEED; 2071 segmapcnt.smp_rel_dontneed.value.ul++; 2072 } 2073 } 2074 } else { 2075 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++; 2076 } 2077 2078 vp = smp->sm_vp; 2079 offset = smp->sm_off; 2080 2081 if (--smp->sm_refcnt == 0) { 2082 2083 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA); 2084 2085 if (flags & (SM_INVAL|SM_DESTROY)) { 2086 segmap_hashout(smp); /* remove map info */ 2087 if (is_kpm) { 2088 hat_kpm_mapout(pp, GET_KPME(smp), addr); 2089 if (smp->sm_flags & SM_NOTKPM_RELEASED) { 2090 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2091 hat_unload(kas.a_hat, segkmap->s_base + 2092 ((smp - smd_smap) * MAXBSIZE), 2093 MAXBSIZE, HAT_UNLOAD); 2094 } 2095 2096 } else { 2097 if (segmap_kpm) 2098 segkpm_mapout_validkpme(GET_KPME(smp)); 2099 2100 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2101 hat_unload(kas.a_hat, addr, MAXBSIZE, 2102 HAT_UNLOAD); 2103 } 2104 } 2105 segmap_smapadd(smp); /* add to free list */ 2106 } 2107 2108 mutex_exit(smtx); 2109 2110 if (is_kpm) 2111 page_unlock(pp); 2112 /* 2113 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2114 * are set. 2115 */ 2116 if ((flags & ~SM_DONTNEED) != 0) { 2117 error = VOP_PUTPAGE(vp, offset, MAXBSIZE, 2118 bflags, CRED(), NULL); 2119 } else { 2120 error = 0; 2121 } 2122 2123 return (error); 2124 } 2125 2126 /* 2127 * Dump the pages belonging to this segmap segment. 
2128 */ 2129 static void 2130 segmap_dump(struct seg *seg) 2131 { 2132 struct segmap_data *smd; 2133 struct smap *smp, *smp_end; 2134 page_t *pp; 2135 pfn_t pfn; 2136 u_offset_t off; 2137 caddr_t addr; 2138 2139 smd = (struct segmap_data *)seg->s_data; 2140 addr = seg->s_base; 2141 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages; 2142 smp < smp_end; smp++) { 2143 2144 if (smp->sm_refcnt) { 2145 for (off = 0; off < MAXBSIZE; off += PAGESIZE) { 2146 int we_own_it = 0; 2147 2148 /* 2149 * If pp == NULL, the page either does 2150 * not exist or is exclusively locked. 2151 * So determine if it exists before 2152 * searching for it. 2153 */ 2154 if ((pp = page_lookup_nowait(smp->sm_vp, 2155 smp->sm_off + off, SE_SHARED))) 2156 we_own_it = 1; 2157 else 2158 pp = page_exists(smp->sm_vp, 2159 smp->sm_off + off); 2160 2161 if (pp) { 2162 pfn = page_pptonum(pp); 2163 dump_addpage(seg->s_as, 2164 addr + off, pfn); 2165 if (we_own_it) 2166 page_unlock(pp); 2167 } 2168 dump_timeleft = dump_timeout; 2169 } 2170 } 2171 addr += MAXBSIZE; 2172 } 2173 } 2174 2175 /*ARGSUSED*/ 2176 static int 2177 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len, 2178 struct page ***ppp, enum lock_type type, enum seg_rw rw) 2179 { 2180 return (ENOTSUP); 2181 } 2182 2183 static int 2184 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 2185 { 2186 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 2187 2188 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp; 2189 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base); 2190 return (0); 2191 } 2192 2193 /*ARGSUSED*/ 2194 static lgrp_mem_policy_info_t * 2195 segmap_getpolicy(struct seg *seg, caddr_t addr) 2196 { 2197 return (NULL); 2198 } 2199 2200 /*ARGSUSED*/ 2201 static int 2202 segmap_capable(struct seg *seg, segcapability_t capability) 2203 { 2204 return (0); 2205 } 2206 2207 2208 #ifdef SEGKPM_SUPPORT 2209 2210 /* 2211 * segkpm support routines 2212 */ 2213 2214 static caddr_t 2215 
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off, 2216 struct smap *smp, enum seg_rw rw) 2217 { 2218 caddr_t base; 2219 page_t *pp; 2220 int newpage = 0; 2221 struct kpme *kpme; 2222 2223 ASSERT(smp->sm_refcnt > 0); 2224 2225 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) { 2226 kmutex_t *smtx; 2227 2228 base = segkpm_create_va(off); 2229 2230 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT, 2231 seg, base)) == NULL) { 2232 panic("segmap_pagecreate_kpm: " 2233 "page_create failed"); 2234 /*NOTREACHED*/ 2235 } 2236 2237 newpage = 1; 2238 page_io_unlock(pp); 2239 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 2240 2241 /* 2242 * Mark this here until the following segmap_pagecreate 2243 * or segmap_release. 2244 */ 2245 smtx = SMAPMTX(smp); 2246 mutex_enter(smtx); 2247 smp->sm_flags |= SM_KPM_NEWPAGE; 2248 mutex_exit(smtx); 2249 } 2250 2251 kpme = GET_KPME(smp); 2252 if (!newpage && kpme->kpe_page == pp) 2253 base = hat_kpm_page2va(pp, 0); 2254 else 2255 base = hat_kpm_mapin(pp, kpme); 2256 2257 /* 2258 * FS code may decide not to call segmap_pagecreate and we 2259 * don't invoke segmap_fault via TLB miss, so we have to set 2260 * ref and mod bits in advance. 2261 */ 2262 if (rw == S_WRITE) { 2263 hat_setrefmod(pp); 2264 } else { 2265 ASSERT(rw == S_READ); 2266 hat_setref(pp); 2267 } 2268 2269 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 2270 2271 return (base); 2272 } 2273 2274 /* 2275 * Find the smap structure corresponding to the 2276 * KPM addr and return it locked. 
 *
 * The page behind addr supplies the (vnode, offset) tag used for the
 * search. On success the smap is returned with sm_mtx held and, if ppp is
 * non-NULL, *ppp is set to that page. If no matching smap is found NULL is
 * returned (and *ppp is set to NULL) with no smap mutex held.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap	*smp;
	struct vnode	*vp;
	u_offset_t	offset;
	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int		hashid;
	kmutex_t	*hashmtx;
	page_t		*pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		/* The hash mutex is dropped before the smap mutex is taken. */
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/* Stub used when SEGKPM_SUPPORT is not defined; always returns NULL. */
/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/* Stub used when SEGKPM_SUPPORT is not defined; always returns NULL. */
/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */