/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
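/*
 * Illustrative sketch (not part of this file's interfaces): a typical
 * filesystem read path uses segmap roughly as follows.  Names such as
 * "uiop" and the exact error handling are assumptions for the example only.
 *
 *	base = segmap_getmapflt(segkmap, vp, off & (offset_t)MAXBMASK,
 *	    MAXBSIZE, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uiop);
 *	error = segmap_release(segkmap, base, error ? 0 : SM_FREE);
 *
 * A write that will overwrite an entire block can avoid reading the old
 * contents by mapping with SM_PAGECREATE and calling segmap_pagecreate();
 * see the write-path sketch later in this file, ahead of
 * segmap_pagecreate().
 */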
/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t	segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
		    struct page ***ppp, enum lock_type type,
		    enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
		    struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
	.dup		= SEGMAP_BADOP(int),
	.unmap		= SEGMAP_BADOP(int),
	.free		= segmap_free,
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.setprot	= SEGMAP_BADOP(int),
	.checkprot	= segmap_checkprot,
	.kluster	= segmap_kluster,
	.swapout	= SEGMAP_BADOP(size_t),
	.sync		= SEGMAP_BADOP(int),
	.incore		= SEGMAP_BADOP(size_t),
	.lockop		= SEGMAP_BADOP(int),
	.getprot	= segmap_getprot,
	.getoffset	= segmap_getoffset,
	.gettype	= segmap_gettype,
	.getvp		= segmap_getvp,
	.advise		= SEGMAP_BADOP(int),
	.dump		= segmap_dump,
	.pagelock	= segmap_pagelock,
	.setpagesize	= SEGMAP_BADOP(int),
	.getmemid	= segmap_getmemid,
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
		    u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
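/*
 * Note (descriptive only): the hot counters among these are kept per cpu
 * in the segmap_cpu array below and folded into the globals by
 * segmap_kstat_update().  segmapcnt_ptr and segmapcnt_ndata are consumed
 * by kstat setup code outside this file, which exposes the counters as a
 * named kstat.
 */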
/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for hash in/out of the (vp, off) list) or
 * the freelist lock to put the slot back on the free list.
 *
 * The hash search is done by holding only the hashchain lock; when a wanted
 * slot is found, we drop the hashchain lock and then lock the slot, so there
 * is no overlapping of hashchain and smap locks.  After the slot is
 * locked, we verify again that the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist.  This is
 * in reversed lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for the hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp)		(&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
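/*
 * SMAP_HASHFUNC is a statement macro: it assigns the computed chain index
 * to its hashid argument rather than expanding to a value.  An illustrative
 * lookup following the protocol described above (sketch only; the real
 * callers are segmap_hashout(), segmap_getmapflt() and get_smap_kpm()):
 *
 *	SMAP_HASHFUNC(vp, off, hashid);
 *	mutex_enter(SHASHMTX(hashid));
 *	for (smp = smd_hash[hashid].sh_hash_list; smp != NULL;
 *	    smp = smp->sm_hash)
 *		if (smp->sm_vp == vp && smp->sm_off == off)
 *			break;
 *	mutex_exit(SHASHMTX(hashid));
 *	... then take SMAPMTX(smp) and re-verify (sm_vp, sm_off) ...
 */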
/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks.  The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t getmap, release, get_reclaim;
	ulong_t fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can override this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (!ISP2(nfreelist)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz) - 1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array one time.  Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists.  Each cpu will have a private
	 * rotor index to spread the allocations evenly across the
	 * available smap freelists.  Init the scpu_last_smap field to
	 * the first smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	vpm_init();

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
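/*
 * Illustrative sketch (the real code lives in the platform startup path,
 * not in this file): segkmap is created once at boot by attaching a
 * kernel-as segment and handing segmap_create() a struct segmap_crargs.
 * The names segkmap_base, segmapsize and shm_alignment below are
 * placeholders for whatever the platform startup code actually uses.
 *
 *	struct segmap_crargs a;
 *
 *	a.prot = PROT_READ | PROT_WRITE;
 *	a.shmsize = shm_alignment;	// 0 on machines with no VAC
 *	a.nfreelist = 0;		// 0 means "scale by max_ncpus"
 *	(void) seg_attach(&kas, segkmap_base, segmapsize, segkmap);
 *	(void) segmap_create(segkmap, &a);
 */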
/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime.  Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has a
		 * "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear the bit corresponding to "off" in the bitmap, if
		 * it is set, since the page and translation are being
		 * unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm.  We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * that the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here.  If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection.  With S_OTHER it's up to the FS to deal with
		 * this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ?
	    EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

static void
segmap_badop()
{
	panic("segmap_badop");
	/*NOTREACHED*/
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue.
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock.  This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes are held to set sm_want;
		 * snapshot the value before dropping the releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the sm_freeq[0] mutex, as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp, off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;

}

/*
 * Attempt to free unmodified, unmapped, and non-locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit:  smap lock
 */
static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode *vp = smp->sm_vp;
		u_offset_t off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off the freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx.  In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once,
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skip_queue;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;

						pp = NULL;

						if (smp == first) {
							goto skip_queue;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp.  Remove smp
			 * from its freelist.  If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}

/*
 * Special public segmap operations
 */
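/*
 * Illustrative sketch (assumed, not taken from this file): a filesystem
 * write path that is about to overwrite an entire MAXBSIZE block can map
 * it with SM_PAGECREATE and call segmap_pagecreate() below instead of
 * having VOP_GETPAGE read in data that will only be clobbered.
 *
 *	base = segmap_getmapflt(segkmap, vp, off & (offset_t)MAXBMASK,
 *	    MAXBSIZE, SM_PAGECREATE, S_WRITE);
 *	newpage = segmap_pagecreate(segkmap, base, MAXBSIZE, 1);
 *	error = uiomove(base, MAXBSIZE, UIO_WRITE, uiop);
 *	if (newpage)
 *		segmap_pageunlock(segkmap, base, MAXBSIZE, S_WRITE);
 *	error = segmap_release(segkmap, base, SM_WRITE);
 *
 * The "uiop" name and the exact release flags are placeholders; real
 * callers choose flags based on their own write policy.
 */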
/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as the "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}

void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap *smp;
	ushort_t bitmask;
	page_t *pp;
	struct vnode *vp;
	u_offset_t off;
	caddr_t eaddr;
	kmutex_t *smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has an
			 * "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space.  This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)
/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len).  off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
	struct seg *seg,
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int forcefault,
	enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	u_offset_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t *pp;
	struct kpme *kpme;
	uint_t prot;
	caddr_t base;
	page_t *pl[MAXPPB + 1];
	int error;
	int is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag.  The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have the smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list.  However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's.  In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt.  The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it.  get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */

			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance.  For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine - pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset.  This causes color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
		colors_used[free_ndx]++;
#endif /* DEBUG */

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hashin, there exists one now.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance.  For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm
	 */
	/* Lint directive required until 6746211 is fixed */
	/*CONSTCOND*/
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(vp, baseoff,
					    SE_SHARED);
				}
			} else {
				pp = page_lookup(vp, baseoff, SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
				/*NOTREACHED*/
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases.  There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks.  For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault.  With SM_LOCKPROTO users it
	 * is more secure to use segkmap addresses to allow
	 * protection via segmap_fault.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "VOP_GETPAGE, kpme %p", (void *)kpme);
		/*NOTREACHED*/
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
	    seg, baseaddr, vp, baseoff);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
		    F_INVAL, rw);
	}

	return (baseaddr);
}

int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap *smp;
	int error;
	int bflags = 0;
	struct vnode *vp;
	u_offset_t offset;
	kmutex_t *smtx;
	int is_kpm = 0;
	page_t *pp;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
			/*NOTREACHED*/
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as "newpage" flag.  When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
			/*NOTREACHED*/
		}
		smp = GET_SMAP(seg, addr);

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
			 */
			if (flags & SM_FREE) {
				bflags |= B_FREE;
				segmapcnt.smp_rel_free.value.ul++;
			}
			if (flags & SM_DONTNEED) {
				bflags |= B_DONTNEED;
				segmapcnt.smp_rel_dontneed.value.ul++;
			}
		}
	} else {
		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
	}

	vp = smp->sm_vp;
	offset = smp->sm_off;

	if (--smp->sm_refcnt == 0) {

		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

		if (flags & (SM_INVAL|SM_DESTROY)) {
			segmap_hashout(smp);	/* remove map info */
			if (is_kpm) {
				hat_kpm_mapout(pp, GET_KPME(smp), addr);
				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
					hat_unload(kas.a_hat, segkmap->s_base +
					    ((smp - smd_smap) * MAXBSIZE),
					    MAXBSIZE, HAT_UNLOAD);
				}

			} else {
				if (segmap_kpm)
					segkpm_mapout_validkpme(GET_KPME(smp));

				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
				hat_unload(kas.a_hat, addr, MAXBSIZE,
				    HAT_UNLOAD);
			}
		}
		segmap_smapadd(smp);	/* add to free list */
	}

	mutex_exit(smtx);

	if (is_kpm)
		page_unlock(pp);
	/*
	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
		    bflags, CRED(), NULL);
	} else {
		error = 0;
	}

	return (error);
}

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}


#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	caddr_t	base;
	page_t	*pp;
	int	newpage = 0;
	struct kpme	*kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t offset;
	caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int hashid;
	kmutex_t *hashmtx;
	page_t *pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */