/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
    caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
	SEGMAP_BADOP(int),	/* dup */
	SEGMAP_BADOP(int),	/* unmap */
	segmap_free,
	segmap_fault,
	segmap_faulta,
	SEGMAP_BADOP(int),	/* setprot */
	segmap_checkprot,
	segmap_kluster,
	SEGMAP_BADOP(int),	/* sync */
	SEGMAP_BADOP(size_t),	/* incore */
	SEGMAP_BADOP(int),	/* lockop */
	segmap_getprot,
	segmap_getoffset,
	segmap_gettype,
	segmap_getvp,
	SEGMAP_BADOP(int),	/* advise */
	segmap_dump,
	segmap_pagelock,	/* pagelock */
	SEGMAP_BADOP(int),	/* setpgsz */
	segmap_getmemid,	/* getmemid */
	segmap_getpolicy,	/* getpolicy */
	segmap_capable,		/* capable */
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
			u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)	((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done holding only the hashchain lock. When a wanted
 * slot is found, we drop the hashchain lock and then lock the slot, so
 * there is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again that the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for the hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp) (&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can over-ride this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (nfreelist & (nfreelist - 1)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array 1 time. Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations evenly across the available
	 * smap freelists. Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	vpm_init();

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(seg)
	struct seg *seg;
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime. Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has a
		 * "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			"segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear bitmap, if the bit corresponding to "off" is set,
		 * since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine. It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm. We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * that the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ?
	    EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

static void
segmap_badop()
{
	panic("segmap_badop");
	/*NOTREACHED*/
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex
		 * then recheck after obtaining sm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp,off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode * even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode * itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;

}

/*
 * Attempt to free unmodified, unmapped, and non locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t	*pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit : smap lock.
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode	*vp = smp->sm_vp;
		u_offset_t	off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once,
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skip_queue;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;

						pp = NULL;

						if (smp == first) {
							goto skip_queue;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp.  Remove smp
			 * from its freelist.  If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}

/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as the "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}

void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap	*smp;
	ushort_t	bitmask;
	page_t		*pp;
	struct vnode	*vp;
	u_offset_t	off;
	caddr_t		eaddr;
	kmutex_t	*smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has an
			 * "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)
/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
	struct seg *seg,
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int forcefault,
	enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	u_offset_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t	*pp;
	struct kpme *kpme;
	uint_t	prot;
	caddr_t base;
	page_t	*pl[MAXPPB + 1];
	int	error;
	int	is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag. The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list. However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's. In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt.  The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it.  get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */

			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine, pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset. This causes color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
		colors_used[free_ndx]++;
#endif /* DEBUG */

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hashin, there exists one now.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm
	 */
	/* Lint directive required until 6746211 is fixed */
	/*CONSTCOND*/
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(vp, baseoff,
					    SE_SHARED);
				}
			} else {
				pp = page_lookup(vp, baseoff, SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
				/*NOTREACHED*/
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases. There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks. For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault. With SM_LOCKPROTO users it
	 * is more secure to use segkmap addresses to allow
	 * protection segmap_fault's.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "VOP_GETPAGE, kpme %p", (void *)kpme);
		/*NOTREACHED*/
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
	    seg, baseaddr, vp, baseoff);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
		    F_INVAL, rw);
	}

	return (baseaddr);
}

int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap	*smp;
	int		error;
	int		bflags = 0;
	struct vnode	*vp;
	u_offset_t	offset;
	kmutex_t	*smtx;
	int		is_kpm = 0;
	page_t		*pp;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
			/*NOTREACHED*/
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as "newpage" flag. When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
			/*NOTREACHED*/
		}
		smp = GET_SMAP(seg, addr);

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
2063 */ 2064 if (flags & SM_FREE) { 2065 bflags |= B_FREE; 2066 segmapcnt.smp_rel_free.value.ul++; 2067 } 2068 if (flags & SM_DONTNEED) { 2069 bflags |= B_DONTNEED; 2070 segmapcnt.smp_rel_dontneed.value.ul++; 2071 } 2072 } 2073 } else { 2074 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++; 2075 } 2076 2077 vp = smp->sm_vp; 2078 offset = smp->sm_off; 2079 2080 if (--smp->sm_refcnt == 0) { 2081 2082 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA); 2083 2084 if (flags & (SM_INVAL|SM_DESTROY)) { 2085 segmap_hashout(smp); /* remove map info */ 2086 if (is_kpm) { 2087 hat_kpm_mapout(pp, GET_KPME(smp), addr); 2088 if (smp->sm_flags & SM_NOTKPM_RELEASED) { 2089 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2090 hat_unload(kas.a_hat, segkmap->s_base + 2091 ((smp - smd_smap) * MAXBSIZE), 2092 MAXBSIZE, HAT_UNLOAD); 2093 } 2094 2095 } else { 2096 if (segmap_kpm) 2097 segkpm_mapout_validkpme(GET_KPME(smp)); 2098 2099 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2100 hat_unload(kas.a_hat, addr, MAXBSIZE, 2101 HAT_UNLOAD); 2102 } 2103 } 2104 segmap_smapadd(smp); /* add to free list */ 2105 } 2106 2107 mutex_exit(smtx); 2108 2109 if (is_kpm) 2110 page_unlock(pp); 2111 /* 2112 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2113 * are set. 2114 */ 2115 if ((flags & ~SM_DONTNEED) != 0) { 2116 error = VOP_PUTPAGE(vp, offset, MAXBSIZE, 2117 bflags, CRED(), NULL); 2118 } else { 2119 error = 0; 2120 } 2121 2122 return (error); 2123 } 2124 2125 /* 2126 * Dump the pages belonging to this segmap segment. 2127 */ 2128 static void 2129 segmap_dump(struct seg *seg) 2130 { 2131 struct segmap_data *smd; 2132 struct smap *smp, *smp_end; 2133 page_t *pp; 2134 pfn_t pfn; 2135 u_offset_t off; 2136 caddr_t addr; 2137 2138 smd = (struct segmap_data *)seg->s_data; 2139 addr = seg->s_base; 2140 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages; 2141 smp < smp_end; smp++) { 2142 2143 if (smp->sm_refcnt) { 2144 for (off = 0; off < MAXBSIZE; off += PAGESIZE) { 2145 int we_own_it = 0; 2146 2147 /* 2148 * If pp == NULL, the page either does 2149 * not exist or is exclusively locked. 2150 * So determine if it exists before 2151 * searching for it. 
2152 */ 2153 if ((pp = page_lookup_nowait(smp->sm_vp, 2154 smp->sm_off + off, SE_SHARED))) 2155 we_own_it = 1; 2156 else 2157 pp = page_exists(smp->sm_vp, 2158 smp->sm_off + off); 2159 2160 if (pp) { 2161 pfn = page_pptonum(pp); 2162 dump_addpage(seg->s_as, 2163 addr + off, pfn); 2164 if (we_own_it) 2165 page_unlock(pp); 2166 } 2167 dump_timeleft = dump_timeout; 2168 } 2169 } 2170 addr += MAXBSIZE; 2171 } 2172 } 2173 2174 /*ARGSUSED*/ 2175 static int 2176 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len, 2177 struct page ***ppp, enum lock_type type, enum seg_rw rw) 2178 { 2179 return (ENOTSUP); 2180 } 2181 2182 static int 2183 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 2184 { 2185 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 2186 2187 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp; 2188 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base); 2189 return (0); 2190 } 2191 2192 /*ARGSUSED*/ 2193 static lgrp_mem_policy_info_t * 2194 segmap_getpolicy(struct seg *seg, caddr_t addr) 2195 { 2196 return (NULL); 2197 } 2198 2199 /*ARGSUSED*/ 2200 static int 2201 segmap_capable(struct seg *seg, segcapability_t capability) 2202 { 2203 return (0); 2204 } 2205 2206 2207 #ifdef SEGKPM_SUPPORT 2208 2209 /* 2210 * segkpm support routines 2211 */ 2212 2213 static caddr_t 2214 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off, 2215 struct smap *smp, enum seg_rw rw) 2216 { 2217 caddr_t base; 2218 page_t *pp; 2219 int newpage = 0; 2220 struct kpme *kpme; 2221 2222 ASSERT(smp->sm_refcnt > 0); 2223 2224 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) { 2225 kmutex_t *smtx; 2226 2227 base = segkpm_create_va(off); 2228 2229 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT, 2230 seg, base)) == NULL) { 2231 panic("segmap_pagecreate_kpm: " 2232 "page_create failed"); 2233 /*NOTREACHED*/ 2234 } 2235 2236 newpage = 1; 2237 page_io_unlock(pp); 2238 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 2239 2240 /* 2241 * Mark this here until the following segmap_pagecreate 2242 * or segmap_release. 2243 */ 2244 smtx = SMAPMTX(smp); 2245 mutex_enter(smtx); 2246 smp->sm_flags |= SM_KPM_NEWPAGE; 2247 mutex_exit(smtx); 2248 } 2249 2250 kpme = GET_KPME(smp); 2251 if (!newpage && kpme->kpe_page == pp) 2252 base = hat_kpm_page2va(pp, 0); 2253 else 2254 base = hat_kpm_mapin(pp, kpme); 2255 2256 /* 2257 * FS code may decide not to call segmap_pagecreate and we 2258 * don't invoke segmap_fault via TLB miss, so we have to set 2259 * ref and mod bits in advance. 2260 */ 2261 if (rw == S_WRITE) { 2262 hat_setrefmod(pp); 2263 } else { 2264 ASSERT(rw == S_READ); 2265 hat_setref(pp); 2266 } 2267 2268 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 2269 2270 return (base); 2271 } 2272 2273 /* 2274 * Find the smap structure corresponding to the 2275 * KPM addr and return it locked. 2276 */ 2277 struct smap * 2278 get_smap_kpm(caddr_t addr, page_t **ppp) 2279 { 2280 struct smap *smp; 2281 struct vnode *vp; 2282 u_offset_t offset; 2283 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK); 2284 int hashid; 2285 kmutex_t *hashmtx; 2286 page_t *pp; 2287 union segmap_cpu *scpu; 2288 2289 pp = hat_kpm_vaddr2page(baseaddr); 2290 2291 ASSERT(pp && !PP_ISFREE(pp)); 2292 ASSERT(PAGE_LOCKED(pp)); 2293 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0); 2294 2295 vp = pp->p_vnode; 2296 offset = pp->p_offset; 2297 ASSERT(vp != NULL); 2298 2299 /* 2300 * Assume the last smap used on this cpu is the one needed. 
2301 */ 2302 scpu = smd_cpu+CPU->cpu_seqid; 2303 smp = scpu->scpu.scpu_last_smap; 2304 mutex_enter(&smp->sm_mtx); 2305 if (smp->sm_vp == vp && smp->sm_off == offset) { 2306 ASSERT(smp->sm_refcnt > 0); 2307 } else { 2308 /* 2309 * Assumption wrong, find the smap on the hash chain. 2310 */ 2311 mutex_exit(&smp->sm_mtx); 2312 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */ 2313 hashmtx = SHASHMTX(hashid); 2314 2315 mutex_enter(hashmtx); 2316 smp = smd_hash[hashid].sh_hash_list; 2317 for (; smp != NULL; smp = smp->sm_hash) { 2318 if (smp->sm_vp == vp && smp->sm_off == offset) 2319 break; 2320 } 2321 mutex_exit(hashmtx); 2322 if (smp) { 2323 mutex_enter(&smp->sm_mtx); 2324 ASSERT(smp->sm_vp == vp && smp->sm_off == offset); 2325 } 2326 } 2327 2328 if (ppp) 2329 *ppp = smp ? pp : NULL; 2330 2331 return (smp); 2332 } 2333 2334 #else /* SEGKPM_SUPPORT */ 2335 2336 /* segkpm stubs */ 2337 2338 /*ARGSUSED*/ 2339 static caddr_t 2340 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off, 2341 struct smap *smp, enum seg_rw rw) 2342 { 2343 return (NULL); 2344 } 2345 2346 /*ARGSUSED*/ 2347 struct smap * 2348 get_smap_kpm(caddr_t addr, page_t **ppp) 2349 { 2350 return (NULL); 2351 } 2352 2353 #endif /* SEGKPM_SUPPORT */
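/*
 * Editorial example (a hedged sketch, not part of the original source):
 * how a file system read path typically consumes the interfaces above.
 * It maps a window with segmap_getmapflt(), copies data out with
 * uiomove(), and drops the window with segmap_release().  The vnode
 * 'vp' and the uio 'uio' are hypothetical caller state, segkmap is the
 * kernel's global segmap segment, and error handling is abbreviated.
 *
 *	u_offset_t off  = uio->uio_loffset & (offset_t)MAXBMASK;
 *	size_t mapon    = uio->uio_loffset & (offset_t)MAXBOFFSET;
 *	size_t n        = MIN(MAXBSIZE - mapon, uio->uio_resid);
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off + mapon, n, 1, S_READ);
 *	error = uiomove(base + mapon, n, UIO_READ, uio);
 *	if (error)
 *		(void) segmap_release(segkmap, base, 0);
 *	else
 *		error = segmap_release(segkmap, base, 0);
 *
 * A nonzero forcefault asks segmap to prefault the translations (the
 * use_segmap_range path above), so uiomove() normally does not need a
 * trap-driven segmap_fault().  A caller that does not expect to reuse
 * the data soon might instead release with SM_FREE | SM_DONTNEED |
 * SM_ASYNC, which, when this was the last reference to the slot, turns
 * into an asynchronous VOP_PUTPAGE() with B_FREE | B_DONTNEED | B_ASYNC
 * as implemented in segmap_release() above.
 */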
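/*
 * Editorial example (a hedged sketch, not part of the original source):
 * the SM_PAGECREATE path above is intended for writers that will
 * overwrite an entire, MAXBSIZE-aligned window, so the backing pages
 * can be created rather than read in with VOP_GETPAGE().  Callers
 * typically take this form only when segmap_kpm is enabled; 'vp',
 * 'uio' and the aligned offset 'off' are hypothetical caller state,
 * and zeroing of any portion the caller does not overwrite, as well as
 * error handling, is omitted.
 *
 *	caddr_t base;
 *	int newpage, error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off, MAXBSIZE,
 *	    SM_PAGECREATE, S_WRITE);
 *	newpage = segmap_pagecreate(segkmap, base, MAXBSIZE, 0);
 *	error = uiomove(base, MAXBSIZE, UIO_WRITE, uio);
 *	error = segmap_release(segkmap, base,
 *	    SM_WRITE | SM_ASYNC | SM_DONTNEED);
 *
 * segmap_pagecreate() reports through 'newpage' whether pages were
 * freshly created, which is what the SM_KPM_NEWPAGE flag set by
 * segmap_pagecreate_kpm() exists to support; if the caller never calls
 * segmap_pagecreate(), segmap_release() simply clears the flag.  On
 * release, SM_WRITE | SM_ASYNC | SM_DONTNEED results in an asynchronous
 * VOP_PUTPAGE() (B_ASYNC, plus B_DONTNEED when this was the last
 * reference), as implemented in segmap_release() above.
 */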