/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2015, Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - segment management.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vmsystm.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/fs/swapnode.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_spt.h>
#include <vm/seg_vn.h>
#include <vm/anon.h>

/*
 * kstats for segment advise
 */
segadvstat_t segadvstat = {
        { "MADV_FREE_hit", KSTAT_DATA_ULONG },
        { "MADV_FREE_miss", KSTAT_DATA_ULONG },
};

kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);
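
/*
 * These counters are exported through the "segadvstat" named kstat (see
 * seg_init() below), so they can be inspected from userland, for example
 * (illustrative):
 *
 *	$ kstat -m unix -n segadvstat
 */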

/*
 * entry in the segment page cache
 */
struct seg_pcache {
        struct seg_pcache *p_hnext;     /* list for hashed blocks */
        struct seg_pcache *p_hprev;
        pcache_link_t p_plink;          /* per segment/amp list */
        void *p_htag0;                  /* segment/amp pointer */
        caddr_t p_addr;                 /* base address/anon_idx */
        size_t p_len;                   /* total bytes */
        size_t p_wlen;                  /* writable bytes at p_addr */
        struct page **p_pp;             /* pp shadow list */
        seg_preclaim_cbfunc_t p_callback; /* reclaim callback function */
        clock_t p_lbolt;                /* lbolt from last use */
        struct seg_phash *p_hashp;      /* our pcache hash bucket */
        uint_t p_active;                /* active count */
        uchar_t p_write;                /* true if S_WRITE */
        uchar_t p_ref;                  /* reference byte */
        ushort_t p_flags;               /* bit flags */
};

struct seg_phash {
        struct seg_pcache *p_hnext;     /* list for hashed blocks */
        struct seg_pcache *p_hprev;
        kmutex_t p_hmutex;              /* protects hash bucket */
        pcache_link_t p_halink[2];      /* active bucket linkages */
};

struct seg_phash_wired {
        struct seg_pcache *p_hnext;     /* list for hashed blocks */
        struct seg_pcache *p_hprev;
        kmutex_t p_hmutex;              /* protects hash bucket */
};

/*
 * A parameter to control the maximum number of bytes that can be
 * purged from pcache at a time.
 */
#define P_MAX_APURGE_BYTES      (1024 * 1024 * 1024)

/*
 * log2(fraction of pcache to reclaim at a time).
 */
#define P_SHRINK_SHFT           (5)

/*
 * The following variables can be tuned via /etc/system.
 */

int     segpcache_enabled = 1;          /* if 1, shadow lists are cached */
ulong_t segpcache_hashsize_win = 0;     /* # of non wired buckets */
ulong_t segpcache_hashsize_wired = 0;   /* # of wired buckets */
int     segpcache_reap_sec = 1;         /* reap check rate in secs */
clock_t segpcache_reap_ticks = 0;       /* reap interval in ticks */
int     segpcache_pcp_maxage_sec = 1;   /* pcp max age in secs */
clock_t segpcache_pcp_maxage_ticks = 0; /* pcp max age in ticks */
int     segpcache_shrink_shift = P_SHRINK_SHFT; /* log2 reap fraction */
pgcnt_t segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES; /* max purge bytes */
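
/*
 * For example, to cap a single asynchronous purge pass at 256MB and to
 * check for reaping every 2 seconds, one could add the following to
 * /etc/system (the values are illustrative only):
 *
 *	set segpcache_maxapurge_bytes = 0x10000000
 *	set segpcache_reap_sec = 2
 */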

static kmutex_t seg_pcache_mtx; /* protects seg_pdisabled counter */
static kmutex_t seg_pasync_mtx; /* protects async thread scheduling */
static kcondvar_t seg_pasync_cv;

#pragma align 64(pctrl1)
#pragma align 64(pctrl2)
#pragma align 64(pctrl3)

/*
 * Keep frequently used variables together in one cache line.
 */
static struct p_ctrl1 {
        uint_t p_disabled;              /* if not 0, caching temporarily off */
        size_t p_hashwin_sz;            /* # of non wired buckets */
        struct seg_phash *p_htabwin;    /* hash table for non wired entries */
        size_t p_hashwired_sz;          /* # of wired buckets */
        struct seg_phash_wired *p_htabwired; /* hash table for wired entries */
        kmem_cache_t *p_kmcache;        /* kmem cache for seg_pcache structs */
#ifdef _LP64
        ulong_t pad[2];
#endif /* _LP64 */
} pctrl1;

static struct p_ctrl2 {
        kmutex_t p_mem_mtx;     /* protects window counter and p_halinks */
        pgcnt_t p_locked_win;   /* # pages from window */
        pgcnt_t p_locked;       /* # of pages cached by pagelock */
        uchar_t p_ahcur;        /* current active links for insert/delete */
        uchar_t p_athr_on;      /* async reclaim thread is running */
        pcache_link_t p_ahhead[2]; /* active buckets linkages */
} pctrl2;

static struct p_ctrl3 {
        clock_t p_pcp_maxage;           /* max pcp age in ticks */
        ulong_t p_athr_empty_ahb;       /* athread walk stats */
        ulong_t p_athr_full_ahb;        /* athread walk stats */
        pgcnt_t p_maxapurge_npages;     /* max pages to purge at a time */
        int     p_shrink_shft;          /* reap shift factor */
#ifdef _LP64
        ulong_t pad[3];
#endif /* _LP64 */
} pctrl3;

#define seg_pdisabled                   pctrl1.p_disabled
#define seg_phashsize_win               pctrl1.p_hashwin_sz
#define seg_phashtab_win                pctrl1.p_htabwin
#define seg_phashsize_wired             pctrl1.p_hashwired_sz
#define seg_phashtab_wired              pctrl1.p_htabwired
#define seg_pkmcache                    pctrl1.p_kmcache
#define seg_pmem_mtx                    pctrl2.p_mem_mtx
#define seg_plocked_window              pctrl2.p_locked_win
#define seg_plocked                     pctrl2.p_locked
#define seg_pahcur                      pctrl2.p_ahcur
#define seg_pathr_on                    pctrl2.p_athr_on
#define seg_pahhead                     pctrl2.p_ahhead
#define seg_pmax_pcpage                 pctrl3.p_pcp_maxage
#define seg_pathr_empty_ahb             pctrl3.p_athr_empty_ahb
#define seg_pathr_full_ahb              pctrl3.p_athr_full_ahb
#define seg_pshrink_shift               pctrl3.p_shrink_shft
#define seg_pmaxapurge_npages           pctrl3.p_maxapurge_npages

#define P_HASHWIN_MASK                  (seg_phashsize_win - 1)
#define P_HASHWIRED_MASK                (seg_phashsize_wired - 1)
#define P_BASESHIFT                     (6)

kthread_t *seg_pasync_thr;

extern const struct seg_ops segvn_ops;
extern const struct seg_ops segspt_shmops;

#define IS_PFLAGS_WIRED(flags)  ((flags) & SEGP_FORCE_WIRED)
#define IS_PCP_WIRED(pcp)       IS_PFLAGS_WIRED((pcp)->p_flags)

#define LBOLT_DELTA(t)  ((ulong_t)(ddi_get_lbolt() - (t)))

#define PCP_AGE(pcp)    LBOLT_DELTA((pcp)->p_lbolt)

/*
 * htag0 argument can be a seg or amp pointer.
 */
#define P_HASHBP(seg, htag0, addr, flags)                               \
        (IS_PFLAGS_WIRED((flags)) ?                                     \
            ((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK & \
            ((uintptr_t)(htag0) >> P_BASESHIFT)]) :                     \
            (&seg_phashtab_win[P_HASHWIN_MASK &                         \
            (((uintptr_t)(htag0) >> 3) ^                                \
            ((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ?              \
            (flags >> 16) : page_get_shift((seg)->s_szc))))]))

/*
 * htag0 argument can be a seg or amp pointer.
 */
#define P_MATCH(pcp, htag0, addr, len)                                  \
        ((pcp)->p_htag0 == (htag0) &&                                   \
            (pcp)->p_addr == (addr) &&                                  \
            (pcp)->p_len >= (len))

#define P_MATCH_PP(pcp, htag0, addr, len, pp)                           \
        ((pcp)->p_pp == (pp) &&                                         \
            (pcp)->p_htag0 == (htag0) &&                                \
            (pcp)->p_addr == (addr) &&                                  \
            (pcp)->p_len >= (len))

#define plink2pcache(pl)        ((struct seg_pcache *)((uintptr_t)(pl) - \
        offsetof(struct seg_pcache, p_plink)))

#define hlink2phash(hl, l)      ((struct seg_phash *)((uintptr_t)(hl) - \
        offsetof(struct seg_phash, p_halink[l])))
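
/*
 * plink2pcache() and hlink2phash() are "container-of" conversions: given a
 * pointer to an embedded pcache_link_t they recover the enclosing structure
 * by subtracting the member's offset, e.g. (a sketch, for illustration
 * only):
 *
 *	pcache_link_t *plinkp = pheadp->p_lnext;
 *	struct seg_pcache *pcp = plink2pcache(plinkp);
 *
 * A lookup combines the macros above: P_HASHBP() picks the bucket for a
 * (htag0, addr, flags) triple and P_MATCH() compares an entry against the
 * request, treating a cached entry that covers at least [addr, addr + len)
 * as a match.
 */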

/*
 * seg_padd_abuck()/seg_premove_abuck() link and unlink hash buckets from
 * active hash bucket lists. We maintain active bucket lists to reduce the
 * overhead of finding active buckets during asynchronous purging since there
 * can be 10s of millions of buckets on a large system but only a small subset
 * of them in actual use.
 *
 * There are two active bucket lists. The current active list (as per
 * seg_pahcur) is used by seg_pinsert()/seg_pinactive()/seg_ppurge() to add
 * and delete buckets. The other list is used by the asynchronous purge
 * thread. This allows the purge thread to walk its active list without
 * holding seg_pmem_mtx for a long time. When the asynchronous thread is done
 * with its list it switches to the current active list and makes the list it
 * just finished processing the current active list.
 *
 * seg_padd_abuck() only adds the bucket to the current list if the bucket is
 * not yet on any list. seg_premove_abuck() may remove the bucket from either
 * list. If the bucket is on the current list it will always be removed.
 * Otherwise the bucket is only removed if the asynchronous purge thread is
 * not currently running or seg_premove_abuck() is called by the asynchronous
 * purge thread itself. A given bucket can only be on one of the active lists
 * at a time. These routines should be called with the per bucket lock held.
 * The routines use seg_pmem_mtx to protect list updates. seg_padd_abuck()
 * must be called after the first entry is added to the bucket chain and
 * seg_premove_abuck() must be called after the last pcp entry is deleted
 * from its chain. Holding the per bucket lock avoids a potential race
 * condition in which seg_premove_abuck() removes a bucket after pcp entries
 * are added to its list, i.e. after the caller checked that the bucket has
 * no entries (this race would cause the loss of an active bucket from the
 * active lists).
 *
 * Both lists are circular doubly linked lists anchored at seg_pahhead heads.
 * New entries are added to the end of the list since LRU is used as the
 * purging policy.
 */
static void
seg_padd_abuck(struct seg_phash *hp)
{
        int lix;

        ASSERT(MUTEX_HELD(&hp->p_hmutex));
        ASSERT((struct seg_phash *)hp->p_hnext != hp);
        ASSERT((struct seg_phash *)hp->p_hprev != hp);
        ASSERT(hp->p_hnext == hp->p_hprev);
        ASSERT(!IS_PCP_WIRED(hp->p_hnext));
        ASSERT(hp->p_hnext->p_hnext == (struct seg_pcache *)hp);
        ASSERT(hp->p_hprev->p_hprev == (struct seg_pcache *)hp);
        ASSERT(hp >= seg_phashtab_win &&
            hp < &seg_phashtab_win[seg_phashsize_win]);

        /*
         * This bucket can already be on one of active lists
         * since seg_premove_abuck() may have failed to remove it
         * before.
         */
        mutex_enter(&seg_pmem_mtx);
        lix = seg_pahcur;
        ASSERT(lix >= 0 && lix <= 1);
        if (hp->p_halink[lix].p_lnext != NULL) {
                ASSERT(hp->p_halink[lix].p_lprev != NULL);
                ASSERT(hp->p_halink[!lix].p_lnext == NULL);
                ASSERT(hp->p_halink[!lix].p_lprev == NULL);
                mutex_exit(&seg_pmem_mtx);
                return;
        }
        ASSERT(hp->p_halink[lix].p_lprev == NULL);

        /*
         * If this bucket is still on list !lix the async thread can't yet
         * remove it since we hold the per bucket lock here. In this case
         * just return since the async thread will eventually find and
         * process this bucket.
         */
        if (hp->p_halink[!lix].p_lnext != NULL) {
                ASSERT(hp->p_halink[!lix].p_lprev != NULL);
                mutex_exit(&seg_pmem_mtx);
                return;
        }
        ASSERT(hp->p_halink[!lix].p_lprev == NULL);
        /*
         * This bucket is not on any active bucket list yet.
         * Add the bucket to the tail of the current active list.
         */
        hp->p_halink[lix].p_lnext = &seg_pahhead[lix];
        hp->p_halink[lix].p_lprev = seg_pahhead[lix].p_lprev;
        seg_pahhead[lix].p_lprev->p_lnext = &hp->p_halink[lix];
        seg_pahhead[lix].p_lprev = &hp->p_halink[lix];
        mutex_exit(&seg_pmem_mtx);
}

static void
seg_premove_abuck(struct seg_phash *hp, int athr)
{
        int lix;

        ASSERT(MUTEX_HELD(&hp->p_hmutex));
        ASSERT((struct seg_phash *)hp->p_hnext == hp);
        ASSERT((struct seg_phash *)hp->p_hprev == hp);
        ASSERT(hp >= seg_phashtab_win &&
            hp < &seg_phashtab_win[seg_phashsize_win]);

        if (athr) {
                ASSERT(seg_pathr_on);
                ASSERT(seg_pahcur <= 1);
                /*
                 * We are called by the asynchronous thread that found this
                 * bucket on the not currently active (i.e. !seg_pahcur)
                 * list. Remove it from there. The per bucket lock we are
                 * holding makes sure seg_pinsert() can't sneak in and add
                 * pcp entries to this bucket right before we remove the
                 * bucket from its list.
                 */
                lix = !seg_pahcur;
                ASSERT(hp->p_halink[lix].p_lnext != NULL);
                ASSERT(hp->p_halink[lix].p_lprev != NULL);
                ASSERT(hp->p_halink[!lix].p_lnext == NULL);
                ASSERT(hp->p_halink[!lix].p_lprev == NULL);
                hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
                hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
                hp->p_halink[lix].p_lnext = NULL;
                hp->p_halink[lix].p_lprev = NULL;
                return;
        }

        mutex_enter(&seg_pmem_mtx);
        lix = seg_pahcur;
        ASSERT(lix >= 0 && lix <= 1);

        /*
         * If the bucket is on the currently active list just remove it from
         * there.
         */
        if (hp->p_halink[lix].p_lnext != NULL) {
                ASSERT(hp->p_halink[lix].p_lprev != NULL);
                ASSERT(hp->p_halink[!lix].p_lnext == NULL);
                ASSERT(hp->p_halink[!lix].p_lprev == NULL);
                hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
                hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
                hp->p_halink[lix].p_lnext = NULL;
                hp->p_halink[lix].p_lprev = NULL;
                mutex_exit(&seg_pmem_mtx);
                return;
        }
        ASSERT(hp->p_halink[lix].p_lprev == NULL);

        /*
         * If the asynchronous thread is not running we can remove the bucket
         * from the not currently active list. The bucket must be on this
         * list since we already checked that it's not on the other list and
         * the bucket from which we just deleted the last pcp entry must
         * still be on one of the active bucket lists.
         */
        lix = !lix;
        ASSERT(hp->p_halink[lix].p_lnext != NULL);
        ASSERT(hp->p_halink[lix].p_lprev != NULL);

        if (!seg_pathr_on) {
                hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
                hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
                hp->p_halink[lix].p_lnext = NULL;
                hp->p_halink[lix].p_lprev = NULL;
        }
        mutex_exit(&seg_pmem_mtx);
}

/*
 * Check if the bucket pointed to by hp already has a pcp entry that matches
 * the request htag0, addr and len. Set *found to 1 if a match is found and
 * to 0 otherwise. Also delete matching entries that cover a smaller address
 * range but start at the same address as the addr argument. Return the list
 * of deleted entries, if any. This is an internal helper function called
 * from seg_pinsert() only for non wired shadow lists. The caller already
 * holds a per seg/amp list lock.
 */
static struct seg_pcache *
seg_plookup_checkdup(struct seg_phash *hp, void *htag0,
    caddr_t addr, size_t len, int *found)
{
        struct seg_pcache *pcp;
        struct seg_pcache *delcallb_list = NULL;

        ASSERT(MUTEX_HELD(&hp->p_hmutex));

        *found = 0;
        for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
            pcp = pcp->p_hnext) {
                ASSERT(pcp->p_hashp == hp);
                if (pcp->p_htag0 == htag0 && pcp->p_addr == addr) {
                        ASSERT(!IS_PCP_WIRED(pcp));
                        if (pcp->p_len < len) {
                                pcache_link_t *plinkp;
                                if (pcp->p_active) {
                                        continue;
                                }
                                plinkp = &pcp->p_plink;
                                plinkp->p_lprev->p_lnext = plinkp->p_lnext;
                                plinkp->p_lnext->p_lprev = plinkp->p_lprev;
                                pcp->p_hprev->p_hnext = pcp->p_hnext;
                                pcp->p_hnext->p_hprev = pcp->p_hprev;
                                pcp->p_hprev = delcallb_list;
                                delcallb_list = pcp;
                        } else {
                                *found = 1;
                                break;
                        }
                }
        }
        return (delcallb_list);
}

/*
 * Lookup an address range in the pagelock cache. Return the shadow list and
 * bump up the active count. If amp is not NULL use amp as the lookup tag,
 * otherwise use seg as the lookup tag.
 */
struct page **
seg_plookup(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
    enum seg_rw rw, uint_t flags)
{
        struct seg_pcache *pcp;
        struct seg_phash *hp;
        void *htag0;

        ASSERT(seg != NULL);
        ASSERT(rw == S_READ || rw == S_WRITE);

        /*
         * Skip the pagelock cache while DR is in progress or
         * seg_pcache is off.
         */
        if (seg_pdisabled) {
                return (NULL);
        }
        ASSERT(seg_phashsize_win != 0);

        htag0 = (amp == NULL ? (void *)seg : (void *)amp);
        hp = P_HASHBP(seg, htag0, addr, flags);
        mutex_enter(&hp->p_hmutex);
        for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
            pcp = pcp->p_hnext) {
                ASSERT(pcp->p_hashp == hp);
                if (P_MATCH(pcp, htag0, addr, len)) {
                        ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
                        /*
                         * If this request wants to write pages but write
                         * permissions starting from addr don't cover the
                         * entire length len, return lookup failure back to
                         * the caller. It will check protections and fail
                         * this pagelock operation with an EACCES error.
                         */
                        if (rw == S_WRITE && pcp->p_wlen < len) {
                                break;
                        }
                        if (pcp->p_active == UINT_MAX) {
                                break;
                        }
                        pcp->p_active++;
                        if (rw == S_WRITE && !pcp->p_write) {
                                pcp->p_write = 1;
                        }
                        mutex_exit(&hp->p_hmutex);
                        return (pcp->p_pp);
                }
        }
        mutex_exit(&hp->p_hmutex);
        return (NULL);
}
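
/*
 * The expected consumer pattern in a segment driver's pagelock entry point
 * is roughly the following (an illustrative sketch only; error handling and
 * driver specifics are omitted):
 *
 *	pp = seg_plookup(seg, amp, addr, len, rw, flags);
 *	if (pp != NULL)
 *		return (pp);		(cache hit; p_active was bumped)
 *
 *	build the pp shadow list and lock the pages;
 *	if (seg_pinsert(seg, amp, addr, len, wlen, pp, rw, flags,
 *	    callback) == SEGP_FAIL)
 *		the driver keeps the uncached shadow list itself;
 *
 * On unlock the driver calls seg_pinactive() (below) with the same tag,
 * range, shadow list and callback so the active count is dropped and the
 * pages can eventually be reclaimed.
 */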

/*
 * Mark an address range inactive. If the cache is off or the address range
 * is not in the cache or another shadow list that covers a bigger range is
 * found, we call the segment driver to reclaim the pages. Otherwise just
 * decrement the active count and set the ref bit. If amp is not NULL use
 * amp as the lookup tag, otherwise use seg as the lookup tag.
 */
void
seg_pinactive(struct seg *seg, struct anon_map *amp, caddr_t addr,
    size_t len, struct page **pp, enum seg_rw rw, uint_t flags,
    seg_preclaim_cbfunc_t callback)
{
        struct seg_pcache *pcp;
        struct seg_phash *hp;
        kmutex_t *pmtx = NULL;
        pcache_link_t *pheadp;
        void *htag0;
        pgcnt_t npages = 0;
        int keep = 0;

        ASSERT(seg != NULL);
        ASSERT(rw == S_READ || rw == S_WRITE);

        htag0 = (amp == NULL ? (void *)seg : (void *)amp);

        /*
         * Skip the lookup if pcache is not configured.
         */
        if (seg_phashsize_win == 0) {
                goto out;
        }

        /*
         * Grab the per seg/amp lock before the hash lock if we are going to
         * remove an inactive entry from pcache.
         */
        if (!IS_PFLAGS_WIRED(flags) && seg_pdisabled) {
                if (amp == NULL) {
                        pheadp = &seg->s_phead;
                        pmtx = &seg->s_pmtx;
                } else {
                        pheadp = &amp->a_phead;
                        pmtx = &amp->a_pmtx;
                }
                mutex_enter(pmtx);
        }

        hp = P_HASHBP(seg, htag0, addr, flags);
        mutex_enter(&hp->p_hmutex);
again:
        for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
            pcp = pcp->p_hnext) {
                ASSERT(pcp->p_hashp == hp);
                if (P_MATCH_PP(pcp, htag0, addr, len, pp)) {
                        ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
                        ASSERT(pcp->p_active);
                        if (keep) {
                                /*
                                 * Don't remove this pcp entry if we didn't
                                 * find duplicate shadow lists on the second
                                 * search. Somebody removed those duplicates
                                 * since we dropped the hash lock after the
                                 * first search.
                                 */
                                ASSERT(pmtx != NULL);
                                ASSERT(!IS_PFLAGS_WIRED(flags));
                                mutex_exit(pmtx);
                                pmtx = NULL;
                        }
                        pcp->p_active--;
                        if (pcp->p_active == 0 && (pmtx != NULL ||
                            (seg_pdisabled && IS_PFLAGS_WIRED(flags)))) {

                                /*
                                 * This entry is no longer active. Remove it
                                 * now either because pcaching is temporarily
                                 * disabled or there're other pcp entries that
                                 * can match this pagelock request (i.e. this
                                 * entry is a duplicate).
                                 */

                                ASSERT(callback == pcp->p_callback);
                                if (pmtx != NULL) {
                                        pcache_link_t *plinkp = &pcp->p_plink;
                                        ASSERT(!IS_PCP_WIRED(pcp));
                                        ASSERT(pheadp->p_lnext != pheadp);
                                        ASSERT(pheadp->p_lprev != pheadp);
                                        plinkp->p_lprev->p_lnext =
                                            plinkp->p_lnext;
                                        plinkp->p_lnext->p_lprev =
                                            plinkp->p_lprev;
                                }
                                pcp->p_hprev->p_hnext = pcp->p_hnext;
                                pcp->p_hnext->p_hprev = pcp->p_hprev;
                                if (!IS_PCP_WIRED(pcp) &&
                                    hp->p_hnext == (struct seg_pcache *)hp) {
                                        /*
                                         * We removed the last entry from this
                                         * bucket. Now remove the bucket from
                                         * its active list.
                                         */
                                        seg_premove_abuck(hp, 0);
                                }
                                mutex_exit(&hp->p_hmutex);
                                if (pmtx != NULL) {
                                        mutex_exit(pmtx);
                                }
                                len = pcp->p_len;
                                npages = btop(len);
                                if (rw != S_WRITE && pcp->p_write) {
                                        rw = S_WRITE;
                                }
                                kmem_cache_free(seg_pkmcache, pcp);
                                goto out;
                        } else {
                                /*
                                 * We found a matching pcp entry but will not
                                 * free it right away even if it's no longer
                                 * active.
                                 */
                                if (!pcp->p_active && !IS_PCP_WIRED(pcp)) {
                                        /*
                                         * Set the reference bit and mark the
                                         * time of last access to this pcp
                                         * so that the asynchronous thread
                                         * doesn't free it immediately since
                                         * it may be reactivated very soon.
                                         */
                                        pcp->p_lbolt = ddi_get_lbolt();
                                        pcp->p_ref = 1;
                                }
                                mutex_exit(&hp->p_hmutex);
                                if (pmtx != NULL) {
                                        mutex_exit(pmtx);
                                }
                                return;
                        }
                } else if (!IS_PFLAGS_WIRED(flags) &&
                    P_MATCH(pcp, htag0, addr, len)) {
                        /*
                         * This is a duplicate pcp entry. This situation may
                         * happen if a bigger shadow list that covers our
                         * range was added while our entry was still active.
                         * Now we can free our pcp entry if it becomes
                         * inactive.
                         */
                        if (!pcp->p_active) {
                                /*
                                 * Mark this entry as referenced just in case
                                 * we'll free our own pcp entry soon.
                                 */
                                pcp->p_lbolt = ddi_get_lbolt();
                                pcp->p_ref = 1;
                        }
                        if (pmtx != NULL) {
                                /*
                                 * We are already holding pmtx and found a
                                 * duplicate. Don't keep our own pcp entry.
                                 */
                                keep = 0;
                                continue;
                        }
                        /*
                         * We have to use mutex_tryenter to attempt to lock
                         * the seg/amp list lock since we already hold the
                         * hash lock and the seg/amp list lock is above the
                         * hash lock in the lock order. If mutex_tryenter
                         * fails, drop the hash lock, retake both locks in
                         * the correct order and re-search this hash chain.
                         */
                        ASSERT(keep == 0);
                        if (amp == NULL) {
                                pheadp = &seg->s_phead;
                                pmtx = &seg->s_pmtx;
                        } else {
                                pheadp = &amp->a_phead;
                                pmtx = &amp->a_pmtx;
                        }
                        if (!mutex_tryenter(pmtx)) {
                                mutex_exit(&hp->p_hmutex);
                                mutex_enter(pmtx);
                                mutex_enter(&hp->p_hmutex);
                                /*
                                 * If we don't find a bigger shadow list on
                                 * the second search (it may happen since we
                                 * dropped the bucket lock) keep the entry
                                 * that matches our own shadow list.
                                 */
                                keep = 1;
                                goto again;
                        }
                }
        }
        mutex_exit(&hp->p_hmutex);
        if (pmtx != NULL) {
                mutex_exit(pmtx);
        }
out:
        (*callback)(htag0, addr, len, pp, rw, 0);
        if (npages) {
                mutex_enter(&seg_pmem_mtx);
                ASSERT(seg_plocked >= npages);
                seg_plocked -= npages;
                if (!IS_PFLAGS_WIRED(flags)) {
                        ASSERT(seg_plocked_window >= npages);
                        seg_plocked_window -= npages;
                }
                mutex_exit(&seg_pmem_mtx);
        }
}

#ifdef DEBUG
static uint32_t p_insert_chk_mtbf = 0;
#endif

/*
 * seg_pinsert_check() is used by segment drivers to predict whether a call
 * to seg_pinsert will fail and thereby avoid wasteful pre-processing.
 */
/*ARGSUSED*/
int
seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
    size_t len, uint_t flags)
{
        ASSERT(seg != NULL);

#ifdef DEBUG
        if (p_insert_chk_mtbf && !(gethrtime() % p_insert_chk_mtbf)) {
                return (SEGP_FAIL);
        }
#endif

        if (seg_pdisabled) {
                return (SEGP_FAIL);
        }
        ASSERT(seg_phashsize_win != 0);

        if (IS_PFLAGS_WIRED(flags)) {
                return (SEGP_SUCCESS);
        }

        if (freemem < desfree) {
                return (SEGP_FAIL);
        }

        return (SEGP_SUCCESS);
}

#ifdef DEBUG
static uint32_t p_insert_mtbf = 0;
#endif

/*
 * Insert address range with shadow list into pagelock cache if there's no
 * shadow list already cached for this address range. If the cache is off or
 * caching is temporarily disabled or the allowed 'window' is exceeded return
 * SEGP_FAIL. Otherwise return SEGP_SUCCESS.
 *
 * For non wired shadow lists (segvn case) include address in the hashing
 * function to avoid linking all the entries from the same segment or amp on
 * the same bucket. amp is used instead of seg if amp is not NULL. Non wired
 * pcache entries are also linked on a per segment/amp list so that all
 * entries can be found quickly during seg/amp purge without walking the
 * entire pcache hash table. For wired shadow lists (segspt case) we
 * don't use address hashing and per segment linking because the caller
 * currently inserts only one entry per segment that covers the entire
 * segment. If we used per segment linking even for segspt it would complicate
 * seg_ppurge_wiredpp() locking.
 *
 * Both hash bucket and per seg/amp locks need to be held before adding a non
 * wired entry to the hash and per seg/amp lists. The per seg/amp lock should
 * be taken first.
 *
 * This function will also remove from pcache old inactive shadow lists that
 * overlap with this request but cover a smaller range for the same start
 * address.
 */
int
seg_pinsert(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
    size_t wlen, struct page **pp, enum seg_rw rw, uint_t flags,
    seg_preclaim_cbfunc_t callback)
{
        struct seg_pcache *pcp;
        struct seg_phash *hp;
        pgcnt_t npages;
        pcache_link_t *pheadp;
        kmutex_t *pmtx;
        struct seg_pcache *delcallb_list = NULL;

        ASSERT(seg != NULL);
        ASSERT(rw == S_READ || rw == S_WRITE);
        ASSERT(rw == S_READ || wlen == len);
        ASSERT(rw == S_WRITE || wlen <= len);
        ASSERT(amp == NULL || wlen == len);

#ifdef DEBUG
        if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
                return (SEGP_FAIL);
        }
#endif

        if (seg_pdisabled) {
                return (SEGP_FAIL);
        }
        ASSERT(seg_phashsize_win != 0);

        ASSERT((len & PAGEOFFSET) == 0);
        npages = btop(len);
        mutex_enter(&seg_pmem_mtx);
        if (!IS_PFLAGS_WIRED(flags)) {
                seg_plocked_window += npages;
        }
        seg_plocked += npages;
        mutex_exit(&seg_pmem_mtx);

        pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
        /*
         * If amp is not NULL set htag0 to amp otherwise set it to seg.
         */
        if (amp == NULL) {
                pcp->p_htag0 = (void *)seg;
                pcp->p_flags = flags & 0xffff;
        } else {
                pcp->p_htag0 = (void *)amp;
                pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
        }
        pcp->p_addr = addr;
        pcp->p_len = len;
        pcp->p_wlen = wlen;
        pcp->p_pp = pp;
        pcp->p_write = (rw == S_WRITE);
        pcp->p_callback = callback;
        pcp->p_active = 1;

        hp = P_HASHBP(seg, pcp->p_htag0, addr, flags);
        if (!IS_PFLAGS_WIRED(flags)) {
                int found;
                void *htag0;
                if (amp == NULL) {
                        pheadp = &seg->s_phead;
                        pmtx = &seg->s_pmtx;
                        htag0 = (void *)seg;
                } else {
                        pheadp = &amp->a_phead;
                        pmtx = &amp->a_pmtx;
                        htag0 = (void *)amp;
                }
                mutex_enter(pmtx);
                mutex_enter(&hp->p_hmutex);
                delcallb_list = seg_plookup_checkdup(hp, htag0, addr,
                    len, &found);
                if (found) {
                        mutex_exit(&hp->p_hmutex);
                        mutex_exit(pmtx);
                        mutex_enter(&seg_pmem_mtx);
                        seg_plocked -= npages;
                        seg_plocked_window -= npages;
                        mutex_exit(&seg_pmem_mtx);
                        kmem_cache_free(seg_pkmcache, pcp);
                        goto out;
                }
                pcp->p_plink.p_lnext = pheadp->p_lnext;
                pcp->p_plink.p_lprev = pheadp;
                pheadp->p_lnext->p_lprev = &pcp->p_plink;
                pheadp->p_lnext = &pcp->p_plink;
        } else {
                mutex_enter(&hp->p_hmutex);
        }
        pcp->p_hashp = hp;
        pcp->p_hnext = hp->p_hnext;
        pcp->p_hprev = (struct seg_pcache *)hp;
        hp->p_hnext->p_hprev = pcp;
        hp->p_hnext = pcp;
        if (!IS_PFLAGS_WIRED(flags) &&
            hp->p_hprev == pcp) {
                seg_padd_abuck(hp);
        }
        mutex_exit(&hp->p_hmutex);
        if (!IS_PFLAGS_WIRED(flags)) {
                mutex_exit(pmtx);
        }

out:
        npages = 0;
        while (delcallb_list != NULL) {
                pcp = delcallb_list;
                delcallb_list = pcp->p_hprev;
                ASSERT(!IS_PCP_WIRED(pcp) && !pcp->p_active);
                (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
                    pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
                npages += btop(pcp->p_len);
                kmem_cache_free(seg_pkmcache, pcp);
        }
        if (npages) {
                ASSERT(!IS_PFLAGS_WIRED(flags));
                mutex_enter(&seg_pmem_mtx);
                ASSERT(seg_plocked >= npages);
                ASSERT(seg_plocked_window >= npages);
                seg_plocked -= npages;
                seg_plocked_window -= npages;
                mutex_exit(&seg_pmem_mtx);
        }

        return (SEGP_SUCCESS);
}
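
/*
 * For reference, the !force path of seg_ppurge_async() below applies these
 * tiers when freemem < lotsfree + needfree, with
 * fmem = MAX(freemem - needfree, 0):
 *
 *	fmem <= 5/4 desfree	purge unconditionally
 *	fmem <= 7/8 lotsfree	purge if the pcache window holds at least
 *				1/2 of availrmem_initial
 *	fmem < lotsfree		purge if the pcache window holds at least
 *				3/4 of availrmem_initial
 */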

/*
 * Purge entries from the pagelock cache if they are not active
 * and not recently used.
 */
static void
seg_ppurge_async(int force)
{
        struct seg_pcache *delcallb_list = NULL;
        struct seg_pcache *pcp;
        struct seg_phash *hp;
        pgcnt_t npages = 0;
        pgcnt_t npages_window = 0;
        pgcnt_t npgs_to_purge;
        pgcnt_t npgs_purged = 0;
        int hlinks = 0;
        int hlix;
        pcache_link_t *hlinkp;
        pcache_link_t *hlnextp = NULL;
        int lowmem;

        ASSERT(seg_phashsize_win != 0);

        /*
         * If the cache is off or empty, return.
         */
        if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
                return;
        }

        if (!force) {
                lowmem = 0;
                if (freemem < lotsfree + needfree) {
                        spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
                        if (fmem <= 5 * (desfree >> 2)) {
                                lowmem = 1;
                        } else if (fmem <= 7 * (lotsfree >> 3)) {
                                if (seg_plocked_window >=
                                    (availrmem_initial >> 1)) {
                                        lowmem = 1;
                                }
                        } else if (fmem < lotsfree) {
                                if (seg_plocked_window >=
                                    3 * (availrmem_initial >> 2)) {
                                        lowmem = 1;
                                }
                        }
                }
                if (!lowmem) {
                        return;
                }
                npgs_to_purge = seg_plocked_window >>
                    seg_pshrink_shift;
                if (lowmem) {
                        npgs_to_purge = MIN(npgs_to_purge,
                            MAX(seg_pmaxapurge_npages, desfree));
                } else {
                        npgs_to_purge = MIN(npgs_to_purge,
                            seg_pmaxapurge_npages);
                }
                if (npgs_to_purge == 0) {
                        return;
                }
        } else {
                struct seg_phash_wired *hpw;

                ASSERT(seg_phashsize_wired != 0);

                for (hpw = seg_phashtab_wired;
                    hpw < &seg_phashtab_wired[seg_phashsize_wired]; hpw++) {

                        if (hpw->p_hnext == (struct seg_pcache *)hpw) {
                                continue;
                        }

                        mutex_enter(&hpw->p_hmutex);

                        for (pcp = hpw->p_hnext;
                            pcp != (struct seg_pcache *)hpw;
                            pcp = pcp->p_hnext) {

                                ASSERT(IS_PCP_WIRED(pcp));
                                ASSERT(pcp->p_hashp ==
                                    (struct seg_phash *)hpw);

                                if (pcp->p_active) {
                                        continue;
                                }
                                pcp->p_hprev->p_hnext = pcp->p_hnext;
                                pcp->p_hnext->p_hprev = pcp->p_hprev;
                                pcp->p_hprev = delcallb_list;
                                delcallb_list = pcp;
                        }
                        mutex_exit(&hpw->p_hmutex);
                }
        }

        mutex_enter(&seg_pmem_mtx);
        if (seg_pathr_on) {
                mutex_exit(&seg_pmem_mtx);
                goto runcb;
        }
        seg_pathr_on = 1;
        mutex_exit(&seg_pmem_mtx);
        ASSERT(seg_pahcur <= 1);
        hlix = !seg_pahcur;

again:
        for (hlinkp = seg_pahhead[hlix].p_lnext; hlinkp != &seg_pahhead[hlix];
            hlinkp = hlnextp) {

                hlnextp = hlinkp->p_lnext;
                ASSERT(hlnextp != NULL);

                hp = hlink2phash(hlinkp, hlix);
                if (hp->p_hnext == (struct seg_pcache *)hp) {
                        seg_pathr_empty_ahb++;
                        continue;
                }
                seg_pathr_full_ahb++;
                mutex_enter(&hp->p_hmutex);

                for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
                    pcp = pcp->p_hnext) {
                        pcache_link_t *pheadp;
                        pcache_link_t *plinkp;
                        void *htag0;
                        kmutex_t *pmtx;

                        ASSERT(!IS_PCP_WIRED(pcp));
                        ASSERT(pcp->p_hashp == hp);

                        if (pcp->p_active) {
                                continue;
                        }
                        if (!force && pcp->p_ref &&
                            PCP_AGE(pcp) < seg_pmax_pcpage) {
                                pcp->p_ref = 0;
                                continue;
                        }
                        plinkp = &pcp->p_plink;
                        htag0 = pcp->p_htag0;
                        if (pcp->p_flags & SEGP_AMP) {
                                pheadp = &((amp_t *)htag0)->a_phead;
                                pmtx = &((amp_t *)htag0)->a_pmtx;
                        } else {
                                pheadp = &((seg_t *)htag0)->s_phead;
                                pmtx = &((seg_t *)htag0)->s_pmtx;
                        }
                        if (!mutex_tryenter(pmtx)) {
                                continue;
                        }
                        ASSERT(pheadp->p_lnext != pheadp);
                        ASSERT(pheadp->p_lprev != pheadp);
                        plinkp->p_lprev->p_lnext = plinkp->p_lnext;
                        plinkp->p_lnext->p_lprev = plinkp->p_lprev;
                        pcp->p_hprev->p_hnext = pcp->p_hnext;
                        pcp->p_hnext->p_hprev = pcp->p_hprev;
                        mutex_exit(pmtx);
                        pcp->p_hprev = delcallb_list;
                        delcallb_list = pcp;
                        npgs_purged += btop(pcp->p_len);
                }
                if (hp->p_hnext == (struct seg_pcache *)hp) {
                        seg_premove_abuck(hp, 1);
                }
                mutex_exit(&hp->p_hmutex);
                if (npgs_purged >= seg_plocked_window) {
                        break;
                }
                if (!force) {
                        if (npgs_purged >= npgs_to_purge) {
                                break;
                        }
                        if (!(seg_pathr_full_ahb & 15)) {
                                ASSERT(lowmem);
                                if (freemem >= lotsfree + needfree) {
                                        break;
                                }
                        }
                }
        }

        if (hlinkp == &seg_pahhead[hlix]) {
                /*
                 * We processed the entire hlix active bucket list but didn't
                 * find enough pages to reclaim. Switch the lists and walk
                 * the other list if we haven't done it yet.
                 */
                mutex_enter(&seg_pmem_mtx);
                ASSERT(seg_pathr_on);
                ASSERT(seg_pahcur == !hlix);
                seg_pahcur = hlix;
                mutex_exit(&seg_pmem_mtx);
                if (++hlinks < 2) {
                        hlix = !hlix;
                        goto again;
                }
        } else if ((hlinkp = hlnextp) != &seg_pahhead[hlix] &&
            seg_pahhead[hlix].p_lnext != hlinkp) {
                ASSERT(hlinkp != NULL);
                ASSERT(hlinkp->p_lprev != &seg_pahhead[hlix]);
                ASSERT(seg_pahhead[hlix].p_lnext != &seg_pahhead[hlix]);
                ASSERT(seg_pahhead[hlix].p_lprev != &seg_pahhead[hlix]);

                /*
                 * Reinsert the header to point to hlinkp so that we start
                 * from the hlinkp bucket next time around.
                 */
                seg_pahhead[hlix].p_lnext->p_lprev = seg_pahhead[hlix].p_lprev;
                seg_pahhead[hlix].p_lprev->p_lnext = seg_pahhead[hlix].p_lnext;
                seg_pahhead[hlix].p_lnext = hlinkp;
                seg_pahhead[hlix].p_lprev = hlinkp->p_lprev;
                hlinkp->p_lprev->p_lnext = &seg_pahhead[hlix];
                hlinkp->p_lprev = &seg_pahhead[hlix];
        }

        mutex_enter(&seg_pmem_mtx);
        ASSERT(seg_pathr_on);
        seg_pathr_on = 0;
        mutex_exit(&seg_pmem_mtx);

runcb:
        /*
         * Run the delayed callback list. segments/amps can't go away until
         * the callback is executed since they must have a non 0 softlockcnt.
         * That's why we don't need to hold as/seg/amp locks to execute the
         * callback.
         */
        while (delcallb_list != NULL) {
                pcp = delcallb_list;
                delcallb_list = pcp->p_hprev;
                ASSERT(!pcp->p_active);
                (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
                    pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 1);
                npages += btop(pcp->p_len);
                if (!IS_PCP_WIRED(pcp)) {
                        npages_window += btop(pcp->p_len);
                }
                kmem_cache_free(seg_pkmcache, pcp);
        }
        if (npages) {
                mutex_enter(&seg_pmem_mtx);
                ASSERT(seg_plocked >= npages);
                ASSERT(seg_plocked_window >= npages_window);
                seg_plocked -= npages;
                seg_plocked_window -= npages_window;
                mutex_exit(&seg_pmem_mtx);
        }
}
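
/*
 * The reclaim callbacks invoked above follow the seg_preclaim_cbfunc_t
 * shape that drivers pass to seg_pinsert(). A sketch (illustrative only;
 * the xx_ name is hypothetical):
 *
 *	static int
 *	xx_reclaim(void *htag0, caddr_t addr, size_t len, struct page **pp,
 *	    enum seg_rw rw, int async)
 *	{
 *		unlock the btop(len) pages of the pp shadow list for rw
 *		access, free the shadow list and drop softlockcnt;
 *		return (0);
 *	}
 *
 * The last argument is 1 on asynchronous purge paths and 0 on synchronous
 * ones. The return value matters only to seg_ppurge_wiredpp() below, where
 * a non-zero return lets the walk terminate early.
 */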

/*
 * Remove cached pages for segment(s) entries from the hashtable. The
 * segments are identified by the pp array. This is useful for multiple
 * segs cached on behalf of a dummy segment (ISM/DISM) with a common pp
 * array.
 */
void
seg_ppurge_wiredpp(struct page **pp)
{
        struct seg_pcache *pcp;
        struct seg_phash_wired *hp;
        pgcnt_t npages = 0;
        struct seg_pcache *delcallb_list = NULL;

        /*
         * If the cache is empty, return.
         */
        if (seg_plocked == 0) {
                return;
        }
        ASSERT(seg_phashsize_wired != 0);

        for (hp = seg_phashtab_wired;
            hp < &seg_phashtab_wired[seg_phashsize_wired]; hp++) {
                if (hp->p_hnext == (struct seg_pcache *)hp) {
                        continue;
                }
                mutex_enter(&hp->p_hmutex);
                pcp = hp->p_hnext;
                while (pcp != (struct seg_pcache *)hp) {
                        ASSERT(pcp->p_hashp == (struct seg_phash *)hp);
                        ASSERT(IS_PCP_WIRED(pcp));
                        /*
                         * Purge entries which are not active.
                         */
                        if (!pcp->p_active && pcp->p_pp == pp) {
                                ASSERT(pcp->p_htag0 != NULL);
                                pcp->p_hprev->p_hnext = pcp->p_hnext;
                                pcp->p_hnext->p_hprev = pcp->p_hprev;
                                pcp->p_hprev = delcallb_list;
                                delcallb_list = pcp;
                        }
                        pcp = pcp->p_hnext;
                }
                mutex_exit(&hp->p_hmutex);
                /*
                 * Segments can't go away until the callback is executed
                 * since they must have a non 0 softlockcnt. That's why we
                 * don't need to hold as/seg locks to execute the callback.
                 */
                while (delcallb_list != NULL) {
                        int done;
                        pcp = delcallb_list;
                        delcallb_list = pcp->p_hprev;
                        ASSERT(!pcp->p_active);
                        done = (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
                            pcp->p_len, pcp->p_pp,
                            pcp->p_write ? S_WRITE : S_READ, 1);
                        npages += btop(pcp->p_len);
                        ASSERT(IS_PCP_WIRED(pcp));
                        kmem_cache_free(seg_pkmcache, pcp);
                        if (done) {
                                ASSERT(delcallb_list == NULL);
                                goto out;
                        }
                }
        }

out:
        mutex_enter(&seg_pmem_mtx);
        ASSERT(seg_plocked >= npages);
        seg_plocked -= npages;
        mutex_exit(&seg_pmem_mtx);
}

/*
 * Purge all entries for a given segment. Since we
 * callback into the segment driver directly for page
 * reclaim the caller needs to hold the right locks.
 */
void
seg_ppurge(struct seg *seg, struct anon_map *amp, uint_t flags)
{
        struct seg_pcache *delcallb_list = NULL;
        struct seg_pcache *pcp;
        struct seg_phash *hp;
        pgcnt_t npages = 0;
        void *htag0;

        if (seg_plocked == 0) {
                return;
        }
        ASSERT(seg_phashsize_win != 0);

        /*
         * If amp is not NULL use amp as the lookup tag otherwise use seg
         * as the lookup tag.
         */
        htag0 = (amp == NULL ? (void *)seg : (void *)amp);
        ASSERT(htag0 != NULL);
        if (IS_PFLAGS_WIRED(flags)) {
                hp = P_HASHBP(seg, htag0, 0, flags);
                mutex_enter(&hp->p_hmutex);
                pcp = hp->p_hnext;
                while (pcp != (struct seg_pcache *)hp) {
                        ASSERT(pcp->p_hashp == hp);
                        ASSERT(IS_PCP_WIRED(pcp));
                        if (pcp->p_htag0 == htag0) {
                                if (pcp->p_active) {
                                        break;
                                }
                                pcp->p_hprev->p_hnext = pcp->p_hnext;
                                pcp->p_hnext->p_hprev = pcp->p_hprev;
                                pcp->p_hprev = delcallb_list;
                                delcallb_list = pcp;
                        }
                        pcp = pcp->p_hnext;
                }
                mutex_exit(&hp->p_hmutex);
        } else {
                pcache_link_t *plinkp;
                pcache_link_t *pheadp;
                kmutex_t *pmtx;

                if (amp == NULL) {
                        ASSERT(seg != NULL);
                        pheadp = &seg->s_phead;
                        pmtx = &seg->s_pmtx;
                } else {
                        pheadp = &amp->a_phead;
                        pmtx = &amp->a_pmtx;
                }
                mutex_enter(pmtx);
                while ((plinkp = pheadp->p_lnext) != pheadp) {
                        pcp = plink2pcache(plinkp);
                        ASSERT(!IS_PCP_WIRED(pcp));
                        ASSERT(pcp->p_htag0 == htag0);
                        hp = pcp->p_hashp;
                        mutex_enter(&hp->p_hmutex);
                        if (pcp->p_active) {
                                mutex_exit(&hp->p_hmutex);
                                break;
                        }
                        ASSERT(plinkp->p_lprev == pheadp);
                        pheadp->p_lnext = plinkp->p_lnext;
                        plinkp->p_lnext->p_lprev = pheadp;
                        pcp->p_hprev->p_hnext = pcp->p_hnext;
                        pcp->p_hnext->p_hprev = pcp->p_hprev;
                        pcp->p_hprev = delcallb_list;
                        delcallb_list = pcp;
                        if (hp->p_hnext == (struct seg_pcache *)hp) {
                                seg_premove_abuck(hp, 0);
                        }
                        mutex_exit(&hp->p_hmutex);
                }
                mutex_exit(pmtx);
        }
        while (delcallb_list != NULL) {
                pcp = delcallb_list;
                delcallb_list = pcp->p_hprev;
                ASSERT(!pcp->p_active);
                (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
                    pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
                npages += btop(pcp->p_len);
                kmem_cache_free(seg_pkmcache, pcp);
        }
        mutex_enter(&seg_pmem_mtx);
        ASSERT(seg_plocked >= npages);
        seg_plocked -= npages;
        if (!IS_PFLAGS_WIRED(flags)) {
                ASSERT(seg_plocked_window >= npages);
                seg_plocked_window -= npages;
        }
        mutex_exit(&seg_pmem_mtx);
}

static void seg_pinit_mem_config(void);

/*
 * Setup the pagelock cache.
 */
static void
seg_pinit(void)
{
        struct seg_phash *hp;
        ulong_t i;
        pgcnt_t physmegs;

        seg_plocked = 0;
        seg_plocked_window = 0;

        if (segpcache_enabled == 0) {
                seg_phashsize_win = 0;
                seg_phashsize_wired = 0;
                seg_pdisabled = 1;
                return;
        }

        seg_pdisabled = 0;
        seg_pkmcache = kmem_cache_create("seg_pcache",
            sizeof (struct seg_pcache), 0, NULL, NULL, NULL, NULL, NULL, 0);
        if (segpcache_pcp_maxage_ticks <= 0) {
                segpcache_pcp_maxage_ticks = segpcache_pcp_maxage_sec * hz;
        }
        seg_pmax_pcpage = segpcache_pcp_maxage_ticks;
        seg_pathr_empty_ahb = 0;
        seg_pathr_full_ahb = 0;
        seg_pshrink_shift = segpcache_shrink_shift;
        seg_pmaxapurge_npages = btop(segpcache_maxapurge_bytes);

        mutex_init(&seg_pcache_mtx, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&seg_pmem_mtx, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&seg_pasync_mtx, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&seg_pasync_cv, NULL, CV_DEFAULT, NULL);

        physmegs = physmem >> (20 - PAGESHIFT);
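
        /*
         * Worked example of the default sizing below (illustrative,
         * assuming 4K pages): with 4GB of physical memory, physmem is 1M
         * pages; one bucket per 32K of memory is one bucket per 8 pages,
         * so the window hash gets 1M / 8 = 128K buckets (already a power
         * of 2).
         */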

        /*
         * If segpcache_hashsize_win was not set in /etc/system or it has
         * an absurd value, set it to a default.
         */
        if (segpcache_hashsize_win == 0 || segpcache_hashsize_win > physmem) {
                /*
                 * Create one bucket per 32K (or at least per 8 pages) of
                 * available memory.
                 */
                pgcnt_t pages_per_bucket = MAX(btop(32 * 1024), 8);
                segpcache_hashsize_win = MAX(1024, physmem / pages_per_bucket);
        }
        if (!ISP2(segpcache_hashsize_win)) {
                ulong_t rndfac = ~(1UL <<
                    (highbit(segpcache_hashsize_win) - 1));
                rndfac &= segpcache_hashsize_win;
                segpcache_hashsize_win += rndfac;
                segpcache_hashsize_win = 1 <<
                    (highbit(segpcache_hashsize_win) - 1);
        }
        seg_phashsize_win = segpcache_hashsize_win;
        seg_phashtab_win = kmem_zalloc(
            seg_phashsize_win * sizeof (struct seg_phash),
            KM_SLEEP);
        for (i = 0; i < seg_phashsize_win; i++) {
                hp = &seg_phashtab_win[i];
                hp->p_hnext = (struct seg_pcache *)hp;
                hp->p_hprev = (struct seg_pcache *)hp;
                mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
        }

        seg_pahcur = 0;
        seg_pathr_on = 0;
        seg_pahhead[0].p_lnext = &seg_pahhead[0];
        seg_pahhead[0].p_lprev = &seg_pahhead[0];
        seg_pahhead[1].p_lnext = &seg_pahhead[1];
        seg_pahhead[1].p_lprev = &seg_pahhead[1];

        /*
         * If segpcache_hashsize_wired was not set in /etc/system or it has
         * an absurd value, set it to a default.
         */
        if (segpcache_hashsize_wired == 0 ||
            segpcache_hashsize_wired > physmem / 4) {
                /*
                 * Choose segpcache_hashsize_wired based on physmem.
                 * Create a bucket per 128K bytes up to 256K buckets.
                 */
                if (physmegs < 20 * 1024) {
                        segpcache_hashsize_wired = MAX(1024, physmegs << 3);
                } else {
                        segpcache_hashsize_wired = 256 * 1024;
                }
        }
        if (!ISP2(segpcache_hashsize_wired)) {
                segpcache_hashsize_wired = 1 <<
                    highbit(segpcache_hashsize_wired);
        }
        seg_phashsize_wired = segpcache_hashsize_wired;
        seg_phashtab_wired = kmem_zalloc(
            seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
        for (i = 0; i < seg_phashsize_wired; i++) {
                hp = (struct seg_phash *)&seg_phashtab_wired[i];
                hp->p_hnext = (struct seg_pcache *)hp;
                hp->p_hprev = (struct seg_pcache *)hp;
                mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
        }

        seg_pinit_mem_config();
}

/*
 * Called by pageout if memory is low.
 */
void
seg_preap(void)
{
        /*
         * If the cache is off or empty, return.
         */
        if (seg_plocked_window == 0) {
                return;
        }
        ASSERT(seg_phashsize_win != 0);

        /*
         * If somebody is already purging pcache
         * just return.
         */
        if (seg_pdisabled) {
                return;
        }

        cv_signal(&seg_pasync_cv);
}

/*
 * Run as a background thread and reclaim pagelock
 * pages which have not been used recently.
 */
void
seg_pasync_thread(void)
{
        callb_cpr_t cpr_info;

        if (seg_phashsize_win == 0) {
                thread_exit();
                /*NOTREACHED*/
        }

        seg_pasync_thr = curthread;

        CALLB_CPR_INIT(&cpr_info, &seg_pasync_mtx,
            callb_generic_cpr, "seg_pasync");

        if (segpcache_reap_ticks <= 0) {
                segpcache_reap_ticks = segpcache_reap_sec * hz;
        }

        mutex_enter(&seg_pasync_mtx);
        for (;;) {
                CALLB_CPR_SAFE_BEGIN(&cpr_info);
                (void) cv_reltimedwait(&seg_pasync_cv, &seg_pasync_mtx,
                    segpcache_reap_ticks, TR_CLOCK_TICK);
                CALLB_CPR_SAFE_END(&cpr_info, &seg_pasync_mtx);
                if (seg_pdisabled == 0) {
                        seg_ppurge_async(0);
                }
        }
}

static struct kmem_cache *seg_cache;

/*
 * Initialize segment management data structures.
 */
void
seg_init(void)
{
        kstat_t *ksp;

        seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
            0, NULL, NULL, NULL, NULL, NULL, 0);

        ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
            segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *)segadvstat_ptr;
                kstat_install(ksp);
        }

        seg_pinit();
}

/*
 * Allocate a segment to cover [base, base+size]
 * and attach it to the specified address space.
 */
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
        struct seg *new;
        caddr_t segbase;
        size_t segsize;

        segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
        segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
            (uintptr_t)segbase;

        if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
                return ((struct seg *)NULL);    /* bad virtual addr range */

        if (as != &kas &&
            valid_usr_range(segbase, segsize, 0, as,
            as->a_userlimit) != RANGE_OKAY)
                return ((struct seg *)NULL);    /* bad virtual addr range */

        new = kmem_cache_alloc(seg_cache, KM_SLEEP);
        new->s_ops = NULL;
        new->s_data = NULL;
        new->s_szc = 0;
        new->s_flags = 0;
        mutex_init(&new->s_pmtx, NULL, MUTEX_DEFAULT, NULL);
        new->s_phead.p_lnext = &new->s_phead;
        new->s_phead.p_lprev = &new->s_phead;
        if (seg_attach(as, segbase, segsize, new) < 0) {
                kmem_cache_free(seg_cache, new);
                return ((struct seg *)NULL);
        }
        /* caller must fill in ops, data */
        return (new);
}
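
/*
 * A typical creation sequence looks roughly like this (an illustrative
 * sketch only; the xx_ names are hypothetical):
 *
 *	seg = seg_alloc(as, addr, len);
 *	if (seg == NULL)
 *		return (ENOMEM);
 *	seg->s_ops = &xx_segops;
 *	seg->s_data = xx_data;		(driver private data)
 *
 * In practice as_map() performs the seg_alloc() and then calls the segment
 * driver's create function to fill in s_ops and s_data, as noted above.
 */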

/*
 * Attach a segment to the address space. Used by seg_alloc()
 * and for kernel startup to attach to static segments.
 */
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
        seg->s_as = as;
        seg->s_base = base;
        seg->s_size = size;

        /*
         * as_addseg() will add the segment at the appropriate point
         * in the list. It will return -1 if there is overlap with
         * an already existing segment.
         */
        return (as_addseg(as, seg));
}

/*
 * Unmap a segment and free it from its associated address space.
 * This should be called by anybody who's finished with a whole segment's
 * mapping. Just calls segop_unmap() on the whole mapping. It is the
 * responsibility of the segment driver to unlink the segment
 * from the address space, and to free public and private data structures
 * associated with the segment. (This is typically done by a call to
 * seg_free()).
 */
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
        int ret;
#endif /* DEBUG */

        ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

        /* Shouldn't have called seg_unmap if mapping isn't yet established */
        ASSERT(seg->s_data != NULL);

        /* Unmap the whole mapping */
#ifdef DEBUG
        ret = segop_unmap(seg, seg->s_base, seg->s_size);
        ASSERT(ret == 0);
#else
        segop_unmap(seg, seg->s_base, seg->s_size);
#endif /* DEBUG */
}

/*
 * Free the segment from its associated as. This should only be called
 * if a mapping to the segment has not yet been established (e.g., if
 * an error occurs in the middle of doing an as_map when the segment
 * has already been partially set up) or if it has already been deleted
 * (e.g., from a segment driver unmap routine if the unmap applies to the
 * entire segment). If the mapping is currently set up then seg_unmap() should
 * be called instead.
 */
void
seg_free(struct seg *seg)
{
        register struct as *as = seg->s_as;
        struct seg *tseg = as_removeseg(as, seg);

        ASSERT(tseg == seg);

        /*
         * If the segment private data field is NULL,
         * then segment driver is not attached yet.
         */
        if (seg->s_data != NULL)
                segop_free(seg);

        mutex_destroy(&seg->s_pmtx);
        ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
        ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
        kmem_cache_free(seg_cache, seg);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_add(
        void *arg,
        pgcnt_t delta_pages)
{
        /* Nothing to do. */
}

void
seg_p_enable(void)
{
        mutex_enter(&seg_pcache_mtx);
        ASSERT(seg_pdisabled != 0);
        seg_pdisabled--;
        mutex_exit(&seg_pcache_mtx);
}

/*
 * seg_p_disable - disables seg_pcache, and then attempts to empty the
 * cache.
 * Returns SEGP_SUCCESS if the cache was successfully emptied, or
 * SEGP_FAIL if the cache could not be emptied.
 */
int
seg_p_disable(void)
{
        pgcnt_t old_plocked;
        int stall_count = 0;

        mutex_enter(&seg_pcache_mtx);
        seg_pdisabled++;
        ASSERT(seg_pdisabled != 0);
        mutex_exit(&seg_pcache_mtx);

        /*
         * Attempt to empty the cache. Terminate if seg_plocked does not
         * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
         */
        while (seg_plocked != 0) {
                ASSERT(seg_phashsize_win != 0);
                old_plocked = seg_plocked;
                seg_ppurge_async(1);
                if (seg_plocked == old_plocked) {
                        if (stall_count++ > SEGP_STALL_THRESHOLD) {
                                return (SEGP_FAIL);
                        }
                } else
                        stall_count = 0;
                if (seg_plocked != 0)
                        delay(hz / SEGP_PREDEL_DELAY_FACTOR);
        }
        return (SEGP_SUCCESS);
}
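
/*
 * seg_pdisabled is a counter, so seg_p_disable()/seg_p_enable() calls nest.
 * A caller that needs pcache off around a critical section pairs them like
 * this (an illustrative sketch only):
 *
 *	if (seg_p_disable() == SEGP_SUCCESS) {
 *		... operate with caching off and the cache empty ...
 *	}
 *	seg_p_enable();		(drop the disable count either way)
 *
 * Note that even on SEGP_FAIL the disable count remains raised; the
 * memory-delete callbacks below rely on this by leaving caching disabled
 * in the pre-delete callback and re-enabling it in the post-delete one.
 */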

/*
 * Attempt to purge seg_pcache. May need to return before this has
 * completed to allow other pre_del callbacks to unlock pages. This is
 * ok because:
 *	1) The seg_pdisabled flag has been set so at least we won't
 *	cache anymore locks and the locks we couldn't purge
 *	will not be held if they do get released by a subsequent
 *	pre-delete callback.
 *
 *	2) The rest of the memory delete thread processing does not
 *	depend on the changes made in this pre-delete callback. No
 *	panics will result, the worst that will happen is that the
 *	DR code will timeout and cancel the delete.
 */
/*ARGSUSED*/
static int
seg_p_mem_config_pre_del(
        void *arg,
        pgcnt_t delta_pages)
{
        if (seg_phashsize_win == 0) {
                return (0);
        }
        if (seg_p_disable() != SEGP_SUCCESS)
                cmn_err(CE_NOTE,
                    "!Pre-delete couldn't purge pagelock cache - continuing");
        return (0);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_del(
        void *arg,
        pgcnt_t delta_pages,
        int cancelled)
{
        if (seg_phashsize_win == 0) {
                return;
        }
        seg_p_enable();
}

static kphysm_setup_vector_t seg_p_mem_config_vec = {
        KPHYSM_SETUP_VECTOR_VERSION,
        seg_p_mem_config_post_add,
        seg_p_mem_config_pre_del,
        seg_p_mem_config_post_del,
};

static void
seg_pinit_mem_config(void)
{
        int ret;

        ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
        /*
         * Want to catch this in the debug kernel. At run time, if the
         * callbacks don't get run all will be OK as the disable just makes
         * it more likely that the pages can be collected.
         */
        ASSERT(ret == 0);
}

/*
 * Verify that the segment is not a shared anonymous segment which reserves
 * swap. zone.max-swap accounting (zone->zone_max_swap) cannot be transferred
 * from one zone to another if any segments are shared. This is because the
 * last process to exit will credit the swap reservation. This could lead
 * to the swap being reserved by one zone, and credited to another.
 */
boolean_t
seg_can_change_zones(struct seg *seg)
{
        struct segvn_data *svd;

        if (seg->s_ops == &segspt_shmops)
                return (B_FALSE);

        if (seg->s_ops == &segvn_ops) {
                svd = (struct segvn_data *)seg->s_data;
                if (svd->type == MAP_SHARED &&
                    svd->amp != NULL &&
                    svd->amp->swresv > 0)
                        return (B_FALSE);
        }
        return (B_TRUE);
}

/*
 * Return swap reserved by a segment backing a private mapping.
 */
size_t
seg_swresv(struct seg *seg)
{
        struct segvn_data *svd;
        size_t swap = 0;

        if (seg->s_ops == &segvn_ops) {
                svd = (struct segvn_data *)seg->s_data;
                if (svd->type == MAP_PRIVATE && svd->swresv > 0)
                        swap = svd->swresv;
        }
        return (swap);
}

/*
 * segop wrappers. Entry points that every segment driver must provide are
 * checked with VERIFY3P(); optional entry points return a benign default
 * when a driver leaves them NULL.
 */
int
segop_dup(struct seg *seg, struct seg *new)
{
        VERIFY3P(seg->s_ops->dup, !=, NULL);

        return (seg->s_ops->dup(seg, new));
}

int
segop_unmap(struct seg *seg, caddr_t addr, size_t len)
{
        VERIFY3P(seg->s_ops->unmap, !=, NULL);

        return (seg->s_ops->unmap(seg, addr, len));
}

void
segop_free(struct seg *seg)
{
        VERIFY3P(seg->s_ops->free, !=, NULL);

        seg->s_ops->free(seg);
}

faultcode_t
segop_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
        VERIFY3P(seg->s_ops->fault, !=, NULL);

        return (seg->s_ops->fault(hat, seg, addr, len, type, rw));
}

faultcode_t
segop_faulta(struct seg *seg, caddr_t addr)
{
        VERIFY3P(seg->s_ops->faulta, !=, NULL);

        return (seg->s_ops->faulta(seg, addr));
}

int
segop_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        VERIFY3P(seg->s_ops->setprot, !=, NULL);

        return (seg->s_ops->setprot(seg, addr, len, prot));
}

int
segop_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        VERIFY3P(seg->s_ops->checkprot, !=, NULL);

        return (seg->s_ops->checkprot(seg, addr, len, prot));
}

int
segop_kluster(struct seg *seg, caddr_t addr, ssize_t d)
{
        VERIFY3P(seg->s_ops->kluster, !=, NULL);

        return (seg->s_ops->kluster(seg, addr, d));
}

int
segop_sync(struct seg *seg, caddr_t addr, size_t len, int atr, uint_t f)
{
        VERIFY3P(seg->s_ops->sync, !=, NULL);

        return (seg->s_ops->sync(seg, addr, len, atr, f));
}

size_t
segop_incore(struct seg *seg, caddr_t addr, size_t len, char *v)
{
        VERIFY3P(seg->s_ops->incore, !=, NULL);

        return (seg->s_ops->incore(seg, addr, len, v));
}

int
segop_lockop(struct seg *seg, caddr_t addr, size_t len, int atr, int op,
    ulong_t *b, size_t p)
{
        VERIFY3P(seg->s_ops->lockop, !=, NULL);

        return (seg->s_ops->lockop(seg, addr, len, atr, op, b, p));
}

int
segop_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *p)
{
        VERIFY3P(seg->s_ops->getprot, !=, NULL);

        return (seg->s_ops->getprot(seg, addr, len, p));
}

u_offset_t
segop_getoffset(struct seg *seg, caddr_t addr)
{
        VERIFY3P(seg->s_ops->getoffset, !=, NULL);

        return (seg->s_ops->getoffset(seg, addr));
}

int
segop_gettype(struct seg *seg, caddr_t addr)
{
        VERIFY3P(seg->s_ops->gettype, !=, NULL);

        return (seg->s_ops->gettype(seg, addr));
}

int
segop_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
        VERIFY3P(seg->s_ops->getvp, !=, NULL);

        return (seg->s_ops->getvp(seg, addr, vpp));
}

int
segop_advise(struct seg *seg, caddr_t addr, size_t len, uint_t b)
{
        VERIFY3P(seg->s_ops->advise, !=, NULL);

        return (seg->s_ops->advise(seg, addr, len, b));
}

void
segop_dump(struct seg *seg)
{
        if (seg->s_ops->dump == NULL)
                return;

        seg->s_ops->dump(seg);
}

int
segop_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***page,
    enum lock_type type, enum seg_rw rw)
{
        VERIFY3P(seg->s_ops->pagelock, !=, NULL);

        return (seg->s_ops->pagelock(seg, addr, len, page, type, rw));
}

int
segop_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
        if (seg->s_ops->setpagesize == NULL)
                return (ENOTSUP);

        return (seg->s_ops->setpagesize(seg, addr, len, szc));
}

int
segop_getmemid(struct seg *seg, caddr_t addr, memid_t *mp)
{
        if (seg->s_ops->getmemid == NULL)
                return (ENODEV);

        return (seg->s_ops->getmemid(seg, addr, mp));
}

struct lgrp_mem_policy_info *
segop_getpolicy(struct seg *seg, caddr_t addr)
{
        if (seg->s_ops->getpolicy == NULL)
                return (NULL);

        return (seg->s_ops->getpolicy(seg, addr));
}

int
segop_capable(struct seg *seg, segcapability_t cap)
{
        if (seg->s_ops->capable == NULL)
                return (0);

        return (seg->s_ops->capable(seg, cap));
}

int
segop_inherit(struct seg *seg, caddr_t addr, size_t len, uint_t op)
{
        if (seg->s_ops->inherit == NULL)
                return (ENOTSUP);

        return (seg->s_ops->inherit(seg, addr, len, op));
}
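
/*
 * Example (illustrative only): a driver-agnostic caller can fault pages in
 * and lock them through the wrappers without knowing which segment driver
 * backs the segment:
 *
 *	faultcode_t res;
 *
 *	res = segop_fault(hat, seg, addr, len, F_SOFTLOCK, S_READ);
 *	if (res == 0) {
 *		... access the range ...
 *		(void) segop_fault(hat, seg, addr, len, F_SOFTUNLOCK, S_READ);
 *	}
 */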