1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * negative cache handling for the /dev fs 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/t_lock.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/user.h> 36 #include <sys/time.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/fcntl.h> 41 #include <sys/flock.h> 42 #include <sys/kmem.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/stat.h> 46 #include <sys/cred.h> 47 #include <sys/cmn_err.h> 48 #include <sys/debug.h> 49 #include <sys/mode.h> 50 #include <sys/policy.h> 51 #include <fs/fs_subr.h> 52 #include <sys/mount.h> 53 #include <sys/fs/snode.h> 54 #include <sys/fs/dv_node.h> 55 #include <sys/fs/sdev_impl.h> 56 #include <sys/sunndi.h> 57 #include <sys/sunmdi.h> 58 #include <sys/ddi.h> 59 #include <sys/modctl.h> 60 #include <sys/devcache.h> 61 62 63 /* 64 * ncache is a negative cache of failed lookups. 
An entry
 * is added after an attempt to configure a device by that
 * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
 * does not need to be attempted.  If a name is created matching
 * an entry in ncache, that entry is removed, with the
 * persistent store updated.
 *
 * Implicit reconfig is initiated for any name during lookup that
 * can't be resolved from the backing store and that isn't
 * present in the negative cache.  This functionality is
 * enabled during system startup once communication with devfsadm
 * can be achieved.  Since readdir is more general, implicit
 * reconfig initiated by reading a directory isn't enabled until
 * the system is more fully booted, at the time of the multi-user
 * milestone, corresponding to init state 2.
 *
 * A maximum is imposed on the number of entries in the cache
 * to limit some script going wild and as a defense against attack.
 * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
 *
 * Each entry also has an expiration count.  When a name in the cache
 * is looked up, its count is reset to the default.  Subsequent boots
 * will decrement the count if a name isn't referenced.  This permits
 * a once-only entry to eventually be removed over time.
 *
 * sdev_reconfig_delay implements a "debounce" of the timing beyond
 * system available indication, providing what the filesystem considers
 * to be the system-is-fully-booted state.  This is provided to adjust
 * the timing if some application startup is performing a readdir
 * in /dev that initiates a troublesome implicit reconfig on every boot.
 *
 * sdev_nc_disable_reset can be used to disable clearing the negative cache
 * on reconfig boot.  The default is to clear the cache on reconfig boot.
 * sdev_nc_disable can be used to disable the negative cache itself.
 *
 * sdev_reconfig_disable can be used to disable implicit reconfig.
101 * The default is that implicit reconfig is enabled. 102 */ 103 104 /* tunables and defaults */ 105 #define SDEV_NC_EXPIRECNT 4 106 #define SDEV_NC_MAX_ENTRIES 64 107 #define SEV_RECONFIG_DELAY 6 /* seconds */ 108 109 /* tunables */ 110 int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT; 111 int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES; 112 int sdev_reconfig_delay = SEV_RECONFIG_DELAY; 113 int sdev_reconfig_verbose = 0; 114 int sdev_reconfig_disable = 0; 115 int sdev_nc_disable = 0; 116 int sdev_nc_disable_reset = 0; 117 int sdev_nc_verbose = 0; 118 int sdev_cache_read_disable = 0; 119 int sdev_cache_write_disable = 0; 120 121 /* globals */ 122 int sdev_boot_state = SDEV_BOOT_STATE_INITIAL; 123 int sdev_reconfig_boot = 0; 124 sdev_nc_list_t *sdev_ncache; 125 static nvf_handle_t sdevfd_handle; 126 127 /* static prototypes */ 128 static void sdev_ncache_write_complete(nvf_handle_t); 129 static void sdev_ncache_write(void); 130 static void sdev_ncache_process_store(void); 131 static sdev_nc_list_t *sdev_nc_newlist(void); 132 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *); 133 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *); 134 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *); 135 static void sdev_nc_free_bootonly(void); 136 static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *); 137 static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **); 138 static void sdev_ncache_list_free(nvf_handle_t); 139 static void sdev_nvp_free(nvp_devname_t *); 140 141 /* 142 * Registration for /etc/devices/devname_cache 143 */ 144 static nvf_ops_t sdev_cache_ops = { 145 "/etc/devices/devname_cache", /* path to cache */ 146 sdev_ncache_unpack_nvlist, /* read: unpack nvlist */ 147 sdev_ncache_pack_list, /* write: pack list */ 148 sdev_ncache_list_free, /* free data list */ 149 sdev_ncache_write_complete /* write complete callback */ 150 }; 151 152 /* 153 * called once at filesystem initialization 154 */ 155 void 156 
sdev_ncache_init(void) 157 { 158 sdev_ncache = sdev_nc_newlist(); 159 } 160 161 /* 162 * called at mount of the global instance 163 * currently the global instance is never unmounted 164 */ 165 void 166 sdev_ncache_setup(void) 167 { 168 sdevfd_handle = nvf_register_file(&sdev_cache_ops); 169 ASSERT(sdevfd_handle); 170 171 list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t), 172 offsetof(nvp_devname_t, nvp_link)); 173 174 rw_enter(nvf_lock(sdevfd_handle), RW_WRITER); 175 if (!sdev_cache_read_disable) { 176 (void) nvf_read_file(sdevfd_handle); 177 } 178 sdev_ncache_process_store(); 179 rw_exit(nvf_lock(sdevfd_handle)); 180 181 sdev_devstate_change(); 182 } 183 184 static void 185 sdev_nvp_free(nvp_devname_t *dp) 186 { 187 int i; 188 char **p; 189 190 if (dp->nvp_npaths > 0) { 191 p = dp->nvp_paths; 192 for (i = 0; i < dp->nvp_npaths; i++, p++) { 193 kmem_free(*p, strlen(*p)+1); 194 } 195 kmem_free(dp->nvp_paths, 196 dp->nvp_npaths * sizeof (char *)); 197 kmem_free(dp->nvp_expirecnts, 198 dp->nvp_npaths * sizeof (int)); 199 } 200 201 kmem_free(dp, sizeof (nvp_devname_t)); 202 } 203 204 static void 205 sdev_ncache_list_free(nvf_handle_t fd) 206 { 207 list_t *listp; 208 nvp_devname_t *dp; 209 210 ASSERT(fd == sdevfd_handle); 211 ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 212 213 listp = nvf_list(fd); 214 if ((dp = list_head(listp)) != NULL) { 215 list_remove(listp, dp); 216 sdev_nvp_free(dp); 217 } 218 } 219 220 /* 221 * Unpack a device path/nvlist pair to internal data list format. 222 * Used to decode the nvlist format into the internal representation 223 * when reading /etc/devices/devname_cache. 224 * Note that the expiration counts are optional, for compatibility 225 * with earlier instances of the cache. If not present, the 226 * expire counts are initialized to defaults. 
227 */ 228 static int 229 sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name) 230 { 231 nvp_devname_t *np; 232 char **strs; 233 int *cnts; 234 uint_t nstrs, ncnts; 235 int rval, i; 236 237 ASSERT(fd == sdevfd_handle); 238 ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 239 240 /* name of the sublist must match what we created */ 241 if (strcmp(name, DP_DEVNAME_ID) != 0) { 242 return (-1); 243 } 244 245 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP); 246 247 rval = nvlist_lookup_string_array(nvl, 248 DP_DEVNAME_NCACHE_ID, &strs, &nstrs); 249 if (rval) { 250 kmem_free(np, sizeof (nvp_devname_t)); 251 return (-1); 252 } 253 254 np->nvp_npaths = nstrs; 255 np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP); 256 for (i = 0; i < nstrs; i++) { 257 np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP); 258 } 259 np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP); 260 for (i = 0; i < nstrs; i++) { 261 np->nvp_expirecnts[i] = sdev_nc_expirecnt; 262 } 263 264 rval = nvlist_lookup_int32_array(nvl, 265 DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts); 266 if (rval == 0) { 267 ASSERT(ncnts == nstrs); 268 ncnts = min(ncnts, nstrs); 269 for (i = 0; i < nstrs; i++) { 270 np->nvp_expirecnts[i] = cnts[i]; 271 } 272 } 273 274 list_insert_tail(nvf_list(sdevfd_handle), np); 275 276 return (0); 277 } 278 279 /* 280 * Pack internal format cache data to a single nvlist. 281 * Used when writing the nvlist file. 282 * Note this is called indirectly by the nvpflush daemon. 
283 */ 284 static int 285 sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl) 286 { 287 nvlist_t *nvl, *sub_nvl; 288 nvp_devname_t *np; 289 int rval; 290 list_t *listp; 291 292 ASSERT(fd == sdevfd_handle); 293 ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 294 295 rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); 296 if (rval != 0) { 297 nvf_error("%s: nvlist alloc error %d\n", 298 nvf_cache_name(fd), rval); 299 return (DDI_FAILURE); 300 } 301 302 listp = nvf_list(sdevfd_handle); 303 if ((np = list_head(listp)) != NULL) { 304 ASSERT(list_next(listp, np) == NULL); 305 306 rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP); 307 if (rval != 0) { 308 nvf_error("%s: nvlist alloc error %d\n", 309 nvf_cache_name(fd), rval); 310 sub_nvl = NULL; 311 goto err; 312 } 313 314 rval = nvlist_add_string_array(sub_nvl, 315 DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths); 316 if (rval != 0) { 317 nvf_error("%s: nvlist add error %d (sdev)\n", 318 nvf_cache_name(fd), rval); 319 goto err; 320 } 321 322 rval = nvlist_add_int32_array(sub_nvl, 323 DP_DEVNAME_NC_EXPIRECNT_ID, 324 np->nvp_expirecnts, np->nvp_npaths); 325 if (rval != 0) { 326 nvf_error("%s: nvlist add error %d (sdev)\n", 327 nvf_cache_name(fd), rval); 328 goto err; 329 } 330 331 rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl); 332 if (rval != 0) { 333 nvf_error("%s: nvlist add error %d (sublist)\n", 334 nvf_cache_name(fd), rval); 335 goto err; 336 } 337 nvlist_free(sub_nvl); 338 } 339 340 *ret_nvl = nvl; 341 return (DDI_SUCCESS); 342 343 err: 344 if (sub_nvl) 345 nvlist_free(sub_nvl); 346 nvlist_free(nvl); 347 *ret_nvl = NULL; 348 return (DDI_FAILURE); 349 } 350 351 /* 352 * Run through the data read from the backing cache store 353 * to establish the initial state of the neg. cache. 
354 */ 355 static void 356 sdev_ncache_process_store(void) 357 { 358 sdev_nc_list_t *ncl = sdev_ncache; 359 nvp_devname_t *np; 360 sdev_nc_node_t *lp; 361 char *path; 362 int i, n; 363 list_t *listp; 364 365 if (sdev_nc_disable) 366 return; 367 368 ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle))); 369 370 listp = nvf_list(sdevfd_handle); 371 for (np = list_head(listp); np; np = list_next(listp, np)) { 372 for (i = 0; i < np->nvp_npaths; i++) { 373 sdcmn_err5((" %s %d\n", 374 np->nvp_paths[i], np->nvp_expirecnts[i])); 375 if (ncl->ncl_nentries < sdev_nc_max_entries) { 376 path = np->nvp_paths[i]; 377 n = strlen(path) + 1; 378 lp = kmem_alloc(sizeof (sdev_nc_node_t), 379 KM_SLEEP); 380 lp->ncn_name = kmem_alloc(n, KM_SLEEP); 381 bcopy(path, lp->ncn_name, n); 382 lp->ncn_flags = NCN_SRC_STORE; 383 lp->ncn_expirecnt = np->nvp_expirecnts[i]; 384 sdev_nc_insertnode(ncl, lp); 385 } else if (sdev_nc_verbose) { 386 cmn_err(CE_CONT, 387 "?%s: truncating from ncache (max %d)\n", 388 np->nvp_paths[i], sdev_nc_max_entries); 389 } 390 } 391 } 392 } 393 394 /* 395 * called by nvpflush daemon to inform us that an update of 396 * the cache file has been completed. 397 */ 398 static void 399 sdev_ncache_write_complete(nvf_handle_t fd) 400 { 401 sdev_nc_list_t *ncl = sdev_ncache; 402 403 ASSERT(fd == sdevfd_handle); 404 405 mutex_enter(&ncl->ncl_mutex); 406 407 ASSERT(ncl->ncl_flags & NCL_LIST_WRITING); 408 409 if (ncl->ncl_flags & NCL_LIST_DIRTY) { 410 sdcmn_err5(("ncache write complete but dirty again\n")); 411 ncl->ncl_flags &= ~NCL_LIST_DIRTY; 412 mutex_exit(&ncl->ncl_mutex); 413 sdev_ncache_write(); 414 } else { 415 sdcmn_err5(("ncache write complete\n")); 416 ncl->ncl_flags &= ~NCL_LIST_WRITING; 417 mutex_exit(&ncl->ncl_mutex); 418 rw_enter(nvf_lock(fd), RW_WRITER); 419 sdev_ncache_list_free(fd); 420 rw_exit(nvf_lock(fd)); 421 } 422 } 423 424 /* 425 * Prepare to perform an update of the neg. cache backing store. 
426 */ 427 static void 428 sdev_ncache_write(void) 429 { 430 sdev_nc_list_t *ncl = sdev_ncache; 431 nvp_devname_t *np; 432 sdev_nc_node_t *lp; 433 int n, i; 434 435 if (sdev_cache_write_disable) { 436 mutex_enter(&ncl->ncl_mutex); 437 ncl->ncl_flags &= ~NCL_LIST_WRITING; 438 mutex_exit(&ncl->ncl_mutex); 439 return; 440 } 441 442 /* proper lock ordering here is essential */ 443 rw_enter(nvf_lock(sdevfd_handle), RW_WRITER); 444 sdev_ncache_list_free(sdevfd_handle); 445 446 rw_enter(&ncl->ncl_lock, RW_READER); 447 n = ncl->ncl_nentries; 448 ASSERT(n <= sdev_nc_max_entries); 449 450 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP); 451 np->nvp_npaths = n; 452 np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP); 453 np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP); 454 455 i = 0; 456 for (lp = list_head(&ncl->ncl_list); lp; 457 lp = list_next(&ncl->ncl_list, lp)) { 458 np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP); 459 np->nvp_expirecnts[i] = lp->ncn_expirecnt; 460 sdcmn_err5((" %s %d\n", 461 np->nvp_paths[i], np->nvp_expirecnts[i])); 462 i++; 463 } 464 465 rw_exit(&ncl->ncl_lock); 466 467 nvf_mark_dirty(sdevfd_handle); 468 list_insert_tail(nvf_list(sdevfd_handle), np); 469 rw_exit(nvf_lock(sdevfd_handle)); 470 471 nvf_wake_daemon(); 472 } 473 474 static void 475 sdev_nc_flush_updates(void) 476 { 477 sdev_nc_list_t *ncl = sdev_ncache; 478 479 if (sdev_nc_disable || sdev_cache_write_disable) 480 return; 481 482 mutex_enter(&ncl->ncl_mutex); 483 if (((ncl->ncl_flags & 484 (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) == 485 (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) { 486 ncl->ncl_flags &= ~NCL_LIST_DIRTY; 487 ncl->ncl_flags |= NCL_LIST_WRITING; 488 mutex_exit(&ncl->ncl_mutex); 489 sdev_ncache_write(); 490 } else { 491 mutex_exit(&ncl->ncl_mutex); 492 } 493 } 494 495 static void 496 sdev_nc_flush_boot_update(void) 497 { 498 sdev_nc_list_t *ncl = sdev_ncache; 499 500 if (sdev_nc_disable || sdev_cache_write_disable || 501 (sdev_boot_state 
== SDEV_BOOT_STATE_INITIAL)) { 502 return; 503 } 504 mutex_enter(&ncl->ncl_mutex); 505 if (ncl->ncl_flags & NCL_LIST_WENABLE) { 506 mutex_exit(&ncl->ncl_mutex); 507 sdev_nc_flush_updates(); 508 } else { 509 mutex_exit(&ncl->ncl_mutex); 510 } 511 512 } 513 514 static void 515 sdev_state_boot_complete() 516 { 517 sdev_nc_list_t *ncl = sdev_ncache; 518 sdev_nc_node_t *lp, *next; 519 520 /* 521 * Once boot is complete, decrement the expire count of each entry 522 * in the cache not touched by a reference. Remove any that 523 * goes to zero. This effectively removes random entries over 524 * time. 525 */ 526 rw_enter(&ncl->ncl_lock, RW_WRITER); 527 mutex_enter(&ncl->ncl_mutex); 528 529 for (lp = list_head(&ncl->ncl_list); lp; lp = next) { 530 next = list_next(&ncl->ncl_list, lp); 531 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) { 532 if (lp->ncn_flags & NCN_ACTIVE) { 533 if (lp->ncn_expirecnt != sdev_nc_expirecnt) { 534 lp->ncn_expirecnt = sdev_nc_expirecnt; 535 ncl->ncl_flags |= NCL_LIST_DIRTY; 536 } 537 } else { 538 if (--lp->ncn_expirecnt == 0) { 539 list_remove(&ncl->ncl_list, lp); 540 sdev_nc_free_unlinked_node(lp); 541 ncl->ncl_nentries--; 542 } 543 ncl->ncl_flags |= NCL_LIST_DIRTY; 544 } 545 } 546 } 547 548 mutex_exit(&ncl->ncl_mutex); 549 rw_exit(&ncl->ncl_lock); 550 551 sdev_nc_flush_boot_update(); 552 sdev_boot_state = SDEV_BOOT_STATE_COMPLETE; 553 } 554 555 /* 556 * Upon transition to the login state on a reconfigure boot, 557 * a debounce timer is set up so that we cache all the nonsense 558 * lookups we're hit with by the windowing system startup. 
559 */ 560 561 /*ARGSUSED*/ 562 static void 563 sdev_state_timeout(void *arg) 564 { 565 sdev_state_boot_complete(); 566 } 567 568 static void 569 sdev_state_sysavail() 570 { 571 sdev_nc_list_t *ncl = sdev_ncache; 572 clock_t nticks; 573 int nsecs; 574 575 mutex_enter(&ncl->ncl_mutex); 576 ncl->ncl_flags |= NCL_LIST_WENABLE; 577 mutex_exit(&ncl->ncl_mutex); 578 579 nsecs = sdev_reconfig_delay; 580 if (nsecs == 0) { 581 sdev_state_boot_complete(); 582 } else { 583 nticks = drv_sectohz(nsecs); 584 sdcmn_err5(("timeout initiated %ld\n", nticks)); 585 (void) timeout(sdev_state_timeout, NULL, nticks); 586 sdev_nc_flush_boot_update(); 587 } 588 } 589 590 /* 591 * Called to inform the filesystem of progress during boot, 592 * either a notice of reconfiguration boot or an indication of 593 * system boot complete. At system boot complete, set up a 594 * timer at the expiration of which no further failed lookups 595 * will be added to the negative cache. 596 * 597 * The dev filesystem infers from reconfig boot that implicit 598 * reconfig need not be invoked at all as all available devices 599 * will have already been named. 600 * 601 * The dev filesystem infers from "system available" that devfsadmd 602 * can now be run and hence implicit reconfiguration may be initiated. 603 * During early stages of system startup, implicit reconfig is 604 * not done to avoid impacting boot performance. 605 */ 606 void 607 sdev_devstate_change(void) 608 { 609 int new_state; 610 611 /* 612 * Track system state and manage interesting transitions 613 */ 614 new_state = SDEV_BOOT_STATE_INITIAL; 615 if (i_ddi_reconfig()) 616 new_state = SDEV_BOOT_STATE_RECONFIG; 617 if (i_ddi_sysavail()) 618 new_state = SDEV_BOOT_STATE_SYSAVAIL; 619 620 if (sdev_boot_state < new_state) { 621 switch (new_state) { 622 case SDEV_BOOT_STATE_RECONFIG: 623 sdcmn_err5(("state change: reconfigure boot\n")); 624 sdev_boot_state = new_state; 625 /* 626 * The /dev filesystem fills a hot-plug .vs. 
627 * public-namespace gap by invoking 'devfsadm' once 628 * as a result of the first /dev lookup failure 629 * (or getdents/readdir). Originally, it was thought 630 * that a reconfig reboot did not have a hot-plug gap, 631 * but this is not true - the gap is just smaller: 632 * it exists from the the time the smf invocation of 633 * devfsadm completes its forced devinfo snapshot, 634 * to the time when the smf devfsadmd daemon invocation 635 * is set up and listening for hotplug sysevents. 636 * Since there is still a gap with reconfig reboot, 637 * we no longer set 'sdev_reconfig_boot'. 638 */ 639 if (!sdev_nc_disable_reset) 640 sdev_nc_free_bootonly(); 641 break; 642 case SDEV_BOOT_STATE_SYSAVAIL: 643 sdcmn_err5(("system available\n")); 644 sdev_boot_state = new_state; 645 sdev_state_sysavail(); 646 break; 647 } 648 } 649 } 650 651 /* 652 * Lookup: filter out entries in the negative cache 653 * Return 1 if the lookup should not cause a reconfig. 654 */ 655 int 656 sdev_lookup_filter(sdev_node_t *dv, char *nm) 657 { 658 int n; 659 sdev_nc_list_t *ncl = sdev_ncache; 660 sdev_nc_node_t *lp; 661 char *path; 662 int rval = 0; 663 int changed = 0; 664 665 ASSERT(i_ddi_io_initialized()); 666 ASSERT(SDEVTOV(dv)->v_type == VDIR); 667 668 if (sdev_nc_disable) 669 return (0); 670 671 n = strlen(dv->sdev_path) + strlen(nm) + 2; 672 path = kmem_alloc(n, KM_SLEEP); 673 (void) sprintf(path, "%s/%s", dv->sdev_path, nm); 674 675 rw_enter(&ncl->ncl_lock, RW_READER); 676 if ((lp = sdev_nc_findpath(ncl, path)) != NULL) { 677 sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n", 678 dv->sdev_name, nm, curproc->p_user.u_comm)); 679 if (sdev_nc_verbose) { 680 cmn_err(CE_CONT, 681 "?%s/%s: lookup by %s cached, no reconfig\n", 682 dv->sdev_name, nm, curproc->p_user.u_comm); 683 } 684 mutex_enter(&ncl->ncl_mutex); 685 lp->ncn_flags |= NCN_ACTIVE; 686 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 && 687 lp->ncn_expirecnt < sdev_nc_expirecnt) { 688 lp->ncn_expirecnt = 
sdev_nc_expirecnt; 689 ncl->ncl_flags |= NCL_LIST_DIRTY; 690 changed = 1; 691 } 692 mutex_exit(&ncl->ncl_mutex); 693 rval = 1; 694 } 695 rw_exit(&ncl->ncl_lock); 696 kmem_free(path, n); 697 if (changed) 698 sdev_nc_flush_boot_update(); 699 return (rval); 700 } 701 702 void 703 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags) 704 { 705 if (sdev_nc_disable) 706 return; 707 708 /* 709 * If we're still in the initial boot stage, always update 710 * the cache - we may not have received notice of the 711 * reconfig boot state yet. On a reconfigure boot, entries 712 * from the backing store are not re-persisted on update, 713 * but new entries are marked as needing an update. 714 * Never cache dynamic or non-global nodes. 715 */ 716 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && 717 !SDEV_IS_NO_NCACHE(dv) && 718 ((failed_flags & SLF_NO_NCACHE) == 0) && 719 ((sdev_reconfig_boot && 720 (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) || 721 (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) { 722 sdev_nc_addname(sdev_ncache, 723 dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE); 724 } 725 } 726 727 static sdev_nc_list_t * 728 sdev_nc_newlist(void) 729 { 730 sdev_nc_list_t *ncl; 731 732 ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP); 733 734 rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL); 735 mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL); 736 list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t), 737 offsetof(sdev_nc_node_t, ncn_link)); 738 739 return (ncl); 740 } 741 742 static void 743 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp) 744 { 745 kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1); 746 kmem_free(lp, sizeof (sdev_nc_node_t)); 747 } 748 749 static sdev_nc_node_t * 750 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path) 751 { 752 sdev_nc_node_t *lp; 753 754 ASSERT(RW_LOCK_HELD(&ncl->ncl_lock)); 755 756 for (lp = list_head(&ncl->ncl_list); lp; 757 lp = list_next(&ncl->ncl_list, lp)) { 758 if (strcmp(path, lp->ncn_name) == 0) 759 return (lp); 760 
} 761 762 return (NULL); 763 } 764 765 static void 766 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new) 767 { 768 sdev_nc_node_t *lp; 769 770 rw_enter(&ncl->ncl_lock, RW_WRITER); 771 772 lp = sdev_nc_findpath(ncl, new->ncn_name); 773 if (lp == NULL) { 774 if (ncl->ncl_nentries == sdev_nc_max_entries) { 775 sdcmn_err5(( 776 "%s by %s: not adding to ncache (max %d)\n", 777 new->ncn_name, curproc->p_user.u_comm, 778 ncl->ncl_nentries)); 779 if (sdev_nc_verbose) { 780 cmn_err(CE_CONT, "?%s by %s: " 781 "not adding to ncache (max %d)\n", 782 new->ncn_name, curproc->p_user.u_comm, 783 ncl->ncl_nentries); 784 } 785 rw_exit(&ncl->ncl_lock); 786 sdev_nc_free_unlinked_node(new); 787 } else { 788 789 list_insert_tail(&ncl->ncl_list, new); 790 ncl->ncl_nentries++; 791 792 /* don't mark list dirty for nodes from store */ 793 mutex_enter(&ncl->ncl_mutex); 794 if ((new->ncn_flags & NCN_SRC_STORE) == 0) { 795 sdcmn_err5(("%s by %s: add to ncache\n", 796 new->ncn_name, curproc->p_user.u_comm)); 797 if (sdev_nc_verbose) { 798 cmn_err(CE_CONT, 799 "?%s by %s: add to ncache\n", 800 new->ncn_name, 801 curproc->p_user.u_comm); 802 } 803 ncl->ncl_flags |= NCL_LIST_DIRTY; 804 } 805 mutex_exit(&ncl->ncl_mutex); 806 rw_exit(&ncl->ncl_lock); 807 lp = new; 808 sdev_nc_flush_boot_update(); 809 } 810 } else { 811 mutex_enter(&ncl->ncl_mutex); 812 lp->ncn_flags |= new->ncn_flags; 813 mutex_exit(&ncl->ncl_mutex); 814 rw_exit(&ncl->ncl_lock); 815 sdev_nc_free_unlinked_node(new); 816 } 817 } 818 819 void 820 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags) 821 { 822 int n; 823 sdev_nc_node_t *lp; 824 825 ASSERT(SDEVTOV(dv)->v_type == VDIR); 826 827 lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP); 828 829 n = strlen(dv->sdev_path) + strlen(nm) + 2; 830 lp->ncn_name = kmem_alloc(n, KM_SLEEP); 831 (void) sprintf(lp->ncn_name, "%s/%s", 832 dv->sdev_path, nm); 833 lp->ncn_flags = flags; 834 lp->ncn_expirecnt = sdev_nc_expirecnt; 835 sdev_nc_insertnode(ncl, lp); 
836 } 837 838 void 839 sdev_nc_node_exists(sdev_node_t *dv) 840 { 841 /* dynamic and non-global nodes are never cached */ 842 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && 843 !SDEV_IS_NO_NCACHE(dv)) { 844 sdev_nc_path_exists(sdev_ncache, dv->sdev_path); 845 } 846 } 847 848 void 849 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path) 850 { 851 sdev_nc_node_t *lp; 852 853 if (sdev_nc_disable) 854 return; 855 856 rw_enter(&ncl->ncl_lock, RW_READER); 857 if ((lp = sdev_nc_findpath(ncl, path)) == NULL) { 858 rw_exit(&ncl->ncl_lock); 859 return; 860 } 861 if (rw_tryupgrade(&ncl->ncl_lock) == 0) { 862 rw_exit(&ncl->ncl_lock); 863 rw_enter(&ncl->ncl_lock, RW_WRITER); 864 lp = sdev_nc_findpath(ncl, path); 865 } 866 if (lp) { 867 list_remove(&ncl->ncl_list, lp); 868 ncl->ncl_nentries--; 869 mutex_enter(&ncl->ncl_mutex); 870 ncl->ncl_flags |= NCL_LIST_DIRTY; 871 if (ncl->ncl_flags & NCL_LIST_WENABLE) { 872 mutex_exit(&ncl->ncl_mutex); 873 rw_exit(&ncl->ncl_lock); 874 sdev_nc_flush_updates(); 875 } else { 876 mutex_exit(&ncl->ncl_mutex); 877 rw_exit(&ncl->ncl_lock); 878 } 879 sdev_nc_free_unlinked_node(lp); 880 sdcmn_err5(("%s by %s: removed from ncache\n", 881 path, curproc->p_user.u_comm)); 882 if (sdev_nc_verbose) { 883 cmn_err(CE_CONT, "?%s by %s: removed from ncache\n", 884 path, curproc->p_user.u_comm); 885 } 886 } else 887 rw_exit(&ncl->ncl_lock); 888 } 889 890 static void 891 sdev_nc_free_bootonly(void) 892 { 893 sdev_nc_list_t *ncl = sdev_ncache; 894 sdev_nc_node_t *lp; 895 sdev_nc_node_t *next; 896 897 rw_enter(&ncl->ncl_lock, RW_WRITER); 898 899 for (lp = list_head(&ncl->ncl_list); lp; lp = next) { 900 next = list_next(&ncl->ncl_list, lp); 901 if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) { 902 sdcmn_err5(("freeing %s\n", lp->ncn_name)); 903 mutex_enter(&ncl->ncl_mutex); 904 ncl->ncl_flags |= NCL_LIST_DIRTY; 905 mutex_exit(&ncl->ncl_mutex); 906 list_remove(&ncl->ncl_list, lp); 907 sdev_nc_free_unlinked_node(lp); 908 ncl->ncl_nentries--; 909 } 910 } 911 912 
rw_exit(&ncl->ncl_lock); 913 }