1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * negative cache handling for the /dev fs
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/t_lock.h>
  33 #include <sys/systm.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/user.h>
  36 #include <sys/time.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/file.h>
  40 #include <sys/fcntl.h>
  41 #include <sys/flock.h>
  42 #include <sys/kmem.h>
  43 #include <sys/uio.h>
  44 #include <sys/errno.h>
  45 #include <sys/stat.h>
  46 #include <sys/cred.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/mode.h>
  50 #include <sys/policy.h>
  51 #include <fs/fs_subr.h>
  52 #include <sys/mount.h>
  53 #include <sys/fs/snode.h>
  54 #include <sys/fs/dv_node.h>
  55 #include <sys/fs/sdev_impl.h>
  56 #include <sys/sunndi.h>
  57 #include <sys/sunmdi.h>
  58 #include <sys/ddi.h>
  59 #include <sys/modctl.h>
  60 #include <sys/devcache.h>
  61 
  62 
  63 /*
  64  * ncache is a negative cache of failed lookups.  An entry
  65  * is added after an attempt to configure a device by that
 * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
 * does not need to be attempted.  If a name is created matching
  69  * an entry in ncache, that entry is removed, with the
  70  * persistent store updated.
  71  *
  72  * Implicit reconfig is initiated for any name during lookup that
  73  * can't be resolved from the backing store and that isn't
  74  * present in the negative cache.  This functionality is
  75  * enabled during system startup once communication with devfsadm
  76  * can be achieved.  Since readdir is more general, implicit
  77  * reconfig initiated by reading a directory isn't enabled until
  78  * the system is more fully booted, at the time of the multi-user
  79  * milestone, corresponding to init state 2.
  80  *
  81  * A maximum is imposed on the number of entries in the cache
  82  * to limit some script going wild and as a defense against attack.
  83  * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
  84  *
 * Each entry also has an expiration count.  When a name in the
 * cache is looked up, its count is reset to the default.  Subsequent
 * boots will decrement the count if a name isn't referenced.  This
 * permits a once-only entry to eventually be removed over time.
  89  *
  90  * sdev_reconfig_delay implements a "debounce" of the timing beyond
  91  * system available indication, providing what the filesystem considers
  92  * to be the system-is-fully-booted state.  This is provided to adjust
  93  * the timing if some application startup is performing a readdir
  94  * in /dev that initiates a troublesome implicit reconfig on every boot.
  95  *
  96  * sdev_nc_disable_reset can be used to disable clearing the negative cache
  97  * on reconfig boot.  The default is to clear the cache on reconfig boot.
  98  * sdev_nc_disable can be used to disable the negative cache itself.
  99  *
 100  * sdev_reconfig_disable can be used to disable implicit reconfig.
 101  * The default is that implicit reconfig is enabled.
 102  */
 103 
/*
 * tunables and defaults
 *
 * NOTE(review): SEV_RECONFIG_DELAY lacks the 'D' of the SDEV_ prefix used
 * by its siblings; presumably a typo, but it is only referenced below.
 */
#define SDEV_NC_EXPIRECNT       4
#define SDEV_NC_MAX_ENTRIES     64
#define SEV_RECONFIG_DELAY      6       /* seconds */

/*
 * tunables
 *
 * sdev_nc_expirecnt		expire count given to new entries and restored
 *				on entries that are referenced
 * sdev_nc_max_entries		upper bound on the number of cached names
 * sdev_reconfig_delay		seconds to wait after "system available"
 *				before boot is treated as complete; 0 means
 *				no delay
 * sdev_reconfig_verbose	not referenced in this file
 * sdev_reconfig_disable	not referenced in this file; per the overview
 *				comment it disables implicit reconfig
 * sdev_nc_disable		non-zero disables the negative cache
 * sdev_nc_disable_reset	non-zero keeps store-sourced entries on a
 *				reconfig boot instead of discarding them
 * sdev_nc_verbose		non-zero emits cmn_err() notices on cache
 *				hits, additions, removals and truncation
 * sdev_cache_read_disable	non-zero skips reading the backing store
 * sdev_cache_write_disable	non-zero suppresses writes to the backing store
 */
int     sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
int     sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
int     sdev_reconfig_delay = SEV_RECONFIG_DELAY;
int     sdev_reconfig_verbose = 0;
int     sdev_reconfig_disable = 0;
int     sdev_nc_disable = 0;
int     sdev_nc_disable_reset = 0;
int     sdev_nc_verbose = 0;
int     sdev_cache_read_disable = 0;
int     sdev_cache_write_disable = 0;

/*
 * globals
 *
 * sdev_boot_state	boot progress as tracked by sdev_devstate_change()
 * sdev_reconfig_boot	consulted by sdev_lookup_failed(); never set in
 *			this file
 * sdev_ncache		the in-memory negative cache, created by
 *			sdev_ncache_init()
 * sdevfd_handle	nvf handle for the backing store, obtained in
 *			sdev_ncache_setup()
 */
int     sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
int     sdev_reconfig_boot = 0;
sdev_nc_list_t *sdev_ncache;
static nvf_handle_t sdevfd_handle;
 126 
/* static prototypes */

/* backing store write path */
static void sdev_ncache_write_complete(nvf_handle_t);
static void sdev_ncache_write(void);
/* seeding the in-memory cache from the data read at setup */
static void sdev_ncache_process_store(void);
/* in-memory list primitives */
static sdev_nc_list_t *sdev_nc_newlist(void);
static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
static void sdev_nc_free_bootonly(void);
/* nvf callbacks (see sdev_cache_ops) and their helper */
static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
static void sdev_ncache_list_free(nvf_handle_t);
static void sdev_nvp_free(nvp_devname_t *);
 140 
/*
 * Registration for /etc/devices/devname_cache
 *
 * This table is handed to nvf_register_file() by sdev_ncache_setup().
 */
static nvf_ops_t sdev_cache_ops = {
        "/etc/devices/devname_cache",           /* path to cache */
        sdev_ncache_unpack_nvlist,              /* read: unpack nvlist */
        sdev_ncache_pack_list,                  /* write: pack list */
        sdev_ncache_list_free,                  /* free data list */
        sdev_ncache_write_complete              /* write complete callback */
};
 151 
/*
 * called once at filesystem initialization
 *
 * Allocates the (initially empty) in-memory negative cache and publishes
 * it through the sdev_ncache global.
 */
void
sdev_ncache_init(void)
{
        sdev_ncache = sdev_nc_newlist();
}
 160 
/*
 * called at mount of the global instance
 * currently the global instance is never unmounted
 *
 * Registers the cache file, creates the nvf data list, reads the backing
 * store unless sdev_cache_read_disable is set, seeds the in-memory
 * negative cache from whatever is on the data list, and finally evaluates
 * the current boot state.
 */
void
sdev_ncache_setup(void)
{
        sdevfd_handle = nvf_register_file(&sdev_cache_ops);
        ASSERT(sdevfd_handle);

        list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
            offsetof(nvp_devname_t, nvp_link));

        /*
         * sdev_ncache_process_store() asserts that the nvf lock is
         * held as writer.
         */
        rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
        if (!sdev_cache_read_disable) {
                /* the result of the read is deliberately ignored */
                (void) nvf_read_file(sdevfd_handle);
        }
        sdev_ncache_process_store();
        rw_exit(nvf_lock(sdevfd_handle));

        sdev_devstate_change();
}
 183 
 184 static void
 185 sdev_nvp_free(nvp_devname_t *dp)
 186 {
 187         int     i;
 188         char    **p;
 189 
 190         if (dp->nvp_npaths > 0) {
 191                 p = dp->nvp_paths;
 192                 for (i = 0; i < dp->nvp_npaths; i++, p++) {
 193                         kmem_free(*p, strlen(*p)+1);
 194                 }
 195                 kmem_free(dp->nvp_paths,
 196                     dp->nvp_npaths * sizeof (char *));
 197                 kmem_free(dp->nvp_expirecnts,
 198                     dp->nvp_npaths * sizeof (int));
 199         }
 200 
 201         kmem_free(dp, sizeof (nvp_devname_t));
 202 }
 203 
 204 static void
 205 sdev_ncache_list_free(nvf_handle_t fd)
 206 {
 207         list_t          *listp;
 208         nvp_devname_t   *dp;
 209 
 210         ASSERT(fd == sdevfd_handle);
 211         ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
 212 
 213         listp = nvf_list(fd);
 214         if ((dp = list_head(listp)) != NULL) {
 215                 list_remove(listp, dp);
 216                 sdev_nvp_free(dp);
 217         }
 218 }
 219 
 220 /*
 221  * Unpack a device path/nvlist pair to internal data list format.
 222  * Used to decode the nvlist format into the internal representation
 223  * when reading /etc/devices/devname_cache.
 224  * Note that the expiration counts are optional, for compatibility
 225  * with earlier instances of the cache.  If not present, the
 226  * expire counts are initialized to defaults.
 227  */
 228 static int
 229 sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
 230 {
 231         nvp_devname_t *np;
 232         char    **strs;
 233         int     *cnts;
 234         uint_t  nstrs, ncnts;
 235         int     rval, i;
 236 
 237         ASSERT(fd == sdevfd_handle);
 238         ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
 239 
 240         /* name of the sublist must match what we created */
 241         if (strcmp(name, DP_DEVNAME_ID) != 0) {
 242                 return (-1);
 243         }
 244 
 245         np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
 246 
 247         rval = nvlist_lookup_string_array(nvl,
 248             DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
 249         if (rval) {
 250                 kmem_free(np, sizeof (nvp_devname_t));
 251                 return (-1);
 252         }
 253 
 254         np->nvp_npaths = nstrs;
 255         np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
 256         for (i = 0; i < nstrs; i++) {
 257                 np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
 258         }
 259         np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
 260         for (i = 0; i < nstrs; i++) {
 261                 np->nvp_expirecnts[i] = sdev_nc_expirecnt;
 262         }
 263 
 264         rval = nvlist_lookup_int32_array(nvl,
 265             DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
 266         if (rval == 0) {
 267                 ASSERT(ncnts == nstrs);
 268                 ncnts = min(ncnts, nstrs);
 269                 for (i = 0; i < nstrs; i++) {
 270                         np->nvp_expirecnts[i] = cnts[i];
 271                 }
 272         }
 273 
 274         list_insert_tail(nvf_list(sdevfd_handle), np);
 275 
 276         return (0);
 277 }
 278 
 279 /*
 280  * Pack internal format cache data to a single nvlist.
 281  * Used when writing the nvlist file.
 282  * Note this is called indirectly by the nvpflush daemon.
 283  */
 284 static int
 285 sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
 286 {
 287         nvlist_t        *nvl, *sub_nvl;
 288         nvp_devname_t   *np;
 289         int             rval;
 290         list_t          *listp;
 291 
 292         ASSERT(fd == sdevfd_handle);
 293         ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
 294 
 295         rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
 296         if (rval != 0) {
 297                 nvf_error("%s: nvlist alloc error %d\n",
 298                     nvf_cache_name(fd), rval);
 299                 return (DDI_FAILURE);
 300         }
 301 
 302         listp = nvf_list(sdevfd_handle);
 303         if ((np = list_head(listp)) != NULL) {
 304                 ASSERT(list_next(listp, np) == NULL);
 305 
 306                 rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
 307                 if (rval != 0) {
 308                         nvf_error("%s: nvlist alloc error %d\n",
 309                             nvf_cache_name(fd), rval);
 310                         sub_nvl = NULL;
 311                         goto err;
 312                 }
 313 
 314                 rval = nvlist_add_string_array(sub_nvl,
 315                     DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
 316                 if (rval != 0) {
 317                         nvf_error("%s: nvlist add error %d (sdev)\n",
 318                             nvf_cache_name(fd), rval);
 319                         goto err;
 320                 }
 321 
 322                 rval = nvlist_add_int32_array(sub_nvl,
 323                     DP_DEVNAME_NC_EXPIRECNT_ID,
 324                     np->nvp_expirecnts, np->nvp_npaths);
 325                 if (rval != 0) {
 326                         nvf_error("%s: nvlist add error %d (sdev)\n",
 327                             nvf_cache_name(fd), rval);
 328                         goto err;
 329                 }
 330 
 331                 rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
 332                 if (rval != 0) {
 333                         nvf_error("%s: nvlist add error %d (sublist)\n",
 334                             nvf_cache_name(fd), rval);
 335                         goto err;
 336                 }
 337                 nvlist_free(sub_nvl);
 338         }
 339 
 340         *ret_nvl = nvl;
 341         return (DDI_SUCCESS);
 342 
 343 err:
 344         nvlist_free(sub_nvl);
 345         nvlist_free(nvl);
 346         *ret_nvl = NULL;
 347         return (DDI_FAILURE);
 348 }
 349 
 350 /*
 351  * Run through the data read from the backing cache store
 352  * to establish the initial state of the neg. cache.
 353  */
 354 static void
 355 sdev_ncache_process_store(void)
 356 {
 357         sdev_nc_list_t  *ncl = sdev_ncache;
 358         nvp_devname_t   *np;
 359         sdev_nc_node_t  *lp;
 360         char            *path;
 361         int             i, n;
 362         list_t          *listp;
 363 
 364         if (sdev_nc_disable)
 365                 return;
 366 
 367         ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));
 368 
 369         listp = nvf_list(sdevfd_handle);
 370         for (np = list_head(listp); np; np = list_next(listp, np)) {
 371                 for (i = 0; i < np->nvp_npaths; i++) {
 372                         sdcmn_err5(("    %s %d\n",
 373                             np->nvp_paths[i], np->nvp_expirecnts[i]));
 374                         if (ncl->ncl_nentries < sdev_nc_max_entries) {
 375                                 path = np->nvp_paths[i];
 376                                 n = strlen(path) + 1;
 377                                 lp = kmem_alloc(sizeof (sdev_nc_node_t),
 378                                     KM_SLEEP);
 379                                 lp->ncn_name = kmem_alloc(n, KM_SLEEP);
 380                                 bcopy(path, lp->ncn_name, n);
 381                                 lp->ncn_flags = NCN_SRC_STORE;
 382                                 lp->ncn_expirecnt = np->nvp_expirecnts[i];
 383                                 sdev_nc_insertnode(ncl, lp);
 384                         } else if (sdev_nc_verbose) {
 385                                 cmn_err(CE_CONT,
 386                                     "?%s: truncating from ncache (max %d)\n",
 387                                     np->nvp_paths[i], sdev_nc_max_entries);
 388                         }
 389                 }
 390         }
 391 }
 392 
/*
 * called by nvpflush daemon to inform us that an update of
 * the cache file has been completed.
 *
 * NCL_LIST_WRITING is expected to be set on entry (asserted).  If the
 * in-memory cache was marked dirty again in the meantime, the dirty flag
 * is cleared and another write is started with NCL_LIST_WRITING left
 * set.  Otherwise NCL_LIST_WRITING is cleared and the nvf data list is
 * freed.
 */
static void
sdev_ncache_write_complete(nvf_handle_t fd)
{
        sdev_nc_list_t  *ncl = sdev_ncache;

        ASSERT(fd == sdevfd_handle);

        mutex_enter(&ncl->ncl_mutex);

        ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);

        if (ncl->ncl_flags & NCL_LIST_DIRTY) {
                sdcmn_err5(("ncache write complete but dirty again\n"));
                ncl->ncl_flags &= ~NCL_LIST_DIRTY;
                /* drop ncl_mutex before sdev_ncache_write() takes its locks */
                mutex_exit(&ncl->ncl_mutex);
                sdev_ncache_write();
        } else {
                sdcmn_err5(("ncache write complete\n"));
                ncl->ncl_flags &= ~NCL_LIST_WRITING;
                mutex_exit(&ncl->ncl_mutex);
                /* the snapshot that was written is no longer needed */
                rw_enter(nvf_lock(fd), RW_WRITER);
                sdev_ncache_list_free(fd);
                rw_exit(nvf_lock(fd));
        }
}
 422 
/*
 * Prepare to perform an update of the neg. cache backing store.
 *
 * Replaces whatever is on the nvf data list with a fresh snapshot of
 * the in-memory cache (copies of every name plus its expire count),
 * marks the nvf file dirty and wakes the flush daemon.
 *
 * When sdev_cache_write_disable is set, nothing is written and
 * NCL_LIST_WRITING is simply cleared.
 */
static void
sdev_ncache_write(void)
{
        sdev_nc_list_t  *ncl = sdev_ncache;
        nvp_devname_t   *np;
        sdev_nc_node_t  *lp;
        int             n, i;

        if (sdev_cache_write_disable) {
                mutex_enter(&ncl->ncl_mutex);
                ncl->ncl_flags &= ~NCL_LIST_WRITING;
                mutex_exit(&ncl->ncl_mutex);
                return;
        }

        /* proper lock ordering here is essential */
        /* order used here: nvf lock (writer) first, then ncl_lock (reader) */
        rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
        sdev_ncache_list_free(sdevfd_handle);

        rw_enter(&ncl->ncl_lock, RW_READER);
        n = ncl->ncl_nentries;
        ASSERT(n <= sdev_nc_max_entries);

        /* the arrays are sized from the entry count sampled above */
        np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
        np->nvp_npaths = n;
        np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
        np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);

        /* copy each cached name and its expire count into the snapshot */
        i = 0;
        for (lp = list_head(&ncl->ncl_list); lp;
            lp = list_next(&ncl->ncl_list, lp)) {
                np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
                np->nvp_expirecnts[i] = lp->ncn_expirecnt;
                sdcmn_err5(("    %s %d\n",
                    np->nvp_paths[i], np->nvp_expirecnts[i]));
                i++;
        }

        rw_exit(&ncl->ncl_lock);

        nvf_mark_dirty(sdevfd_handle);
        list_insert_tail(nvf_list(sdevfd_handle), np);
        rw_exit(nvf_lock(sdevfd_handle));

        nvf_wake_daemon();
}
 472 
 473 static void
 474 sdev_nc_flush_updates(void)
 475 {
 476         sdev_nc_list_t *ncl = sdev_ncache;
 477 
 478         if (sdev_nc_disable || sdev_cache_write_disable)
 479                 return;
 480 
 481         mutex_enter(&ncl->ncl_mutex);
 482         if (((ncl->ncl_flags &
 483             (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
 484             (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
 485                 ncl->ncl_flags &= ~NCL_LIST_DIRTY;
 486                 ncl->ncl_flags |= NCL_LIST_WRITING;
 487                 mutex_exit(&ncl->ncl_mutex);
 488                 sdev_ncache_write();
 489         } else {
 490                 mutex_exit(&ncl->ncl_mutex);
 491         }
 492 }
 493 
 494 static void
 495 sdev_nc_flush_boot_update(void)
 496 {
 497         sdev_nc_list_t *ncl = sdev_ncache;
 498 
 499         if (sdev_nc_disable || sdev_cache_write_disable ||
 500             (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
 501                 return;
 502         }
 503         mutex_enter(&ncl->ncl_mutex);
 504         if (ncl->ncl_flags & NCL_LIST_WENABLE) {
 505                 mutex_exit(&ncl->ncl_mutex);
 506                 sdev_nc_flush_updates();
 507         } else {
 508                 mutex_exit(&ncl->ncl_mutex);
 509         }
 510 
 511 }
 512 
 513 static void
 514 sdev_state_boot_complete()
 515 {
 516         sdev_nc_list_t  *ncl = sdev_ncache;
 517         sdev_nc_node_t  *lp, *next;
 518 
 519         /*
 520          * Once boot is complete, decrement the expire count of each entry
 521          * in the cache not touched by a reference.  Remove any that
 522          * goes to zero.  This effectively removes random entries over
 523          * time.
 524          */
 525         rw_enter(&ncl->ncl_lock, RW_WRITER);
 526         mutex_enter(&ncl->ncl_mutex);
 527 
 528         for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
 529                 next = list_next(&ncl->ncl_list, lp);
 530                 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
 531                         if (lp->ncn_flags & NCN_ACTIVE) {
 532                                 if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
 533                                         lp->ncn_expirecnt = sdev_nc_expirecnt;
 534                                         ncl->ncl_flags |= NCL_LIST_DIRTY;
 535                                 }
 536                         } else {
 537                                 if (--lp->ncn_expirecnt == 0) {
 538                                         list_remove(&ncl->ncl_list, lp);
 539                                         sdev_nc_free_unlinked_node(lp);
 540                                         ncl->ncl_nentries--;
 541                                 }
 542                                 ncl->ncl_flags |= NCL_LIST_DIRTY;
 543                         }
 544                 }
 545         }
 546 
 547         mutex_exit(&ncl->ncl_mutex);
 548         rw_exit(&ncl->ncl_lock);
 549 
 550         sdev_nc_flush_boot_update();
 551         sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
 552 }
 553 
/*
 * Upon transition to the login state on a reconfigure boot,
 * a debounce timer is set up so that we cache all the nonsense
 * lookups we're hit with by the windowing system startup.
 *
 * This is the callback armed via timeout() in sdev_state_sysavail();
 * arg is unused (NULL is passed when the timeout is armed).
 */

/*ARGSUSED*/
static void
sdev_state_timeout(void *arg)
{
        sdev_state_boot_complete();
}
 566 
 567 static void
 568 sdev_state_sysavail()
 569 {
 570         sdev_nc_list_t *ncl = sdev_ncache;
 571         clock_t nticks;
 572         int nsecs;
 573 
 574         mutex_enter(&ncl->ncl_mutex);
 575         ncl->ncl_flags |= NCL_LIST_WENABLE;
 576         mutex_exit(&ncl->ncl_mutex);
 577 
 578         nsecs = sdev_reconfig_delay;
 579         if (nsecs == 0) {
 580                 sdev_state_boot_complete();
 581         } else {
 582                 nticks = drv_usectohz(1000000 * nsecs);
 583                 sdcmn_err5(("timeout initiated %ld\n", nticks));
 584                 (void) timeout(sdev_state_timeout, NULL, nticks);
 585                 sdev_nc_flush_boot_update();
 586         }
 587 }
 588 
 589 /*
 590  * Called to inform the filesystem of progress during boot,
 591  * either a notice of reconfiguration boot or an indication of
 592  * system boot complete.  At system boot complete, set up a
 593  * timer at the expiration of which no further failed lookups
 594  * will be added to the negative cache.
 595  *
 596  * The dev filesystem infers from reconfig boot that implicit
 597  * reconfig need not be invoked at all as all available devices
 598  * will have already been named.
 599  *
 600  * The dev filesystem infers from "system available" that devfsadmd
 601  * can now be run and hence implicit reconfiguration may be initiated.
 602  * During early stages of system startup, implicit reconfig is
 603  * not done to avoid impacting boot performance.
 604  */
 605 void
 606 sdev_devstate_change(void)
 607 {
 608         int new_state;
 609 
 610         /*
 611          * Track system state and manage interesting transitions
 612          */
 613         new_state = SDEV_BOOT_STATE_INITIAL;
 614         if (i_ddi_reconfig())
 615                 new_state = SDEV_BOOT_STATE_RECONFIG;
 616         if (i_ddi_sysavail())
 617                 new_state = SDEV_BOOT_STATE_SYSAVAIL;
 618 
 619         if (sdev_boot_state < new_state) {
 620                 switch (new_state) {
 621                 case SDEV_BOOT_STATE_RECONFIG:
 622                         sdcmn_err5(("state change: reconfigure boot\n"));
 623                         sdev_boot_state = new_state;
 624                         /*
 625                          * The /dev filesystem fills a hot-plug .vs.
 626                          * public-namespace gap by invoking 'devfsadm' once
 627                          * as a result of the first /dev lookup failure
 628                          * (or getdents/readdir). Originally, it was thought
 629                          * that a reconfig reboot did not have a hot-plug gap,
 630                          * but this is not true - the gap is just smaller:
 631                          * it exists from the the time the smf invocation of
 632                          * devfsadm completes its forced devinfo snapshot,
 633                          * to the time when the smf devfsadmd daemon invocation
 634                          * is set up and listening for hotplug sysevents.
 635                          * Since there is still a gap with reconfig reboot,
 636                          * we no longer set 'sdev_reconfig_boot'.
 637                          */
 638                         if (!sdev_nc_disable_reset)
 639                                 sdev_nc_free_bootonly();
 640                         break;
 641                 case SDEV_BOOT_STATE_SYSAVAIL:
 642                         sdcmn_err5(("system available\n"));
 643                         sdev_boot_state = new_state;
 644                         sdev_state_sysavail();
 645                         break;
 646                 }
 647         }
 648 }
 649 
 650 /*
 651  * Lookup: filter out entries in the negative cache
 652  * Return 1 if the lookup should not cause a reconfig.
 653  */
 654 int
 655 sdev_lookup_filter(sdev_node_t *dv, char *nm)
 656 {
 657         int n;
 658         sdev_nc_list_t *ncl = sdev_ncache;
 659         sdev_nc_node_t *lp;
 660         char *path;
 661         int rval = 0;
 662         int changed = 0;
 663 
 664         ASSERT(i_ddi_io_initialized());
 665         ASSERT(SDEVTOV(dv)->v_type == VDIR);
 666 
 667         if (sdev_nc_disable)
 668                 return (0);
 669 
 670         n = strlen(dv->sdev_path) + strlen(nm) + 2;
 671         path = kmem_alloc(n, KM_SLEEP);
 672         (void) sprintf(path, "%s/%s", dv->sdev_path, nm);
 673 
 674         rw_enter(&ncl->ncl_lock, RW_READER);
 675         if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
 676                 sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
 677                     dv->sdev_name, nm, curproc->p_user.u_comm));
 678                 if (sdev_nc_verbose) {
 679                         cmn_err(CE_CONT,
 680                             "?%s/%s: lookup by %s cached, no reconfig\n",
 681                             dv->sdev_name, nm, curproc->p_user.u_comm);
 682                 }
 683                 mutex_enter(&ncl->ncl_mutex);
 684                 lp->ncn_flags |= NCN_ACTIVE;
 685                 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
 686                     lp->ncn_expirecnt < sdev_nc_expirecnt) {
 687                         lp->ncn_expirecnt = sdev_nc_expirecnt;
 688                         ncl->ncl_flags |= NCL_LIST_DIRTY;
 689                         changed = 1;
 690                 }
 691                 mutex_exit(&ncl->ncl_mutex);
 692                 rval = 1;
 693         }
 694         rw_exit(&ncl->ncl_lock);
 695         kmem_free(path, n);
 696         if (changed)
 697                 sdev_nc_flush_boot_update();
 698         return (rval);
 699 }
 700 
 701 void
 702 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
 703 {
 704         if (sdev_nc_disable)
 705                 return;
 706 
 707         /*
 708          * If we're still in the initial boot stage, always update
 709          * the cache - we may not have received notice of the
 710          * reconfig boot state yet.  On a reconfigure boot, entries
 711          * from the backing store are not re-persisted on update,
 712          * but new entries are marked as needing an update.
 713          * Never cache dynamic or non-global nodes.
 714          */
 715         if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
 716             !SDEV_IS_NO_NCACHE(dv) &&
 717             ((failed_flags & SLF_NO_NCACHE) == 0) &&
 718             ((sdev_reconfig_boot &&
 719             (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
 720             (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
 721                         sdev_nc_addname(sdev_ncache,
 722                             dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
 723         }
 724 }
 725 
/*
 * Allocate and initialize an empty negative cache list: zeroed counters
 * and flags, its rwlock (ncl_lock), its mutex (ncl_mutex) and the list
 * of sdev_nc_node_t entries linked through ncn_link.
 *
 * Returns the new list.
 */
static sdev_nc_list_t *
sdev_nc_newlist(void)
{
        sdev_nc_list_t  *ncl;

        ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);

        rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
        mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
            offsetof(sdev_nc_node_t, ncn_link));

        return (ncl);
}
 740 
/*
 * Free a cache node and its name string.  As the name says, the node is
 * expected to be off the list already; nothing here unlinks it.
 */
static void
sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
{
        /* the name was allocated as strlen + 1 bytes */
        kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
        kmem_free(lp, sizeof (sdev_nc_node_t));
}
 747 
 748 static sdev_nc_node_t *
 749 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
 750 {
 751         sdev_nc_node_t *lp;
 752 
 753         ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
 754 
 755         for (lp = list_head(&ncl->ncl_list); lp;
 756             lp = list_next(&ncl->ncl_list, lp)) {
 757                 if (strcmp(path, lp->ncn_name) == 0)
 758                         return (lp);
 759         }
 760 
 761         return (NULL);
 762 }
 763 
 764 static void
 765 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
 766 {
 767         sdev_nc_node_t *lp;
 768 
 769         rw_enter(&ncl->ncl_lock, RW_WRITER);
 770 
 771         lp = sdev_nc_findpath(ncl, new->ncn_name);
 772         if (lp == NULL) {
 773                 if (ncl->ncl_nentries == sdev_nc_max_entries) {
 774                         sdcmn_err5((
 775                             "%s by %s: not adding to ncache (max %d)\n",
 776                             new->ncn_name, curproc->p_user.u_comm,
 777                             ncl->ncl_nentries));
 778                         if (sdev_nc_verbose) {
 779                                 cmn_err(CE_CONT, "?%s by %s: "
 780                                     "not adding to ncache (max %d)\n",
 781                                     new->ncn_name, curproc->p_user.u_comm,
 782                                     ncl->ncl_nentries);
 783                         }
 784                         rw_exit(&ncl->ncl_lock);
 785                         sdev_nc_free_unlinked_node(new);
 786                 } else {
 787 
 788                         list_insert_tail(&ncl->ncl_list, new);
 789                         ncl->ncl_nentries++;
 790 
 791                         /* don't mark list dirty for nodes from store */
 792                         mutex_enter(&ncl->ncl_mutex);
 793                         if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
 794                                 sdcmn_err5(("%s by %s: add to ncache\n",
 795                                     new->ncn_name, curproc->p_user.u_comm));
 796                                 if (sdev_nc_verbose) {
 797                                         cmn_err(CE_CONT,
 798                                             "?%s by %s: add to ncache\n",
 799                                             new->ncn_name,
 800                                             curproc->p_user.u_comm);
 801                                 }
 802                                 ncl->ncl_flags |= NCL_LIST_DIRTY;
 803                         }
 804                         mutex_exit(&ncl->ncl_mutex);
 805                         rw_exit(&ncl->ncl_lock);
 806                         lp = new;
 807                         sdev_nc_flush_boot_update();
 808                 }
 809         } else {
 810                 mutex_enter(&ncl->ncl_mutex);
 811                 lp->ncn_flags |= new->ncn_flags;
 812                 mutex_exit(&ncl->ncl_mutex);
 813                 rw_exit(&ncl->ncl_lock);
 814                 sdev_nc_free_unlinked_node(new);
 815         }
 816 }
 817 
 818 void
 819 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
 820 {
 821         int n;
 822         sdev_nc_node_t *lp;
 823 
 824         ASSERT(SDEVTOV(dv)->v_type == VDIR);
 825 
 826         lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);
 827 
 828         n = strlen(dv->sdev_path) + strlen(nm) + 2;
 829         lp->ncn_name = kmem_alloc(n, KM_SLEEP);
 830         (void) sprintf(lp->ncn_name, "%s/%s",
 831             dv->sdev_path, nm);
 832         lp->ncn_flags = flags;
 833         lp->ncn_expirecnt = sdev_nc_expirecnt;
 834         sdev_nc_insertnode(ncl, lp);
 835 }
 836 
 837 void
 838 sdev_nc_node_exists(sdev_node_t *dv)
 839 {
 840         /* dynamic and non-global nodes are never cached */
 841         if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
 842             !SDEV_IS_NO_NCACHE(dv)) {
 843                 sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
 844         }
 845 }
 846 
/*
 * Remove path from the negative cache ncl, if present, because a node of
 * that name now exists.  The list is marked dirty and, when writes are
 * enabled (NCL_LIST_WENABLE), a flush to the backing store is started.
 * Does nothing when the negative cache is disabled.
 */
void
sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
{
        sdev_nc_node_t *lp;

        if (sdev_nc_disable)
                return;

        /* common case: the path is not cached, so a read lock suffices */
        rw_enter(&ncl->ncl_lock, RW_READER);
        if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
                rw_exit(&ncl->ncl_lock);
                return;
        }
        if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
                /*
                 * The upgrade failed, so the lock has to be dropped and
                 * re-taken as writer.  The list may have changed while
                 * it was not held, hence the second lookup.
                 */
                rw_exit(&ncl->ncl_lock);
                rw_enter(&ncl->ncl_lock, RW_WRITER);
                lp = sdev_nc_findpath(ncl, path);
        }
        if (lp) {
                list_remove(&ncl->ncl_list, lp);
                ncl->ncl_nentries--;
                mutex_enter(&ncl->ncl_mutex);
                ncl->ncl_flags |= NCL_LIST_DIRTY;
                if (ncl->ncl_flags & NCL_LIST_WENABLE) {
                        /* both locks are dropped before flushing */
                        mutex_exit(&ncl->ncl_mutex);
                        rw_exit(&ncl->ncl_lock);
                        sdev_nc_flush_updates();
                } else {
                        mutex_exit(&ncl->ncl_mutex);
                        rw_exit(&ncl->ncl_lock);
                }
                /* lp is already off the list, so it is freed unlocked */
                sdev_nc_free_unlinked_node(lp);
                sdcmn_err5(("%s by %s: removed from ncache\n",
                    path, curproc->p_user.u_comm));
                if (sdev_nc_verbose) {
                        cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
                            path, curproc->p_user.u_comm);
                }
        } else
                rw_exit(&ncl->ncl_lock);
}
 888 
 889 static void
 890 sdev_nc_free_bootonly(void)
 891 {
 892         sdev_nc_list_t  *ncl = sdev_ncache;
 893         sdev_nc_node_t *lp;
 894         sdev_nc_node_t *next;
 895 
 896         rw_enter(&ncl->ncl_lock, RW_WRITER);
 897 
 898         for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
 899                 next = list_next(&ncl->ncl_list, lp);
 900                 if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
 901                         sdcmn_err5(("freeing %s\n", lp->ncn_name));
 902                         mutex_enter(&ncl->ncl_mutex);
 903                         ncl->ncl_flags |= NCL_LIST_DIRTY;
 904                         mutex_exit(&ncl->ncl_mutex);
 905                         list_remove(&ncl->ncl_list, lp);
 906                         sdev_nc_free_unlinked_node(lp);
 907                         ncl->ncl_nentries--;
 908                 }
 909         }
 910 
 911         rw_exit(&ncl->ncl_lock);
 912 }