1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * negative cache handling for the /dev fs
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/t_lock.h>
  33 #include <sys/systm.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/user.h>
  36 #include <sys/time.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/file.h>
  40 #include <sys/fcntl.h>
  41 #include <sys/flock.h>
  42 #include <sys/kmem.h>
  43 #include <sys/uio.h>
  44 #include <sys/errno.h>
  45 #include <sys/stat.h>
  46 #include <sys/cred.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/mode.h>
  50 #include <sys/policy.h>
  51 #include <fs/fs_subr.h>
  52 #include <sys/mount.h>
  53 #include <sys/fs/snode.h>
  54 #include <sys/fs/dv_node.h>
  55 #include <sys/fs/sdev_impl.h>
  56 #include <sys/sunndi.h>
  57 #include <sys/sunmdi.h>
  58 #include <sys/ddi.h>
  59 #include <sys/modctl.h>
  60 #include <sys/devcache.h>
  61 
  62 
  63 /*
  64  * ncache is a negative cache of failed lookups.  An entry
  65  * is added after an attempt to configure a device by that
  66  * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
  68  * does not need to be attempted.  If a name is created matching
  69  * an entry in ncache, that entry is removed, with the
  70  * persistent store updated.
  71  *
  72  * Implicit reconfig is initiated for any name during lookup that
  73  * can't be resolved from the backing store and that isn't
  74  * present in the negative cache.  This functionality is
  75  * enabled during system startup once communication with devfsadm
  76  * can be achieved.  Since readdir is more general, implicit
  77  * reconfig initiated by reading a directory isn't enabled until
  78  * the system is more fully booted, at the time of the multi-user
  79  * milestone, corresponding to init state 2.
  80  *
  81  * A maximum is imposed on the number of entries in the cache
  82  * to limit some script going wild and as a defense against attack.
  83  * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
  84  *
 * Each entry also has an expiration count.  When a name in the cache
 * is looked up, its count is reset to the default.  Subsequent boots
 * will decrement the count if a name isn't referenced.  This permits
 * a once-only entry to eventually be removed over time.
  89  *
  90  * sdev_reconfig_delay implements a "debounce" of the timing beyond
  91  * system available indication, providing what the filesystem considers
  92  * to be the system-is-fully-booted state.  This is provided to adjust
  93  * the timing if some application startup is performing a readdir
  94  * in /dev that initiates a troublesome implicit reconfig on every boot.
  95  *
  96  * sdev_nc_disable_reset can be used to disable clearing the negative cache
  97  * on reconfig boot.  The default is to clear the cache on reconfig boot.
  98  * sdev_nc_disable can be used to disable the negative cache itself.
  99  *
 100  * sdev_reconfig_disable can be used to disable implicit reconfig.
 101  * The default is that implicit reconfig is enabled.
 102  */
 103 
 104 /* tunables and defaults */
 105 #define SDEV_NC_EXPIRECNT       4
 106 #define SDEV_NC_MAX_ENTRIES     64
 107 #define SEV_RECONFIG_DELAY      6       /* seconds */
 108 
 109 /* tunables */
 110 int     sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
 111 int     sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
 112 int     sdev_reconfig_delay = SEV_RECONFIG_DELAY;
 113 int     sdev_reconfig_verbose = 0;
 114 int     sdev_reconfig_disable = 0;
 115 int     sdev_nc_disable = 0;
 116 int     sdev_nc_disable_reset = 0;
 117 int     sdev_nc_verbose = 0;
 118 int     sdev_cache_read_disable = 0;
 119 int     sdev_cache_write_disable = 0;
 120 
 121 /* globals */
 122 int     sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
 123 int     sdev_reconfig_boot = 0;
 124 sdev_nc_list_t *sdev_ncache;
 125 static nvf_handle_t sdevfd_handle;
 126 
 127 /* static prototypes */
 128 static void sdev_ncache_write_complete(nvf_handle_t);
 129 static void sdev_ncache_write(void);
 130 static void sdev_ncache_process_store(void);
 131 static sdev_nc_list_t *sdev_nc_newlist(void);
 132 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
 133 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
 134 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
 135 static void sdev_nc_free_bootonly(void);
 136 static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
 137 static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
 138 static void sdev_ncache_list_free(nvf_handle_t);
 139 static void sdev_nvp_free(nvp_devname_t *);
 140 
 141 /*
 142  * Registration for /etc/devices/devname_cache
 143  */
 144 static nvf_ops_t sdev_cache_ops = {
 145         "/etc/devices/devname_cache",           /* path to cache */
 146         sdev_ncache_unpack_nvlist,              /* read: unpack nvlist */
 147         sdev_ncache_pack_list,                  /* write: pack list */
 148         sdev_ncache_list_free,                  /* free data list */
 149         sdev_ncache_write_complete              /* write complete callback */
 150 };
 151 
 152 /*
 153  * called once at filesystem initialization
 154  */
 155 void
 156 sdev_ncache_init(void)
 157 {
 158         sdev_ncache = sdev_nc_newlist();
 159 }
 160 
 161 /*
 162  * called at mount of the global instance
 163  * currently the global instance is never unmounted
 164  */
 165 void
 166 sdev_ncache_setup(void)
 167 {
 168         sdevfd_handle = nvf_register_file(&sdev_cache_ops);
 169         ASSERT(sdevfd_handle);
 170 
 171         list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
 172             offsetof(nvp_devname_t, nvp_link));
 173 
 174         rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
 175         if (!sdev_cache_read_disable) {
 176                 (void) nvf_read_file(sdevfd_handle);
 177         }
 178         sdev_ncache_process_store();
 179         rw_exit(nvf_lock(sdevfd_handle));
 180 
 181         sdev_devstate_change();
 182 }
 183 
 184 static void
 185 sdev_nvp_free(nvp_devname_t *dp)
 186 {
 187         int     i;
 188         char    **p;
 189 
 190         if (dp->nvp_npaths > 0) {
 191                 p = dp->nvp_paths;
 192                 for (i = 0; i < dp->nvp_npaths; i++, p++) {
 193                         kmem_free(*p, strlen(*p)+1);
 194                 }
 195                 kmem_free(dp->nvp_paths,
 196                     dp->nvp_npaths * sizeof (char *));
 197                 kmem_free(dp->nvp_expirecnts,
 198                     dp->nvp_npaths * sizeof (int));
 199         }
 200 
 201         kmem_free(dp, sizeof (nvp_devname_t));
 202 }
 203 
 204 static void
 205 sdev_ncache_list_free(nvf_handle_t fd)
 206 {
 207         list_t          *listp;
 208         nvp_devname_t   *dp;
 209 
 210         ASSERT(fd == sdevfd_handle);
 211         ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
 212 
 213         listp = nvf_list(fd);
 214         if ((dp = list_head(listp)) != NULL) {
 215                 list_remove(listp, dp);
 216                 sdev_nvp_free(dp);
 217         }
 218 }
 219 
 220 /*
 221  * Unpack a device path/nvlist pair to internal data list format.
 222  * Used to decode the nvlist format into the internal representation
 223  * when reading /etc/devices/devname_cache.
 224  * Note that the expiration counts are optional, for compatibility
 225  * with earlier instances of the cache.  If not present, the
 226  * expire counts are initialized to defaults.
 227  */
 228 static int
 229 sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
 230 {
 231         nvp_devname_t *np;
 232         char    **strs;
 233         int     *cnts;
 234         uint_t  nstrs, ncnts;
 235         int     rval, i;
 236 
 237         ASSERT(fd == sdevfd_handle);
 238         ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
 239 
 240         /* name of the sublist must match what we created */
 241         if (strcmp(name, DP_DEVNAME_ID) != 0) {
 242                 return (-1);
 243         }
 244 
 245         np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
 246 
 247         rval = nvlist_lookup_string_array(nvl,
 248             DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
 249         if (rval) {
 250                 kmem_free(np, sizeof (nvp_devname_t));
 251                 return (-1);
 252         }
 253 
 254         np->nvp_npaths = nstrs;
 255         np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
 256         for (i = 0; i < nstrs; i++) {
 257                 np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
 258         }
 259         np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
 260         for (i = 0; i < nstrs; i++) {
 261                 np->nvp_expirecnts[i] = sdev_nc_expirecnt;
 262         }
 263 
 264         rval = nvlist_lookup_int32_array(nvl,
 265             DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
 266         if (rval == 0) {
 267                 ASSERT(ncnts == nstrs);
 268                 ncnts = min(ncnts, nstrs);
 269                 for (i = 0; i < nstrs; i++) {
 270                         np->nvp_expirecnts[i] = cnts[i];
 271                 }
 272         }
 273 
 274         list_insert_tail(nvf_list(sdevfd_handle), np);
 275 
 276         return (0);
 277 }
 278 
 279 /*
 280  * Pack internal format cache data to a single nvlist.
 281  * Used when writing the nvlist file.
 282  * Note this is called indirectly by the nvpflush daemon.
 283  */
 284 static int
 285 sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
 286 {
 287         nvlist_t        *nvl, *sub_nvl;
 288         nvp_devname_t   *np;
 289         int             rval;
 290         list_t          *listp;
 291 
 292         ASSERT(fd == sdevfd_handle);
 293         ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
 294 
 295         rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
 296         if (rval != 0) {
 297                 nvf_error("%s: nvlist alloc error %d\n",
 298                     nvf_cache_name(fd), rval);
 299                 return (DDI_FAILURE);
 300         }
 301 
 302         listp = nvf_list(sdevfd_handle);
 303         if ((np = list_head(listp)) != NULL) {
 304                 ASSERT(list_next(listp, np) == NULL);
 305 
 306                 rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
 307                 if (rval != 0) {
 308                         nvf_error("%s: nvlist alloc error %d\n",
 309                             nvf_cache_name(fd), rval);
 310                         sub_nvl = NULL;
 311                         goto err;
 312                 }
 313 
 314                 rval = nvlist_add_string_array(sub_nvl,
 315                     DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
 316                 if (rval != 0) {
 317                         nvf_error("%s: nvlist add error %d (sdev)\n",
 318                             nvf_cache_name(fd), rval);
 319                         goto err;
 320                 }
 321 
 322                 rval = nvlist_add_int32_array(sub_nvl,
 323                     DP_DEVNAME_NC_EXPIRECNT_ID,
 324                     np->nvp_expirecnts, np->nvp_npaths);
 325                 if (rval != 0) {
 326                         nvf_error("%s: nvlist add error %d (sdev)\n",
 327                             nvf_cache_name(fd), rval);
 328                         goto err;
 329                 }
 330 
 331                 rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
 332                 if (rval != 0) {
 333                         nvf_error("%s: nvlist add error %d (sublist)\n",
 334                             nvf_cache_name(fd), rval);
 335                         goto err;
 336                 }
 337                 nvlist_free(sub_nvl);
 338         }
 339 
 340         *ret_nvl = nvl;
 341         return (DDI_SUCCESS);
 342 
 343 err:
 344         if (sub_nvl)
 345                 nvlist_free(sub_nvl);
 346         nvlist_free(nvl);
 347         *ret_nvl = NULL;
 348         return (DDI_FAILURE);
 349 }
 350 
 351 /*
 352  * Run through the data read from the backing cache store
 353  * to establish the initial state of the neg. cache.
 354  */
 355 static void
 356 sdev_ncache_process_store(void)
 357 {
 358         sdev_nc_list_t  *ncl = sdev_ncache;
 359         nvp_devname_t   *np;
 360         sdev_nc_node_t  *lp;
 361         char            *path;
 362         int             i, n;
 363         list_t          *listp;
 364 
 365         if (sdev_nc_disable)
 366                 return;
 367 
 368         ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));
 369 
 370         listp = nvf_list(sdevfd_handle);
 371         for (np = list_head(listp); np; np = list_next(listp, np)) {
 372                 for (i = 0; i < np->nvp_npaths; i++) {
 373                         sdcmn_err5(("    %s %d\n",
 374                             np->nvp_paths[i], np->nvp_expirecnts[i]));
 375                         if (ncl->ncl_nentries < sdev_nc_max_entries) {
 376                                 path = np->nvp_paths[i];
 377                                 n = strlen(path) + 1;
 378                                 lp = kmem_alloc(sizeof (sdev_nc_node_t),
 379                                     KM_SLEEP);
 380                                 lp->ncn_name = kmem_alloc(n, KM_SLEEP);
 381                                 bcopy(path, lp->ncn_name, n);
 382                                 lp->ncn_flags = NCN_SRC_STORE;
 383                                 lp->ncn_expirecnt = np->nvp_expirecnts[i];
 384                                 sdev_nc_insertnode(ncl, lp);
 385                         } else if (sdev_nc_verbose) {
 386                                 cmn_err(CE_CONT,
 387                                     "?%s: truncating from ncache (max %d)\n",
 388                                     np->nvp_paths[i], sdev_nc_max_entries);
 389                         }
 390                 }
 391         }
 392 }
 393 
 394 /*
 395  * called by nvpflush daemon to inform us that an update of
 396  * the cache file has been completed.
 397  */
 398 static void
 399 sdev_ncache_write_complete(nvf_handle_t fd)
 400 {
 401         sdev_nc_list_t  *ncl = sdev_ncache;
 402 
 403         ASSERT(fd == sdevfd_handle);
 404 
 405         mutex_enter(&ncl->ncl_mutex);
 406 
 407         ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);
 408 
 409         if (ncl->ncl_flags & NCL_LIST_DIRTY) {
 410                 sdcmn_err5(("ncache write complete but dirty again\n"));
 411                 ncl->ncl_flags &= ~NCL_LIST_DIRTY;
 412                 mutex_exit(&ncl->ncl_mutex);
 413                 sdev_ncache_write();
 414         } else {
 415                 sdcmn_err5(("ncache write complete\n"));
 416                 ncl->ncl_flags &= ~NCL_LIST_WRITING;
 417                 mutex_exit(&ncl->ncl_mutex);
 418                 rw_enter(nvf_lock(fd), RW_WRITER);
 419                 sdev_ncache_list_free(fd);
 420                 rw_exit(nvf_lock(fd));
 421         }
 422 }
 423 
 424 /*
 425  * Prepare to perform an update of the neg. cache backing store.
 426  */
 427 static void
 428 sdev_ncache_write(void)
 429 {
 430         sdev_nc_list_t  *ncl = sdev_ncache;
 431         nvp_devname_t   *np;
 432         sdev_nc_node_t  *lp;
 433         int             n, i;
 434 
 435         if (sdev_cache_write_disable) {
 436                 mutex_enter(&ncl->ncl_mutex);
 437                 ncl->ncl_flags &= ~NCL_LIST_WRITING;
 438                 mutex_exit(&ncl->ncl_mutex);
 439                 return;
 440         }
 441 
 442         /* proper lock ordering here is essential */
 443         rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
 444         sdev_ncache_list_free(sdevfd_handle);
 445 
 446         rw_enter(&ncl->ncl_lock, RW_READER);
 447         n = ncl->ncl_nentries;
 448         ASSERT(n <= sdev_nc_max_entries);
 449 
 450         np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
 451         np->nvp_npaths = n;
 452         np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
 453         np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);
 454 
 455         i = 0;
 456         for (lp = list_head(&ncl->ncl_list); lp;
 457             lp = list_next(&ncl->ncl_list, lp)) {
 458                 np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
 459                 np->nvp_expirecnts[i] = lp->ncn_expirecnt;
 460                 sdcmn_err5(("    %s %d\n",
 461                     np->nvp_paths[i], np->nvp_expirecnts[i]));
 462                 i++;
 463         }
 464 
 465         rw_exit(&ncl->ncl_lock);
 466 
 467         nvf_mark_dirty(sdevfd_handle);
 468         list_insert_tail(nvf_list(sdevfd_handle), np);
 469         rw_exit(nvf_lock(sdevfd_handle));
 470 
 471         nvf_wake_daemon();
 472 }
 473 
 474 static void
 475 sdev_nc_flush_updates(void)
 476 {
 477         sdev_nc_list_t *ncl = sdev_ncache;
 478 
 479         if (sdev_nc_disable || sdev_cache_write_disable)
 480                 return;
 481 
 482         mutex_enter(&ncl->ncl_mutex);
 483         if (((ncl->ncl_flags &
 484             (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
 485             (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
 486                 ncl->ncl_flags &= ~NCL_LIST_DIRTY;
 487                 ncl->ncl_flags |= NCL_LIST_WRITING;
 488                 mutex_exit(&ncl->ncl_mutex);
 489                 sdev_ncache_write();
 490         } else {
 491                 mutex_exit(&ncl->ncl_mutex);
 492         }
 493 }
 494 
 495 static void
 496 sdev_nc_flush_boot_update(void)
 497 {
 498         sdev_nc_list_t *ncl = sdev_ncache;
 499 
 500         if (sdev_nc_disable || sdev_cache_write_disable ||
 501             (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
 502                 return;
 503         }
 504         mutex_enter(&ncl->ncl_mutex);
 505         if (ncl->ncl_flags & NCL_LIST_WENABLE) {
 506                 mutex_exit(&ncl->ncl_mutex);
 507                 sdev_nc_flush_updates();
 508         } else {
 509                 mutex_exit(&ncl->ncl_mutex);
 510         }
 511 
 512 }
 513 
 514 static void
 515 sdev_state_boot_complete()
 516 {
 517         sdev_nc_list_t  *ncl = sdev_ncache;
 518         sdev_nc_node_t  *lp, *next;
 519 
 520         /*
 521          * Once boot is complete, decrement the expire count of each entry
 522          * in the cache not touched by a reference.  Remove any that
 523          * goes to zero.  This effectively removes random entries over
 524          * time.
 525          */
 526         rw_enter(&ncl->ncl_lock, RW_WRITER);
 527         mutex_enter(&ncl->ncl_mutex);
 528 
 529         for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
 530                 next = list_next(&ncl->ncl_list, lp);
 531                 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
 532                         if (lp->ncn_flags & NCN_ACTIVE) {
 533                                 if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
 534                                         lp->ncn_expirecnt = sdev_nc_expirecnt;
 535                                         ncl->ncl_flags |= NCL_LIST_DIRTY;
 536                                 }
 537                         } else {
 538                                 if (--lp->ncn_expirecnt == 0) {
 539                                         list_remove(&ncl->ncl_list, lp);
 540                                         sdev_nc_free_unlinked_node(lp);
 541                                         ncl->ncl_nentries--;
 542                                 }
 543                                 ncl->ncl_flags |= NCL_LIST_DIRTY;
 544                         }
 545                 }
 546         }
 547 
 548         mutex_exit(&ncl->ncl_mutex);
 549         rw_exit(&ncl->ncl_lock);
 550 
 551         sdev_nc_flush_boot_update();
 552         sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
 553 }
 554 
 555 /*
 556  * Upon transition to the login state on a reconfigure boot,
 557  * a debounce timer is set up so that we cache all the nonsense
 558  * lookups we're hit with by the windowing system startup.
 559  */
 560 
 561 /*ARGSUSED*/
 562 static void
 563 sdev_state_timeout(void *arg)
 564 {
 565         sdev_state_boot_complete();
 566 }
 567 
 568 static void
 569 sdev_state_sysavail()
 570 {
 571         sdev_nc_list_t *ncl = sdev_ncache;
 572         clock_t nticks;
 573         int nsecs;
 574 
 575         mutex_enter(&ncl->ncl_mutex);
 576         ncl->ncl_flags |= NCL_LIST_WENABLE;
 577         mutex_exit(&ncl->ncl_mutex);
 578 
 579         nsecs = sdev_reconfig_delay;
 580         if (nsecs == 0) {
 581                 sdev_state_boot_complete();
 582         } else {
 583                 nticks = drv_sectohz(nsecs);
 584                 sdcmn_err5(("timeout initiated %ld\n", nticks));
 585                 (void) timeout(sdev_state_timeout, NULL, nticks);
 586                 sdev_nc_flush_boot_update();
 587         }
 588 }
 589 
 590 /*
 591  * Called to inform the filesystem of progress during boot,
 592  * either a notice of reconfiguration boot or an indication of
 593  * system boot complete.  At system boot complete, set up a
 594  * timer at the expiration of which no further failed lookups
 595  * will be added to the negative cache.
 596  *
 597  * The dev filesystem infers from reconfig boot that implicit
 598  * reconfig need not be invoked at all as all available devices
 599  * will have already been named.
 600  *
 601  * The dev filesystem infers from "system available" that devfsadmd
 602  * can now be run and hence implicit reconfiguration may be initiated.
 603  * During early stages of system startup, implicit reconfig is
 604  * not done to avoid impacting boot performance.
 605  */
 606 void
 607 sdev_devstate_change(void)
 608 {
 609         int new_state;
 610 
 611         /*
 612          * Track system state and manage interesting transitions
 613          */
 614         new_state = SDEV_BOOT_STATE_INITIAL;
 615         if (i_ddi_reconfig())
 616                 new_state = SDEV_BOOT_STATE_RECONFIG;
 617         if (i_ddi_sysavail())
 618                 new_state = SDEV_BOOT_STATE_SYSAVAIL;
 619 
 620         if (sdev_boot_state < new_state) {
 621                 switch (new_state) {
 622                 case SDEV_BOOT_STATE_RECONFIG:
 623                         sdcmn_err5(("state change: reconfigure boot\n"));
 624                         sdev_boot_state = new_state;
 625                         /*
 626                          * The /dev filesystem fills a hot-plug .vs.
 627                          * public-namespace gap by invoking 'devfsadm' once
 628                          * as a result of the first /dev lookup failure
 629                          * (or getdents/readdir). Originally, it was thought
 630                          * that a reconfig reboot did not have a hot-plug gap,
 631                          * but this is not true - the gap is just smaller:
 632                          * it exists from the the time the smf invocation of
 633                          * devfsadm completes its forced devinfo snapshot,
 634                          * to the time when the smf devfsadmd daemon invocation
 635                          * is set up and listening for hotplug sysevents.
 636                          * Since there is still a gap with reconfig reboot,
 637                          * we no longer set 'sdev_reconfig_boot'.
 638                          */
 639                         if (!sdev_nc_disable_reset)
 640                                 sdev_nc_free_bootonly();
 641                         break;
 642                 case SDEV_BOOT_STATE_SYSAVAIL:
 643                         sdcmn_err5(("system available\n"));
 644                         sdev_boot_state = new_state;
 645                         sdev_state_sysavail();
 646                         break;
 647                 }
 648         }
 649 }
 650 
 651 /*
 652  * Lookup: filter out entries in the negative cache
 653  * Return 1 if the lookup should not cause a reconfig.
 654  */
 655 int
 656 sdev_lookup_filter(sdev_node_t *dv, char *nm)
 657 {
 658         int n;
 659         sdev_nc_list_t *ncl = sdev_ncache;
 660         sdev_nc_node_t *lp;
 661         char *path;
 662         int rval = 0;
 663         int changed = 0;
 664 
 665         ASSERT(i_ddi_io_initialized());
 666         ASSERT(SDEVTOV(dv)->v_type == VDIR);
 667 
 668         if (sdev_nc_disable)
 669                 return (0);
 670 
 671         n = strlen(dv->sdev_path) + strlen(nm) + 2;
 672         path = kmem_alloc(n, KM_SLEEP);
 673         (void) sprintf(path, "%s/%s", dv->sdev_path, nm);
 674 
 675         rw_enter(&ncl->ncl_lock, RW_READER);
 676         if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
 677                 sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
 678                     dv->sdev_name, nm, curproc->p_user.u_comm));
 679                 if (sdev_nc_verbose) {
 680                         cmn_err(CE_CONT,
 681                             "?%s/%s: lookup by %s cached, no reconfig\n",
 682                             dv->sdev_name, nm, curproc->p_user.u_comm);
 683                 }
 684                 mutex_enter(&ncl->ncl_mutex);
 685                 lp->ncn_flags |= NCN_ACTIVE;
 686                 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
 687                     lp->ncn_expirecnt < sdev_nc_expirecnt) {
 688                         lp->ncn_expirecnt = sdev_nc_expirecnt;
 689                         ncl->ncl_flags |= NCL_LIST_DIRTY;
 690                         changed = 1;
 691                 }
 692                 mutex_exit(&ncl->ncl_mutex);
 693                 rval = 1;
 694         }
 695         rw_exit(&ncl->ncl_lock);
 696         kmem_free(path, n);
 697         if (changed)
 698                 sdev_nc_flush_boot_update();
 699         return (rval);
 700 }
 701 
 702 void
 703 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
 704 {
 705         if (sdev_nc_disable)
 706                 return;
 707 
 708         /*
 709          * If we're still in the initial boot stage, always update
 710          * the cache - we may not have received notice of the
 711          * reconfig boot state yet.  On a reconfigure boot, entries
 712          * from the backing store are not re-persisted on update,
 713          * but new entries are marked as needing an update.
 714          * Never cache dynamic or non-global nodes.
 715          */
 716         if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
 717             !SDEV_IS_NO_NCACHE(dv) &&
 718             ((failed_flags & SLF_NO_NCACHE) == 0) &&
 719             ((sdev_reconfig_boot &&
 720             (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
 721             (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
 722                         sdev_nc_addname(sdev_ncache,
 723                             dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
 724         }
 725 }
 726 
 727 static sdev_nc_list_t *
 728 sdev_nc_newlist(void)
 729 {
 730         sdev_nc_list_t  *ncl;
 731 
 732         ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);
 733 
 734         rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
 735         mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
 736         list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
 737             offsetof(sdev_nc_node_t, ncn_link));
 738 
 739         return (ncl);
 740 }
 741 
 742 static void
 743 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
 744 {
 745         kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
 746         kmem_free(lp, sizeof (sdev_nc_node_t));
 747 }
 748 
 749 static sdev_nc_node_t *
 750 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
 751 {
 752         sdev_nc_node_t *lp;
 753 
 754         ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
 755 
 756         for (lp = list_head(&ncl->ncl_list); lp;
 757             lp = list_next(&ncl->ncl_list, lp)) {
 758                 if (strcmp(path, lp->ncn_name) == 0)
 759                         return (lp);
 760         }
 761 
 762         return (NULL);
 763 }
 764 
 765 static void
 766 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
 767 {
 768         sdev_nc_node_t *lp;
 769 
 770         rw_enter(&ncl->ncl_lock, RW_WRITER);
 771 
 772         lp = sdev_nc_findpath(ncl, new->ncn_name);
 773         if (lp == NULL) {
 774                 if (ncl->ncl_nentries == sdev_nc_max_entries) {
 775                         sdcmn_err5((
 776                             "%s by %s: not adding to ncache (max %d)\n",
 777                             new->ncn_name, curproc->p_user.u_comm,
 778                             ncl->ncl_nentries));
 779                         if (sdev_nc_verbose) {
 780                                 cmn_err(CE_CONT, "?%s by %s: "
 781                                     "not adding to ncache (max %d)\n",
 782                                     new->ncn_name, curproc->p_user.u_comm,
 783                                     ncl->ncl_nentries);
 784                         }
 785                         rw_exit(&ncl->ncl_lock);
 786                         sdev_nc_free_unlinked_node(new);
 787                 } else {
 788 
 789                         list_insert_tail(&ncl->ncl_list, new);
 790                         ncl->ncl_nentries++;
 791 
 792                         /* don't mark list dirty for nodes from store */
 793                         mutex_enter(&ncl->ncl_mutex);
 794                         if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
 795                                 sdcmn_err5(("%s by %s: add to ncache\n",
 796                                     new->ncn_name, curproc->p_user.u_comm));
 797                                 if (sdev_nc_verbose) {
 798                                         cmn_err(CE_CONT,
 799                                             "?%s by %s: add to ncache\n",
 800                                             new->ncn_name,
 801                                             curproc->p_user.u_comm);
 802                                 }
 803                                 ncl->ncl_flags |= NCL_LIST_DIRTY;
 804                         }
 805                         mutex_exit(&ncl->ncl_mutex);
 806                         rw_exit(&ncl->ncl_lock);
 807                         lp = new;
 808                         sdev_nc_flush_boot_update();
 809                 }
 810         } else {
 811                 mutex_enter(&ncl->ncl_mutex);
 812                 lp->ncn_flags |= new->ncn_flags;
 813                 mutex_exit(&ncl->ncl_mutex);
 814                 rw_exit(&ncl->ncl_lock);
 815                 sdev_nc_free_unlinked_node(new);
 816         }
 817 }
 818 
 819 void
 820 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
 821 {
 822         int n;
 823         sdev_nc_node_t *lp;
 824 
 825         ASSERT(SDEVTOV(dv)->v_type == VDIR);
 826 
 827         lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);
 828 
 829         n = strlen(dv->sdev_path) + strlen(nm) + 2;
 830         lp->ncn_name = kmem_alloc(n, KM_SLEEP);
 831         (void) sprintf(lp->ncn_name, "%s/%s",
 832             dv->sdev_path, nm);
 833         lp->ncn_flags = flags;
 834         lp->ncn_expirecnt = sdev_nc_expirecnt;
 835         sdev_nc_insertnode(ncl, lp);
 836 }
 837 
 838 void
 839 sdev_nc_node_exists(sdev_node_t *dv)
 840 {
 841         /* dynamic and non-global nodes are never cached */
 842         if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
 843             !SDEV_IS_NO_NCACHE(dv)) {
 844                 sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
 845         }
 846 }
 847 
 848 void
 849 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
 850 {
 851         sdev_nc_node_t *lp;
 852 
 853         if (sdev_nc_disable)
 854                 return;
 855 
 856         rw_enter(&ncl->ncl_lock, RW_READER);
 857         if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
 858                 rw_exit(&ncl->ncl_lock);
 859                 return;
 860         }
 861         if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
 862                 rw_exit(&ncl->ncl_lock);
 863                 rw_enter(&ncl->ncl_lock, RW_WRITER);
 864                 lp = sdev_nc_findpath(ncl, path);
 865         }
 866         if (lp) {
 867                 list_remove(&ncl->ncl_list, lp);
 868                 ncl->ncl_nentries--;
 869                 mutex_enter(&ncl->ncl_mutex);
 870                 ncl->ncl_flags |= NCL_LIST_DIRTY;
 871                 if (ncl->ncl_flags & NCL_LIST_WENABLE) {
 872                         mutex_exit(&ncl->ncl_mutex);
 873                         rw_exit(&ncl->ncl_lock);
 874                         sdev_nc_flush_updates();
 875                 } else {
 876                         mutex_exit(&ncl->ncl_mutex);
 877                         rw_exit(&ncl->ncl_lock);
 878                 }
 879                 sdev_nc_free_unlinked_node(lp);
 880                 sdcmn_err5(("%s by %s: removed from ncache\n",
 881                     path, curproc->p_user.u_comm));
 882                 if (sdev_nc_verbose) {
 883                         cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
 884                             path, curproc->p_user.u_comm);
 885                 }
 886         } else
 887                 rw_exit(&ncl->ncl_lock);
 888 }
 889 
 890 static void
 891 sdev_nc_free_bootonly(void)
 892 {
 893         sdev_nc_list_t  *ncl = sdev_ncache;
 894         sdev_nc_node_t *lp;
 895         sdev_nc_node_t *next;
 896 
 897         rw_enter(&ncl->ncl_lock, RW_WRITER);
 898 
 899         for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
 900                 next = list_next(&ncl->ncl_list, lp);
 901                 if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
 902                         sdcmn_err5(("freeing %s\n", lp->ncn_name));
 903                         mutex_enter(&ncl->ncl_mutex);
 904                         ncl->ncl_flags |= NCL_LIST_DIRTY;
 905                         mutex_exit(&ncl->ncl_mutex);
 906                         list_remove(&ncl->ncl_list, lp);
 907                         sdev_nc_free_unlinked_node(lp);
 908                         ncl->ncl_nentries--;
 909                 }
 910         }
 911 
 912         rw_exit(&ncl->ncl_lock);
 913 }