/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I%     %E% SMI"

/*
 * The idea behind composition-based stacked filesystems is to add a
 * vnode to the stack of vnodes for each mount. These vnodes have their
 * own set of mount options and filesystem-specific functions, so they
 * can modify data or operations before they are passed along. Such a
 * filesystem must maintain a mapping from the underlying vnodes to its
 * interposing vnodes.
 *
 * In lofs, this mapping is implemented by a hashtable. Each bucket
 * contains a count of the number of nodes currently contained, the
 * chain of vnodes, and a lock to protect the list of vnodes. The
 * hashtable dynamically grows if the number of vnodes in the table as a
 * whole exceeds the size of the table left-shifted by
 * lo_resize_threshold. In order to minimize lock contention, there is
 * no global lock protecting the hashtable, hence obtaining the
 * per-bucket locks consists of a dance to make sure we've actually
 * locked the correct bucket. Acquiring a bucket lock doesn't involve
 * locking the hashtable itself, so we refrain from freeing old
 * hashtables, and store them in a linked list of retired hashtables;
 * the list is freed when the filesystem is unmounted.
 */
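
/*
 * For orientation, each hash bucket used below looks roughly like this
 * (the authoritative definitions live in the lofs headers included
 * further down; treat this as a sketch, not the real layout):
 *
 *      struct lobucket {
 *              kmutex_t        lh_lock;        (protects lh_chain)
 *              lnode_t         *lh_chain;      (chain of lnodes)
 *              uint_t          lh_count;       (lnodes in this bucket)
 *      };
 *
 * Each lofs mount's struct loinfo carries the li_hashtable, li_htsize,
 * li_htlock and li_retired fields manipulated below.
 */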

#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/t_lock.h>
#include <sys/debug.h>
#include <sys/atomic.h>

#include <sys/fs/lofs_node.h>
#include <sys/fs/lofs_info.h>

/*
 * Due to the hashing algorithm, the size of the hash table needs to be a
 * power of 2.
 */
#define LOFS_DEFAULT_HTSIZE     (1 << 6)

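/*
 * Hash a vnode pointer to a bucket index.  The shift by 10 discards the
 * low-order address bits, which tend to carry little information for
 * kmem-allocated vnodes, before masking with the (power-of-2) table
 * size.
 */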
#define ltablehash(vp, tblsz)   ((((intptr_t)(vp))>>10) & ((tblsz)-1))

/*
 * The following macros can only be safely used when the desired bucket
 * is already locked.
 */
/*
 * The lock in the hashtable associated with the given vnode.
 */
#define TABLE_LOCK(vp, li)      \
        (&(li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_lock)

/*
 * The bucket in the hashtable that the given vnode hashes to.
 */
#define TABLE_BUCKET(vp, li)    \
        ((li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_chain)

/*
 * Number of elements currently in the bucket that the vnode hashes to.
 */
#define TABLE_COUNT(vp, li)     \
        ((li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_count)

/*
 * Grab/Drop the lock for the bucket this vnode hashes to.
 */
#define TABLE_LOCK_ENTER(vp, li)        table_lock_enter(vp, li)
#define TABLE_LOCK_EXIT(vp, li)         \
        mutex_exit(&(li)->li_hashtable[ltablehash((vp),  \
            (li)->li_htsize)].lh_lock)
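
/*
 * A typical caller pairs these as follows (cf. makelonode() and
 * freelonode() below):
 *
 *      TABLE_LOCK_ENTER(vp, li);
 *      lp = lfind(vp, li);
 *      ...
 *      TABLE_LOCK_EXIT(vp, li);
 */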

static lnode_t *lfind(struct vnode *, struct loinfo *);
static void lsave(lnode_t *, struct loinfo *);
static struct vfs *makelfsnode(struct vfs *, struct loinfo *);
static struct lfsnode *lfsfind(struct vfs *, struct loinfo *);

uint_t lo_resize_threshold = 1;
uint_t lo_resize_factor = 2;

static kmem_cache_t *lnode_cache;

/*
 * Since the hashtable itself isn't protected by a lock, obtaining a
 * per-bucket lock proceeds as follows:
 *
 * (a) li->li_htlock protects li->li_hashtable, li->li_htsize, and
 * li->li_retired.
 *
 * (b) Per-bucket locks (lh_lock) protect the contents of the bucket.
 *
 * (c) Locking order for resizing the hashtable is li_htlock then
 * lh_lock.
 *
 * To grab the bucket lock we:
 *
 * (1) Stash away the htsize and the pointer to the hashtable to make
 * sure neither changes while we're using them.
 *
 * (2) lgrow() updates the pointer to the hashtable before it updates
 * the size: the worst case scenario is that we have the wrong size (but
 * the correct table), so we hash to the wrong bucket, grab the wrong
 * lock, and then realize that things have changed, rewind and start
 * again. If both the size and the table changed since we loaded them,
 * we'll realize that too and restart.
 *
 * (3) The protocol for growing the hashtable involves holding *all* the
 * locks in the table, hence the unlocking code (TABLE_LOCK_EXIT())
 * doesn't need to do any dances, since neither the table nor the size
 * can change while any bucket lock is held.
 *
 * (4) If the hashtable is growing (by thread t1) while another thread
 * (t2) is trying to grab a bucket lock, t2 might have a stale reference
 * to li->li_htsize:
 *
 * - t1 grabs all locks in lgrow()
 *      - t2 loads li->li_htsize and li->li_hashtable
 * - t1 changes li->li_hashtable
 *      - t2 loads from an offset in the "stale" hashtable and tries to grab
 *      the relevant mutex.
 *
 * If t1 had freed the stale hashtable, t2 would be in trouble. Hence,
 * stale hashtables are not freed but stored in a list of "retired"
 * hashtables, which is emptied when the filesystem is unmounted.
 */
static void
table_lock_enter(vnode_t *vp, struct loinfo *li)
{
        struct lobucket *chain;
        uint_t htsize;
        uint_t hash;

        for (;;) {
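                /*
                 * Snapshot the size and then the table pointer.  The
                 * membar_consumer() pairs with the membar_producer()
                 * calls in lgrow(), so a new size is never observed
                 * together with a stale table pointer.
                 */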
                htsize = li->li_htsize;
                membar_consumer();
                chain = (struct lobucket *)li->li_hashtable;
                hash = ltablehash(vp, htsize);
                mutex_enter(&chain[hash].lh_lock);
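                /*
                 * With the candidate bucket lock held, recheck that
                 * neither the table nor its size changed underneath us;
                 * if either did, we may hold the wrong lock, so drop it
                 * and retry.
                 */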
                if (li->li_hashtable == chain && li->li_htsize == htsize)
                        break;
                mutex_exit(&chain[hash].lh_lock);
        }
}

void
lofs_subrinit(void)
{
        /*
         * Initialize the cache.
         */
        lnode_cache = kmem_cache_create("lnode_cache", sizeof (lnode_t),
            0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lofs_subrfini(void)
{
        kmem_cache_destroy(lnode_cache);
}

/*
 * Initialize a (struct loinfo), and initialize the hashtable to have
 * htsize buckets.
 */
void
lsetup(struct loinfo *li, uint_t htsize)
{
        li->li_refct = 0;
        li->li_lfs = NULL;
        if (htsize == 0)
                htsize = LOFS_DEFAULT_HTSIZE;
        li->li_htsize = htsize;
        li->li_hashtable = kmem_zalloc(htsize * sizeof (*li->li_hashtable),
            KM_SLEEP);
        mutex_init(&li->li_lfslock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&li->li_htlock, NULL, MUTEX_DEFAULT, NULL);
        li->li_retired = NULL;
}

/*
 * Destroy a (struct loinfo)
 */
void
ldestroy(struct loinfo *li)
{
        uint_t i, htsize;
        struct lobucket *table;
        struct lo_retired_ht *lrhp, *trhp;

        mutex_destroy(&li->li_htlock);
        mutex_destroy(&li->li_lfslock);
        htsize = li->li_htsize;
        table = li->li_hashtable;
        for (i = 0; i < htsize; i++)
                mutex_destroy(&table[i].lh_lock);
        kmem_free(table, htsize * sizeof (*li->li_hashtable));

        /*
         * Free the retired hashtables.
         */
        lrhp = li->li_retired;
        while (lrhp != NULL) {
                trhp = lrhp;
                lrhp = lrhp->lrh_next;
                kmem_free(trhp->lrh_table,
                    trhp->lrh_size * sizeof (*li->li_hashtable));
                kmem_free(trhp, sizeof (*trhp));
        }
        li->li_retired = NULL;
}

/*
 * Return a looped-back vnode for the given vnode.
 * If no lnode exists for this vnode, create one and put it
 * in a table hashed by vnode.  If the lnode for
 * this vnode is already in the table, return it (its reference
 * count is incremented by lfind).  The lnode will be flushed from
 * the table when lo_inactive calls freelonode.  The creation of
 * a new lnode can be forced via the LOF_FORCE flag even if
 * the vnode already exists in the table.  This is used in the
 * creation of a terminating lnode when looping is detected.  A
 * unique lnode is required for the correct evaluation of the
 * current working directory.
 * NOTE: vp is assumed to be a held vnode.
 */
struct vnode *
makelonode(struct vnode *vp, struct loinfo *li, int flag)
{
        lnode_t *lp, *tlp;
        struct vfs *vfsp;
        vnode_t *nvp;

        lp = NULL;
        TABLE_LOCK_ENTER(vp, li);
        if (flag != LOF_FORCE)
                lp = lfind(vp, li);
        if ((flag == LOF_FORCE) || (lp == NULL)) {
                /*
                 * Optimistically assume that we won't need to sleep.
                 */
                lp = kmem_cache_alloc(lnode_cache, KM_NOSLEEP);
                nvp = vn_alloc(KM_NOSLEEP);
                if (lp == NULL || nvp == NULL) {
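                        /*
                         * Drop the bucket lock across the sleeping
                         * allocations below; while it is dropped another
                         * thread may install an lnode for this vnode, so
                         * the lookup is repeated once the lock is
                         * reacquired.
                         */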
                        TABLE_LOCK_EXIT(vp, li);
                        /* The lnode allocation may have succeeded, save it */
                        tlp = lp;
                        if (tlp == NULL) {
                                tlp = kmem_cache_alloc(lnode_cache, KM_SLEEP);
                        }
                        if (nvp == NULL) {
                                nvp = vn_alloc(KM_SLEEP);
                        }
                        lp = NULL;
                        TABLE_LOCK_ENTER(vp, li);
                        if (flag != LOF_FORCE)
                                lp = lfind(vp, li);
                        if (lp != NULL) {
                                kmem_cache_free(lnode_cache, tlp);
                                vn_free(nvp);
                                VN_RELE(vp);
                                goto found_lnode;
                        }
                        lp = tlp;
                }
                atomic_add_32(&li->li_refct, 1);
                vfsp = makelfsnode(vp->v_vfsp, li);
                lp->lo_vnode = nvp;
                VN_SET_VFS_TYPE_DEV(nvp, vfsp, vp->v_type, vp->v_rdev);
                nvp->v_flag |= (vp->v_flag & (VNOMOUNT|VNOMAP|VDIROPEN));
                vn_setops(nvp, lo_vnodeops);
                nvp->v_data = (caddr_t)lp;
                lp->lo_vp = vp;
                lp->lo_looping = 0;
                lsave(lp, li);
                vn_exists(vp);
        } else {
                VN_RELE(vp);
        }

found_lnode:
        TABLE_LOCK_EXIT(vp, li);
        return (ltov(lp));
}

/*
 * Get/Make vfs structure for given real vfs
 */
static struct vfs *
makelfsnode(struct vfs *vfsp, struct loinfo *li)
{
        struct lfsnode *lfs;
        struct lfsnode *tlfs;

        /*
         * Don't grab any locks for the fast (common) case.
         */
        if (vfsp == li->li_realvfs)
                return (li->li_mountvfs);
        ASSERT(li->li_refct > 0);
        mutex_enter(&li->li_lfslock);
        if ((lfs = lfsfind(vfsp, li)) == NULL) {
                mutex_exit(&li->li_lfslock);
                lfs = kmem_zalloc(sizeof (*lfs), KM_SLEEP);
                mutex_enter(&li->li_lfslock);
                if ((tlfs = lfsfind(vfsp, li)) != NULL) {
                        kmem_free(lfs, sizeof (*lfs));
                        lfs = tlfs;
                        goto found_lfs;
                }
                lfs->lfs_realvfs = vfsp;

                /*
                 * Even though the lfsnode is strictly speaking a private
                 * implementation detail of lofs, it should behave as a regular
                 * vfs_t for the benefit of the rest of the kernel.
                 */
                VFS_INIT(&lfs->lfs_vfs, lo_vfsops, (caddr_t)li);
                lfs->lfs_vfs.vfs_fstype = li->li_mountvfs->vfs_fstype;
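                /*
                 * Inherit the real vfs's flags, apply the flags the lofs
                 * mount explicitly set (li_mflag) or cleared (li_dflag),
                 * and keep only the subset a stacked mount may inherit.
                 */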
                lfs->lfs_vfs.vfs_flag =
                    ((vfsp->vfs_flag | li->li_mflag) & ~li->li_dflag) &
                    INHERIT_VFS_FLAG;
                lfs->lfs_vfs.vfs_bsize = vfsp->vfs_bsize;
                lfs->lfs_vfs.vfs_dev = vfsp->vfs_dev;
                lfs->lfs_vfs.vfs_fsid = vfsp->vfs_fsid;

                if (vfsp->vfs_mntpt != NULL) {
                        lfs->lfs_vfs.vfs_mntpt = vfs_getmntpoint(vfsp);
                        /* Leave a reference to the mountpoint */
                }

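                /*
                 * Cache the real filesystem's root vnode; lfsfind() uses
                 * it to detect forcibly unmounted filesystems and
                 * lo_realvfs() hands it back to interested callers.
                 */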
                (void) VFS_ROOT(vfsp, &lfs->lfs_realrootvp);

                /*
                 * We use 1 instead of 0 as the value to associate with
                 * an idle lfs_vfs.  This is to prevent VFS_RELE()
                 * trying to kmem_free() our lfs_t (which is the wrong
                 * size).
                 */
                VFS_HOLD(&lfs->lfs_vfs);
                lfs->lfs_next = li->li_lfs;
                li->li_lfs = lfs;
                vfs_propagate_features(vfsp, &lfs->lfs_vfs);
        }

found_lfs:
        VFS_HOLD(&lfs->lfs_vfs);
        mutex_exit(&li->li_lfslock);
        return (&lfs->lfs_vfs);
}

/*
 * Free lfs node since no longer in use
 */
static void
freelfsnode(struct lfsnode *lfs, struct loinfo *li)
{
        struct lfsnode *prev = NULL;
        struct lfsnode *this;

        ASSERT(MUTEX_HELD(&li->li_lfslock));
        ASSERT(li->li_refct > 0);
        for (this = li->li_lfs; this != NULL; this = this->lfs_next) {
                if (this == lfs) {
                        ASSERT(lfs->lfs_vfs.vfs_count == 1);
                        if (prev == NULL)
                                li->li_lfs = lfs->lfs_next;
                        else
                                prev->lfs_next = lfs->lfs_next;
                        if (lfs->lfs_realrootvp != NULL) {
                                VN_RELE(lfs->lfs_realrootvp);
                        }
                        if (lfs->lfs_vfs.vfs_mntpt != NULL)
                                refstr_rele(lfs->lfs_vfs.vfs_mntpt);
                        if (lfs->lfs_vfs.vfs_implp != NULL) {
                                ASSERT(lfs->lfs_vfs.vfs_femhead == NULL);
                                ASSERT(lfs->lfs_vfs.vfs_vskap == NULL);
                                ASSERT(lfs->lfs_vfs.vfs_fstypevsp == NULL);
                                kmem_free(lfs->lfs_vfs.vfs_implp,
                                    sizeof (vfs_impl_t));
                        }
                        sema_destroy(&lfs->lfs_vfs.vfs_reflock);
                        kmem_free(lfs, sizeof (struct lfsnode));
                        return;
                }
                prev = this;
        }
        panic("freelfsnode");
        /*NOTREACHED*/
}

/*
 * Find lfs given real vfs and mount instance(li)
 */
static struct lfsnode *
lfsfind(struct vfs *vfsp, struct loinfo *li)
{
        struct lfsnode *lfs;

        ASSERT(MUTEX_HELD(&li->li_lfslock));

        /*
         * We need to handle the case where a UFS filesystem was forced
         * unmounted and then a subsequent mount got the same vfs
         * structure.  If the new mount lies in the lofs hierarchy, then
         * this will confuse lofs, because the original vfsp (of the
         * forced unmounted filesystem) is still around. We check for
         * this condition here.
         *
         * If we find a cache vfsp hit, then we check to see if the
         * cached filesystem was forced unmounted. Skip all such
         * entries. This should be safe to do since no
         * makelonode()->makelfsnode()->lfsfind() calls should be
         * generated for such force-unmounted filesystems (because (ufs)
         * lookup would've returned an error).
         */
        for (lfs = li->li_lfs; lfs != NULL; lfs = lfs->lfs_next) {
                if (lfs->lfs_realvfs == vfsp) {
                        struct vnode *realvp;

                        realvp = lfs->lfs_realrootvp;
                        if (realvp == NULL)
                                continue;
                        if (realvp->v_vfsp == NULL || realvp->v_type == VBAD)
                                continue;
                        return (lfs);
                }
        }
        return (NULL);
}

/*
 * Find real vfs given loopback vfs
 */
struct vfs *
lo_realvfs(struct vfs *vfsp, struct vnode **realrootvpp)
{
        struct loinfo *li = vtoli(vfsp);
        struct lfsnode *lfs;

        ASSERT(li->li_refct > 0);
        if (vfsp == li->li_mountvfs) {
                if (realrootvpp != NULL)
                        *realrootvpp = vtol(li->li_rootvp)->lo_vp;
                return (li->li_realvfs);
        }
        mutex_enter(&li->li_lfslock);
        for (lfs = li->li_lfs; lfs != NULL; lfs = lfs->lfs_next) {
                if (vfsp == &lfs->lfs_vfs) {
                        if (realrootvpp != NULL)
                                *realrootvpp = lfs->lfs_realrootvp;
                        mutex_exit(&li->li_lfslock);
                        return (lfs->lfs_realvfs);
                }
        }
        panic("lo_realvfs");
        /*NOTREACHED*/
}

/*
 * Lnode lookup stuff.
 * These routines maintain a table of lnodes hashed by vp so
 * that the lnode for a vp can be found if it already exists.
 *
 * NB: A lofs shadow vnode causes exactly one VN_HOLD() on the
 * underlying vnode.
 */

/*
 * Retire old hashtables.
 */
static void
lretire(struct loinfo *li, struct lobucket *table, uint_t size)
{
        struct lo_retired_ht *lrhp;

        lrhp = kmem_alloc(sizeof (*lrhp), KM_SLEEP);
        lrhp->lrh_table = table;
        lrhp->lrh_size = size;

        mutex_enter(&li->li_htlock);
        lrhp->lrh_next = li->li_retired;
        li->li_retired = lrhp;
        mutex_exit(&li->li_htlock);
}

/*
 * Grow the hashtable.
 */
static void
lgrow(struct loinfo *li, uint_t newsize)
{
        uint_t oldsize;
        uint_t i;
        struct lobucket *oldtable, *newtable;

        /*
         * It's OK to not have enough memory to resize the hashtable.
         * We'll go down this path the next time we add something to the
         * table, and retry the allocation then.
         */
        if ((newtable = kmem_zalloc(newsize * sizeof (*li->li_hashtable),
            KM_NOSLEEP)) == NULL)
                return;

        mutex_enter(&li->li_htlock);
        if (newsize <= li->li_htsize) {
                mutex_exit(&li->li_htlock);
                kmem_free(newtable, newsize * sizeof (*li->li_hashtable));
                return;
        }
        oldsize = li->li_htsize;
        oldtable = li->li_hashtable;

        /*
         * Grab all locks so TABLE_LOCK_ENTER() calls block until the
         * resize is complete.
         */
        for (i = 0; i < oldsize; i++)
                mutex_enter(&oldtable[i].lh_lock);
        /*
         * li->li_hashtable gets set before li->li_htsize, so in the
         * time between the two assignments, callers of
         * TABLE_LOCK_ENTER() cannot hash to a bucket beyond oldsize,
         * hence we only need to grab the locks up to oldsize.
         */
        for (i = 0; i < oldsize; i++)
                mutex_enter(&newtable[i].lh_lock);
        /*
         * Rehash.
         */
        for (i = 0; i < oldsize; i++) {
                lnode_t *tlp, *nlp;

                for (tlp = oldtable[i].lh_chain; tlp != NULL; tlp = nlp) {
                        uint_t hash = ltablehash(tlp->lo_vp, newsize);

                        nlp = tlp->lo_next;
                        tlp->lo_next = newtable[hash].lh_chain;
                        newtable[hash].lh_chain = tlp;
                        newtable[hash].lh_count++;
                }
        }

        /*
         * As soon as we store the new hashtable, future locking operations
         * will use it.  Therefore, we must ensure that all the state we've
         * just established reaches global visibility before the new hashtable
         * does.
         */
        membar_producer();
        li->li_hashtable = newtable;

        /*
         * table_lock_enter() relies on the fact that li->li_hashtable
         * is set to its new value before li->li_htsize.
         */
        membar_producer();
        li->li_htsize = newsize;

        /*
         * The new state is consistent now, so we can drop all the locks.
         */
        for (i = 0; i < oldsize; i++) {
                mutex_exit(&newtable[i].lh_lock);
                mutex_exit(&oldtable[i].lh_lock);
        }
        mutex_exit(&li->li_htlock);

        lretire(li, oldtable, oldsize);
}

/*
 * Put a lnode in the table
 */
static void
lsave(lnode_t *lp, struct loinfo *li)
{
        ASSERT(lp->lo_vp);
        ASSERT(MUTEX_HELD(TABLE_LOCK(lp->lo_vp, li)));

#ifdef LODEBUG
        lo_dprint(4, "lsave lp %p hash %d\n",
            lp, ltablehash(lp->lo_vp, li->li_htsize));
#endif

        TABLE_COUNT(lp->lo_vp, li)++;
        lp->lo_next = TABLE_BUCKET(lp->lo_vp, li);
        TABLE_BUCKET(lp->lo_vp, li) = lp;

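        /*
         * If this mount's lnode count has outgrown the table, grow it.
         * lgrow() acquires li_htlock and then every bucket lock, so our
         * bucket lock must be dropped first; the new lnode is already
         * in the table and is simply rehashed if the table grows.
         */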
        if (li->li_refct > (li->li_htsize << lo_resize_threshold)) {
                TABLE_LOCK_EXIT(lp->lo_vp, li);
                lgrow(li, li->li_htsize << lo_resize_factor);
                TABLE_LOCK_ENTER(lp->lo_vp, li);
        }
}

/*
 * Our version of vfs_rele() that stops at 1 instead of 0, and calls
 * freelfsnode() instead of kmem_free().
 */
static void
lfs_rele(struct lfsnode *lfs, struct loinfo *li)
{
        vfs_t *vfsp = &lfs->lfs_vfs;

        ASSERT(MUTEX_HELD(&li->li_lfslock));
        ASSERT(vfsp->vfs_count > 1);
        if (atomic_add_32_nv(&vfsp->vfs_count, -1) == 1)
                freelfsnode(lfs, li);
}

/*
 * Remove a lnode from the table
 */
void
freelonode(lnode_t *lp)
{
        lnode_t *lt;
        lnode_t *ltprev = NULL;
        struct lfsnode *lfs, *nextlfs;
        struct vfs *vfsp;
        struct vnode *vp = ltov(lp);
        struct vnode *realvp = realvp(vp);
        struct loinfo *li = vtoli(vp->v_vfsp);

#ifdef LODEBUG
        lo_dprint(4, "freelonode lp %p hash %d\n",
            lp, ltablehash(lp->lo_vp, li->li_htsize));
#endif
        TABLE_LOCK_ENTER(lp->lo_vp, li);

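        /*
         * The bucket lock keeps lfind() from handing out new holds on
         * this lnode, so if v_count is still 1 it is safe to tear the
         * lnode down; otherwise another thread re-found it and we only
         * drop our hold.
         */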
        mutex_enter(&vp->v_lock);
        if (vp->v_count > 1) {
                vp->v_count--;  /* release our hold from vn_rele */
                mutex_exit(&vp->v_lock);
                TABLE_LOCK_EXIT(lp->lo_vp, li);
                return;
        }
        mutex_exit(&vp->v_lock);

        for (lt = TABLE_BUCKET(lp->lo_vp, li); lt != NULL;
            ltprev = lt, lt = lt->lo_next) {
                if (lt == lp) {
#ifdef LODEBUG
                        lo_dprint(4, "freeing %p, vfsp %p\n",
                            vp, vp->v_vfsp);
#endif
                        atomic_add_32(&li->li_refct, -1);
                        vfsp = vp->v_vfsp;
                        vn_invalid(vp);
                        if (vfsp != li->li_mountvfs) {
                                mutex_enter(&li->li_lfslock);
                                /*
                                 * Check for unused lfs
                                 */
                                lfs = li->li_lfs;
                                while (lfs != NULL) {
                                        nextlfs = lfs->lfs_next;
                                        if (vfsp == &lfs->lfs_vfs) {
                                                lfs_rele(lfs, li);
                                                break;
                                        }
                                        if (lfs->lfs_vfs.vfs_count == 1) {
                                                /*
                                                 * Lfs is idle
                                                 */
                                                freelfsnode(lfs, li);
                                        }
                                        lfs = nextlfs;
                                }
                                mutex_exit(&li->li_lfslock);
                        }
                        if (ltprev == NULL) {
                                TABLE_BUCKET(lt->lo_vp, li) = lt->lo_next;
                        } else {
                                ltprev->lo_next = lt->lo_next;
                        }
                        TABLE_COUNT(lt->lo_vp, li)--;
                        TABLE_LOCK_EXIT(lt->lo_vp, li);
                        kmem_cache_free(lnode_cache, lt);
                        vn_free(vp);
                        VN_RELE(realvp);
                        return;
                }
        }
        panic("freelonode");
        /*NOTREACHED*/
}

/*
 * Lookup a lnode by vp
 */
static lnode_t *
lfind(struct vnode *vp, struct loinfo *li)
{
        lnode_t *lt;

        ASSERT(MUTEX_HELD(TABLE_LOCK(vp, li)));

        lt = TABLE_BUCKET(vp, li);
        while (lt != NULL) {
                if (lt->lo_vp == vp) {
                        VN_HOLD(ltov(lt));
                        return (lt);
                }
                lt = lt->lo_next;
        }
        return (NULL);
}

#ifdef  LODEBUG
static int lofsdebug;
#endif  /* LODEBUG */

/*
 * Utilities used by both client and server
 * Standard levels:
 * 0) no debugging
 * 1) hard failures
 * 2) soft failures
 * 3) current test software
 * 4) main procedure entry points
 * 5) main procedure exit points
 * 6) utility procedure entry points
 * 7) utility procedure exit points
 * 8) obscure procedure entry points
 * 9) obscure procedure exit points
 * 10) random stuff
 * 11) all <= 1
 * 12) all <= 2
 * 13) all <= 3
 * ...
 */

#ifdef LODEBUG
/*VARARGS2*/
lo_dprint(level, str, a1, a2, a3, a4, a5, a6, a7, a8, a9)
        int level;
        char *str;
        int a1, a2, a3, a4, a5, a6, a7, a8, a9;
{

        if (lofsdebug == level || (lofsdebug > 10 && (lofsdebug - 10) >= level))
                printf(str, a1, a2, a3, a4, a5, a6, a7, a8, a9);
}
#endif