1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27 /* All Rights Reserved */
  28 
  29 
  30 #include <nfs/nfs4_clnt.h>
  31 #include <nfs/rnode4.h>
  32 #include <sys/systm.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/atomic.h>
  35 
  36 static void     nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
  37 static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
  38                                 nfs4_oo_hash_bucket_t *, mntinfo4_t *);
  39 static open_delegation_type4 get_dtype(rnode4_t *);
  40 
  41 #ifdef DEBUG
  42 int nfs4_client_foo_debug = 0x0;
  43 int nfs4_client_open_dg = 0x0;
  44 /*
  45  * If this is non-zero, the lockowner and openowner seqid sync primitives
  46  * will intermittently return errors.
  47  */
  48 static int seqid_sync_faults = 0;
  49 #endif
  50 
  51 stateid4 clnt_special0 = {
  52         0,
  53         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
  54 };
  55 
  56 stateid4 clnt_special1 = {
  57         0xffffffff,
  58         {
  59                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  60                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  61                 (char)0xff, (char)0xff, (char)0xff, (char)0xff
  62         }
  63 };
  64 
  65 /* finds hash bucket and locks it */
  66 static nfs4_oo_hash_bucket_t *
  67 lock_bucket(cred_t *cr, mntinfo4_t *mi)
  68 {
  69         nfs4_oo_hash_bucket_t *bucketp;
  70         uint32_t hash_key;
  71 
  72         hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
  73             % NFS4_NUM_OO_BUCKETS;
  74         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
  75             "hash_key %d for cred %p", hash_key, (void*)cr));
  76 
  77         ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
  78         ASSERT(mi != NULL);
  79         ASSERT(mutex_owned(&mi->mi_lock));
  80 
  81         bucketp = &(mi->mi_oo_list[hash_key]);
  82         mutex_enter(&bucketp->b_lock);
  83         return (bucketp);
  84 }
  85 
  86 /* unlocks hash bucket pointed by bucket_ptr */
  87 static void
  88 unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
  89 {
  90         mutex_exit(&bucketp->b_lock);
  91 }
  92 
  93 /*
  94  * Removes the lock owner from the rnode's lock_owners list and frees the
  95  * corresponding reference.
  96  */
  97 void
  98 nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
  99 {
 100         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 101             "nfs4_rnode_remove_lock_owner"));
 102 
 103         mutex_enter(&rp->r_statev4_lock);
 104 
 105         if (lop->lo_next_rnode == NULL) {
 106                 /* already removed from list */
 107                 mutex_exit(&rp->r_statev4_lock);
 108                 return;
 109         }
 110 
 111         ASSERT(lop->lo_prev_rnode != NULL);
 112 
 113         lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
 114         lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
 115 
 116         lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
 117 
 118         mutex_exit(&rp->r_statev4_lock);
 119 
 120         /*
 121          * This would be an appropriate place for
 122          * RELEASE_LOCKOWNER.  For now, this is overkill
 123          * because in the common case, close is going to
 124          * release any lockowners anyway.
 125          */
 126         lock_owner_rele(lop);
 127 }
 128 
 129 /*
 130  * Remove all lock owners from the rnode's lock_owners list.  Frees up
 131  * their references from the list.
 132  */
 133 
 134 void
 135 nfs4_flush_lock_owners(rnode4_t *rp)
 136 {
 137         nfs4_lock_owner_t *lop;
 138 
 139         mutex_enter(&rp->r_statev4_lock);
 140         while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
 141                 lop = rp->r_lo_head.lo_next_rnode;
 142                 lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
 143                 lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
 144                 lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
 145                 lock_owner_rele(lop);
 146         }
 147         mutex_exit(&rp->r_statev4_lock);
 148 }
 149 
 150 void
 151 nfs4_clear_open_streams(rnode4_t *rp)
 152 {
 153         nfs4_open_stream_t *osp;
 154 
 155         mutex_enter(&rp->r_os_lock);
 156         while ((osp = list_head(&rp->r_open_streams)) != NULL) {
 157                 open_owner_rele(osp->os_open_owner);
 158                 list_remove(&rp->r_open_streams, osp);
 159                 mutex_destroy(&osp->os_sync_lock);
 160                 osp->os_open_owner = NULL;
 161                 kmem_free(osp, sizeof (*osp));
 162         }
 163         mutex_exit(&rp->r_os_lock);
 164 }
 165 
 166 void
 167 open_owner_hold(nfs4_open_owner_t *oop)
 168 {
 169         mutex_enter(&oop->oo_lock);
 170         oop->oo_ref_count++;
 171         mutex_exit(&oop->oo_lock);
 172 }
 173 
 174 /*
 175  * Frees the open owner if the ref count hits zero.
 176  */
 177 void
 178 open_owner_rele(nfs4_open_owner_t *oop)
 179 {
 180         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 181             "open_owner_rele"));
 182 
 183         mutex_enter(&oop->oo_lock);
 184         oop->oo_ref_count--;
 185         if (oop->oo_ref_count == 0) {
 186                 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 187                     "open_owner_rele: freeing open owner"));
 188                 oop->oo_valid = 0;
 189                 mutex_exit(&oop->oo_lock);
 190                 /*
 191                  * Ok, we don't destroy the open owner, nor do we put it on
 192                  * the mntinfo4's free list just yet.  We are lazy about it
 193                  * and let callers to find_open_owner() do that to keep locking
 194                  * simple.
 195                  */
 196         } else {
 197                 mutex_exit(&oop->oo_lock);
 198         }
 199 }
 200 
 201 void
 202 open_stream_hold(nfs4_open_stream_t *osp)
 203 {
 204         mutex_enter(&osp->os_sync_lock);
 205         osp->os_ref_count++;
 206         mutex_exit(&osp->os_sync_lock);
 207 }
 208 
 209 /*
 210  * Frees the open stream and removes it from the rnode4's open streams list if
 211  * the ref count drops to zero.
 212  */
 213 void
 214 open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
 215 {
 216         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 217             "open_stream_rele"));
 218 
 219         ASSERT(!mutex_owned(&rp->r_os_lock));
 220 
 221         mutex_enter(&osp->os_sync_lock);
 222         ASSERT(osp->os_ref_count > 0);
 223         osp->os_ref_count--;
 224         if (osp->os_ref_count == 0) {
 225                 nfs4_open_owner_t *tmp_oop;
 226 
 227                 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 228                     "open_stream_rele: freeing open stream"));
 229                 osp->os_valid = 0;
 230                 tmp_oop = osp->os_open_owner;
 231                 mutex_exit(&osp->os_sync_lock);
 232 
 233                 /* now see if we need to destroy the open owner */
 234                 open_owner_rele(tmp_oop);
 235 
 236                 mutex_enter(&rp->r_os_lock);
 237                 list_remove(&rp->r_open_streams, osp);
 238                 mutex_exit(&rp->r_os_lock);
 239 
 240                 /* free up osp */
 241                 mutex_destroy(&osp->os_sync_lock);
 242                 osp->os_open_owner = NULL;
 243                 kmem_free(osp, sizeof (*osp));
 244         } else {
 245                 mutex_exit(&osp->os_sync_lock);
 246         }
 247 }
 248 
 249 void
 250 lock_owner_hold(nfs4_lock_owner_t *lop)
 251 {
 252         mutex_enter(&lop->lo_lock);
 253         lop->lo_ref_count++;
 254         mutex_exit(&lop->lo_lock);
 255 }
 256 
 257 /*
 258  * Frees the lock owner if the ref count hits zero and
 259  * the structure no longer has no locks.
 260  */
 261 void
 262 lock_owner_rele(nfs4_lock_owner_t *lop)
 263 {
 264         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 265             "lock_owner_rele"));
 266 
 267         mutex_enter(&lop->lo_lock);
 268         lop->lo_ref_count--;
 269         if (lop->lo_ref_count == 0) {
 270                 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 271                     "lock_owner_rele: freeing lock owner: "
 272                     "%x", lop->lo_pid));
 273                 lop->lo_valid = 0;
 274                 /*
 275                  * If there are no references, the lock_owner should
 276                  * already be off the rnode's list.
 277                  */
 278                 ASSERT(lop->lo_next_rnode == NULL);
 279                 ASSERT(lop->lo_prev_rnode == NULL);
 280                 ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
 281                 ASSERT(lop->lo_seqid_holder == NULL);
 282                 mutex_exit(&lop->lo_lock);
 283 
 284                 /* free up lop */
 285                 cv_destroy(&lop->lo_cv_seqid_sync);
 286                 mutex_destroy(&lop->lo_lock);
 287                 kmem_free(lop, sizeof (*lop));
 288         } else {
 289                 mutex_exit(&lop->lo_lock);
 290         }
 291 }
 292 
 293 /*
 294  * This increments the open owner ref count if found.
 295  * The argument 'just_created' determines whether we are looking for open
 296  * owners with the 'oo_just_created' flag set or not.
 297  */
 298 nfs4_open_owner_t *
 299 find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
 300 {
 301         nfs4_open_owner_t       *oop = NULL, *next_oop;
 302         nfs4_oo_hash_bucket_t   *bucketp;
 303 
 304         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 305             "find_open_owner: cred %p, just_created %d",
 306             (void*)cr, just_created));
 307 
 308         ASSERT(mi != NULL);
 309         ASSERT(mutex_owned(&mi->mi_lock));
 310 
 311         bucketp = lock_bucket(cr, mi);
 312 
 313         /* got hash bucket, search through open owners */
 314         for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
 315                 mutex_enter(&oop->oo_lock);
 316                 if (!crcmp(oop->oo_cred, cr) &&
 317                     (oop->oo_just_created == just_created ||
 318                     just_created == NFS4_JUST_CREATED)) {
 319                         /* match */
 320                         if (oop->oo_valid == 0) {
 321                                 /* reactivate the open owner */
 322                                 oop->oo_valid = 1;
 323                                 ASSERT(oop->oo_ref_count == 0);
 324                         }
 325                         oop->oo_ref_count++;
 326                         mutex_exit(&oop->oo_lock);
 327                         unlock_bucket(bucketp);
 328                         return (oop);
 329                 }
 330                 next_oop = list_next(&bucketp->b_oo_hash_list, oop);
 331                 if (oop->oo_valid == 0) {
 332                         list_remove(&bucketp->b_oo_hash_list, oop);
 333 
 334                         /*
 335                          * Now we go ahead and put this open owner
 336                          * on the freed list.  This is our lazy method.
 337                          */
 338                         nfs4_free_open_owner(oop, mi);
 339                 }
 340 
 341                 mutex_exit(&oop->oo_lock);
 342                 oop = next_oop;
 343         }
 344 
 345         /* search through recently freed open owners */
 346         oop = find_freed_open_owner(cr, bucketp, mi);
 347 
 348         unlock_bucket(bucketp);
 349 
 350         return (oop);
 351 }
 352 
 353 nfs4_open_owner_t *
 354 find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
 355 {
 356         nfs4_open_owner_t *oop;
 357 
 358         mutex_enter(&mi->mi_lock);
 359         oop = find_open_owner_nolock(cr, just_created, mi);
 360         mutex_exit(&mi->mi_lock);
 361 
 362         return (oop);
 363 }
 364 
 365 /*
 366  * This increments osp's ref count if found.
 367  * Returns with 'os_sync_lock' held.
 368  */
 369 nfs4_open_stream_t *
 370 find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
 371 {
 372         nfs4_open_stream_t      *osp;
 373 
 374         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 375             "find_open_stream"));
 376 
 377         mutex_enter(&rp->r_os_lock);
 378         /* Now, no one can add or delete to rp's open streams list */
 379         for (osp = list_head(&rp->r_open_streams); osp != NULL;
 380             osp = list_next(&rp->r_open_streams, osp)) {
 381                 mutex_enter(&osp->os_sync_lock);
 382                 if (osp->os_open_owner == oop && osp->os_valid != 0) {
 383                         /* match */
 384                         NFS4_DEBUG(nfs4_client_state_debug,
 385                             (CE_NOTE, "find_open_stream "
 386                             "got a match"));
 387 
 388                         osp->os_ref_count++;
 389                         mutex_exit(&rp->r_os_lock);
 390                         return (osp);
 391                 }
 392                 mutex_exit(&osp->os_sync_lock);
 393         }
 394 
 395         mutex_exit(&rp->r_os_lock);
 396         return (NULL);
 397 }
 398 
 399 /*
 400  * Find the lock owner for the given file and process ID.  If "which" is
 401  * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
 402  * from the server.
 403  *
 404  * This increments the lock owner's ref count if found.  Returns NULL if
 405  * there was no match.
 406  */
 407 nfs4_lock_owner_t *
 408 find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
 409 {
 410         nfs4_lock_owner_t       *lop, *next_lop;
 411 
 412         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 413             "find_lock_owner: pid %x, which %d", pid, which));
 414 
 415         ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);
 416 
 417         /* search by pid */
 418         mutex_enter(&rp->r_statev4_lock);
 419 
 420         lop = rp->r_lo_head.lo_next_rnode;
 421         while (lop != &rp->r_lo_head) {
 422                 mutex_enter(&lop->lo_lock);
 423                 if (lop->lo_pid == pid && lop->lo_valid != 0 &&
 424                     !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
 425                         if (which == LOWN_ANY ||
 426                             lop->lo_just_created != NFS4_JUST_CREATED) {
 427                                 /* Found a matching lock owner */
 428                                 NFS4_DEBUG(nfs4_client_state_debug,
 429                                     (CE_NOTE, "find_lock_owner: "
 430                                     "got a match"));
 431 
 432                                 lop->lo_ref_count++;
 433                                 mutex_exit(&lop->lo_lock);
 434                                 mutex_exit(&rp->r_statev4_lock);
 435                                 return (lop);
 436                         }
 437                 }
 438                 next_lop = lop->lo_next_rnode;
 439                 mutex_exit(&lop->lo_lock);
 440                 lop = next_lop;
 441         }
 442 
 443         mutex_exit(&rp->r_statev4_lock);
 444         return (NULL);
 445 }
 446 
 447 /*
 448  * This returns the delegation stateid as 'sid'. Returns 1 if a successful
 449  * delegation stateid was found, otherwise returns 0.
 450  */
 451 
 452 static int
 453 nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
 454 {
 455         ASSERT(!mutex_owned(&rp->r_statev4_lock));
 456 
 457         mutex_enter(&rp->r_statev4_lock);
 458         if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
 459             (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
 460             !rp->r_deleg_return_pending) {
 461 
 462                 *sid = rp->r_deleg_stateid;
 463                 mutex_exit(&rp->r_statev4_lock);
 464                 return (1);
 465         }
 466         mutex_exit(&rp->r_statev4_lock);
 467         return (0);
 468 }
 469 
 470 /*
 471  * This returns the lock stateid as 'sid'. Returns 1 if a successful lock
 472  * stateid was found, otherwise returns 0.
 473  */
 474 static int
 475 nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
 476 {
 477         nfs4_lock_owner_t *lop;
 478 
 479         lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
 480 
 481         if (lop) {
 482                 /*
 483                  * Found a matching lock owner, so use a lock
 484                  * stateid rather than an open stateid.
 485                  */
 486                 mutex_enter(&lop->lo_lock);
 487                 *sid = lop->lock_stateid;
 488                 mutex_exit(&lop->lo_lock);
 489                 lock_owner_rele(lop);
 490                 return (1);
 491         }
 492 
 493         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 494             "nfs4_get_lock_stateid: no lop"));
 495         return (0);
 496 }
 497 
 498 /*
 499  * This returns the open stateid as 'sid'. Returns 1 if a successful open
 500  * stateid was found, otherwise returns 0.
 501  *
 502  * Once the stateid is returned to the caller, it is no longer protected;
 503  * so the caller must be prepared to handle OLD/BAD_STATEID where
 504  * appropiate.
 505  */
 506 static int
 507 nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
 508 {
 509         nfs4_open_owner_t *oop;
 510         nfs4_open_stream_t *osp;
 511 
 512         ASSERT(mi != NULL);
 513 
 514         oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
 515         if (!oop) {
 516                 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 517                     "nfs4_get_open_stateid: no oop"));
 518                 return (0);
 519         }
 520 
 521         osp = find_open_stream(oop, rp);
 522         open_owner_rele(oop);
 523         if (!osp) {
 524                 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 525                     "nfs4_get_open_stateid: no osp"));
 526                 return (0);
 527         }
 528 
 529         if (osp->os_failed_reopen) {
 530                 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 531                     "nfs4_get_open_stateid: osp %p failed reopen",
 532                     (void *)osp));
 533                 mutex_exit(&osp->os_sync_lock);
 534                 open_stream_rele(osp, rp);
 535                 return (0);
 536         }
 537         *sid = osp->open_stateid;
 538         mutex_exit(&osp->os_sync_lock);
 539         open_stream_rele(osp, rp);
 540         return (1);
 541 }
 542 
 543 /*
 544  * Returns the delegation stateid if this 'op' is OP_WRITE and the
 545  * delegation we hold is a write delegation, OR this 'op' is not
 546  * OP_WRITE and we have a delegation held (read or write), otherwise
 547  * returns the lock stateid if there is a lock owner, otherwise
 548  * returns the open stateid if there is a open stream, otherwise
 549  * returns special stateid <seqid = 0, other = 0>.
 550  *
 551  * Used for WRITE operations.
 552  */
 553 stateid4
 554 nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
 555         nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
 556 {
 557         stateid4 sid;
 558 
 559         if (nfs4_get_deleg_stateid(rp, op, &sid)) {
 560                 if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
 561                         sid_tp->cur_sid_type = DEL_SID;
 562                         return (sid);
 563                 }
 564         }
 565         if (nfs4_get_lock_stateid(rp, pid, &sid)) {
 566                 if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
 567                         sid_tp->cur_sid_type = LOCK_SID;
 568                         return (sid);
 569                 }
 570         }
 571         if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
 572                 if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
 573                         sid_tp->cur_sid_type = OPEN_SID;
 574                         return (sid);
 575                 }
 576         }
 577         bzero(&sid, sizeof (stateid4));
 578         sid_tp->cur_sid_type = SPEC_SID;
 579         return (sid);
 580 }
 581 
 582 /*
 583  * Returns the delegation stateid if this 'op' is OP_WRITE and the
 584  * delegation we hold is a write delegation, OR this 'op' is not
 585  * OP_WRITE and we have a delegation held (read or write), otherwise
 586  * returns the lock stateid if there is a lock owner, otherwise
 587  * returns the open stateid if there is a open stream, otherwise
 588  * returns special stateid <seqid = 0, other = 0>.
 589  *
 590  * This also updates which stateid we are using in 'sid_tp', skips
 591  * previously attempted stateids, and skips checking higher priority
 592  * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
 593  * for async reads.
 594  *
 595  * Used for READ and SETATTR operations.
 596  */
 597 stateid4
 598 nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
 599         nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
 600 {
 601         stateid4 sid;
 602 
 603         /*
 604          * For asynchronous READs, do not attempt to retry from the start of
 605          * the stateid priority list, just continue from where you last left
 606          * off.
 607          */
 608         if (async_read) {
 609                 switch (sid_tp->cur_sid_type) {
 610                 case NO_SID:
 611                         break;
 612                 case DEL_SID:
 613                         goto lock_stateid;
 614                 case LOCK_SID:
 615                         goto open_stateid;
 616                 case OPEN_SID:
 617                         goto special_stateid;
 618                 case SPEC_SID:
 619                 default:
 620                         cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
 621                             "stateid type %d", sid_tp->cur_sid_type);
 622                 }
 623         }
 624 
 625         if (nfs4_get_deleg_stateid(rp, op, &sid)) {
 626                 if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
 627                         sid_tp->cur_sid_type = DEL_SID;
 628                         return (sid);
 629                 }
 630         }
 631 lock_stateid:
 632         if (nfs4_get_lock_stateid(rp, pid, &sid)) {
 633                 if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
 634                         sid_tp->cur_sid_type = LOCK_SID;
 635                         return (sid);
 636                 }
 637         }
 638 open_stateid:
 639         if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
 640                 if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
 641                         sid_tp->cur_sid_type = OPEN_SID;
 642                         return (sid);
 643                 }
 644         }
 645 special_stateid:
 646         bzero(&sid, sizeof (stateid4));
 647         sid_tp->cur_sid_type = SPEC_SID;
 648         return  (sid);
 649 }
 650 
 651 void
 652 nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
 653 {
 654         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 655             "nfs4_set_lock_stateid"));
 656 
 657         ASSERT(lop);
 658         ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
 659 
 660         mutex_enter(&lop->lo_lock);
 661         lop->lock_stateid = stateid;
 662         mutex_exit(&lop->lo_lock);
 663 }
 664 
 665 /*
 666  * Sequence number used when a new open owner is needed.
 667  * This is used so as to not confuse the server.  Since a open owner
 668  * is based off of cred, a cred could be re-used quickly, and the server
 669  * may not release all state for a cred.
 670  */
 671 static uint64_t open_owner_seq_num = 0;
 672 
 673 uint64_t
 674 nfs4_get_new_oo_name(void)
 675 {
 676         return (atomic_add_64_nv(&open_owner_seq_num, 1));
 677 }
 678 
 679 /*
 680  * Create a new open owner and add it to the open owner hash table.
 681  */
 682 nfs4_open_owner_t *
 683 create_open_owner(cred_t *cr, mntinfo4_t *mi)
 684 {
 685         nfs4_open_owner_t       *oop;
 686         nfs4_oo_hash_bucket_t   *bucketp;
 687 
 688         oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
 689         /*
 690          * Make sure the cred doesn't go away when we put this open owner
 691          * on the free list, as well as make crcmp() a valid check.
 692          */
 693         crhold(cr);
 694         oop->oo_cred = cr;
 695         mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
 696         oop->oo_ref_count = 1;
 697         oop->oo_valid = 1;
 698         oop->oo_just_created = NFS4_JUST_CREATED;
 699         oop->oo_seqid = 0;
 700         oop->oo_seqid_inuse = 0;
 701         oop->oo_last_good_seqid = 0;
 702         oop->oo_last_good_op = TAG_NONE;
 703         oop->oo_cred_otw = NULL;
 704         cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
 705 
 706         /*
 707          * A Solaris open_owner is <oo_seq_num>
 708          */
 709         oop->oo_name = nfs4_get_new_oo_name();
 710 
 711         /* now add the struct into the cred hash table */
 712         ASSERT(mutex_owned(&mi->mi_lock));
 713         bucketp = lock_bucket(cr, mi);
 714         list_insert_head(&bucketp->b_oo_hash_list, oop);
 715         unlock_bucket(bucketp);
 716 
 717         return (oop);
 718 }
 719 
 720 /*
 721  * Create a new open stream and it to the rnode's list.
 722  * Increments the ref count on oop.
 723  * Returns with 'os_sync_lock' held.
 724  */
 725 nfs4_open_stream_t *
 726 create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
 727 {
 728         nfs4_open_stream_t      *osp;
 729 
 730 #ifdef DEBUG
 731         mutex_enter(&oop->oo_lock);
 732         ASSERT(oop->oo_seqid_inuse);
 733         mutex_exit(&oop->oo_lock);
 734 #endif
 735 
 736         osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
 737         osp->os_open_ref_count = 1;
 738         osp->os_mapcnt = 0;
 739         osp->os_ref_count = 2;
 740         osp->os_valid = 1;
 741         osp->os_open_owner = oop;
 742         osp->os_orig_oo_name = oop->oo_name;
 743         bzero(&osp->open_stateid, sizeof (stateid4));
 744         osp->os_share_acc_read = 0;
 745         osp->os_share_acc_write = 0;
 746         osp->os_mmap_read = 0;
 747         osp->os_mmap_write = 0;
 748         osp->os_share_deny_none = 0;
 749         osp->os_share_deny_read = 0;
 750         osp->os_share_deny_write = 0;
 751         osp->os_delegation = 0;
 752         osp->os_dc_openacc = 0;
 753         osp->os_final_close = 0;
 754         osp->os_pending_close = 0;
 755         osp->os_failed_reopen = 0;
 756         osp->os_force_close = 0;
 757         mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);
 758 
 759         /* open owner gets a reference */
 760         open_owner_hold(oop);
 761 
 762         /* now add the open stream to rp */
 763         mutex_enter(&rp->r_os_lock);
 764         mutex_enter(&osp->os_sync_lock);
 765         list_insert_head(&rp->r_open_streams, osp);
 766         mutex_exit(&rp->r_os_lock);
 767 
 768         return (osp);
 769 }
 770 
 771 /*
 772  * Returns an open stream with 'os_sync_lock' held.
 773  * If the open stream is found (rather than created), its
 774  * 'os_open_ref_count' is bumped.
 775  *
 776  * There is no race with two threads entering this function
 777  * and creating two open streams for the same <oop, rp> pair.
 778  * This is because the open seqid sync must be acquired, thus
 779  * only allowing one thread in at a time.
 780  */
 781 nfs4_open_stream_t *
 782 find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
 783         int *created_osp)
 784 {
 785         nfs4_open_stream_t *osp;
 786 
 787 #ifdef DEBUG
 788         mutex_enter(&oop->oo_lock);
 789         ASSERT(oop->oo_seqid_inuse);
 790         mutex_exit(&oop->oo_lock);
 791 #endif
 792 
 793         osp = find_open_stream(oop, rp);
 794         if (!osp) {
 795                 osp = create_open_stream(oop, rp);
 796                 if (osp)
 797                         *created_osp = 1;
 798         } else {
 799                 *created_osp = 0;
 800                 osp->os_open_ref_count++;
 801         }
 802 
 803         return (osp);
 804 }
 805 
 806 static uint64_t lock_owner_seq_num = 0;
 807 
 808 /*
 809  * Create a new lock owner and add it to the rnode's list.
 810  * Assumes the rnode's r_statev4_lock is held.
 811  * The created lock owner has a reference count of 2: one for the list and
 812  * one for the caller to use.  Returns the lock owner locked down.
 813  */
 814 nfs4_lock_owner_t *
 815 create_lock_owner(rnode4_t *rp, pid_t pid)
 816 {
 817         nfs4_lock_owner_t       *lop;
 818 
 819         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 820             "create_lock_owner: pid %x", pid));
 821 
 822         ASSERT(mutex_owned(&rp->r_statev4_lock));
 823 
 824         lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
 825         lop->lo_ref_count = 2;
 826         lop->lo_valid = 1;
 827         bzero(&lop->lock_stateid, sizeof (stateid4));
 828         lop->lo_pid = pid;
 829         lop->lock_seqid = 0;
 830         lop->lo_pending_rqsts = 0;
 831         lop->lo_just_created = NFS4_JUST_CREATED;
 832         lop->lo_flags = 0;
 833         lop->lo_seqid_holder = NULL;
 834 
 835         /*
 836          * A Solaris lock_owner is <seq_num><pid>
 837          */
 838         lop->lock_owner_name.ln_seq_num =
 839             atomic_add_64_nv(&lock_owner_seq_num, 1);
 840         lop->lock_owner_name.ln_pid = pid;
 841 
 842         cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
 843         mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);
 844 
 845         mutex_enter(&lop->lo_lock);
 846 
 847         /* now add the lock owner to rp */
 848         lop->lo_prev_rnode = &rp->r_lo_head;
 849         lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
 850         rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
 851         rp->r_lo_head.lo_next_rnode = lop;
 852 
 853         return (lop);
 854 
 855 }
 856 
 857 /*
 858  * This sets the lock seqid of a lock owner.
 859  */
 860 void
 861 nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
 862 {
 863         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 864             "nfs4_set_lock_seqid"));
 865 
 866         ASSERT(lop != NULL);
 867         ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
 868 
 869         lop->lock_seqid = seqid;
 870 }
 871 
 872 static void
 873 nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
 874 {
 875         nfs4_lo_name_t *cast_namep;
 876 
 877         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 878             "nfs4_set_new_lock_owner_args"));
 879 
 880         owner->owner_len = sizeof (*cast_namep);
 881         owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
 882         /*
 883          * A Solaris lock_owner is <seq_num><pid>
 884          */
 885         cast_namep = (nfs4_lo_name_t *)owner->owner_val;
 886         cast_namep->ln_seq_num = atomic_add_64_nv(&lock_owner_seq_num, 1);
 887         cast_namep->ln_pid = pid;
 888 }
 889 
 890 /*
 891  * Fill in the lock owner args.
 892  */
 893 void
 894 nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
 895 {
 896         nfs4_lock_owner_t *lop;
 897 
 898         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
 899             "nfs4_setlockowner_args"));
 900 
 901         /* This increments lop's ref count */
 902         lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
 903 
 904         if (!lop)
 905                 goto make_up_args;
 906 
 907         mutex_enter(&lop->lo_lock);
 908         owner->owner_len = sizeof (lop->lock_owner_name);
 909         owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
 910         bcopy(&lop->lock_owner_name, owner->owner_val,
 911             owner->owner_len);
 912         mutex_exit(&lop->lo_lock);
 913         lock_owner_rele(lop);
 914         return;
 915 
 916 make_up_args:
 917         nfs4_set_new_lock_owner_args(owner, pid);
 918 }
 919 
 920 /*
 921  * This ends our use of the open owner's open seqid by setting
 922  * the appropiate flags and issuing a cv_signal to wake up another
 923  * thread waiting to use the open seqid.
 924  */
 925 
 926 void
 927 nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
 928 {
 929         mutex_enter(&oop->oo_lock);
 930         ASSERT(oop->oo_seqid_inuse);
 931         oop->oo_seqid_inuse = 0;
 932         cv_broadcast(&oop->oo_cv_seqid_sync);
 933         mutex_exit(&oop->oo_lock);
 934 }
 935 
 936 /*
 937  * This starts our use of the open owner's open seqid by setting
 938  * the oo_seqid_inuse to true.  We will wait (forever) with a
 939  * cv_wait() until we are woken up.
 940  *
 941  * Return values:
 942  * 0            no problems
 943  * EAGAIN       caller should retry (like a recovery retry)
 944  */
 945 int
 946 nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
 947 {
 948         int error = 0;
 949 #ifdef DEBUG
 950         static int ops = 0;             /* fault injection */
 951 #endif
 952 
 953 #ifdef DEBUG
 954         if (seqid_sync_faults && curthread != mi->mi_recovthread &&
 955             ++ops % 5 == 0)
 956                 return (EAGAIN);
 957 #endif
 958 
 959         mutex_enter(&mi->mi_lock);
 960         if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
 961             curthread != mi->mi_recovthread)
 962                 error = EAGAIN;
 963         mutex_exit(&mi->mi_lock);
 964         if (error != 0)
 965                 goto done;
 966 
 967         mutex_enter(&oop->oo_lock);
 968 
 969         while (oop->oo_seqid_inuse) {
 970                 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
 971                     "nfs4_start_open_seqid_sync waiting on cv"));
 972 
 973                 cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
 974         }
 975 
 976         oop->oo_seqid_inuse = 1;
 977 
 978         mutex_exit(&oop->oo_lock);
 979 
 980         mutex_enter(&mi->mi_lock);
 981         if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
 982             curthread != mi->mi_recovthread)
 983                 error = EAGAIN;
 984         mutex_exit(&mi->mi_lock);
 985 
 986         if (error == EAGAIN)
 987                 nfs4_end_open_seqid_sync(oop);
 988 
 989         NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
 990             "nfs4_start_open_seqid_sync: error=%d", error));
 991 
 992 done:
 993         return (error);
 994 }
 995 
#ifdef  DEBUG
/*
 * DEBUG-only counters of OPENs that nfs4_is_otw_open_necessary() decided
 * could skip the over-the-wire call: [0] is bumped when an existing open
 * stream was reused, [1] when a new open stream was created because a
 * delegation is held.
 */
int bypass_otw[2];
#endif
 999 
/*
 * Checks to see if the OPEN OTW is necessary; that is, whether the file has
 * already been opened with the same access and deny bits we are now asking
 * for, or whether a delegation we hold lets us satisfy the open locally.
 * Note, this assumes that 'vp' is backed by an rnode4 (VTOR4() is applied).
 *
 * Arguments:
 * oop                  open owner the OPEN is being done for
 * flag                 open flags; only FREAD and FWRITE are examined
 * vp                   vnode being opened
 * just_been_created    when non-zero, the reference/share counts of an
 *                      existing open stream are NOT bumped
 * errorp               set to 0 on the paths that return 0; left untouched
 *                      on every path that returns 1
 * acc                  access bits (VREAD/VWRITE) checked against 'flag'
 *                      when only a delegation (no open stream) exists
 * rsp                  recovery state; must be non-NULL when only a
 *                      delegation exists.  If rsp->rs_sp is set, its state
 *                      reference count is incremented.
 *
 * The caller must hold r_deleg_recall_lock as reader (ASSERTed).
 *
 * Return values:
 * 0            the OTW OPEN can be skipped
 * 1            the caller must do the OTW OPEN
 */
int
nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
        int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
{
        rnode4_t *rp;
        nfs4_open_stream_t *osp;
        open_delegation_type4 dt;

        rp = VTOR4(vp);

        /*
         * Grab the delegation type.  This function is protected against
         * the delegation being returned by virtue of start_op (called
         * by nfs4open_otw) taking the r_deleg_recall_lock in read mode,
         * delegreturn requires this lock in write mode to proceed.
         */
        ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
        dt = get_dtype(rp);

        /* returns with 'os_sync_lock' held */
        osp = find_open_stream(oop, rp);

        if (osp) {
                /* set to 1 as soon as any reason to go OTW is found */
                uint32_t        do_otw = 0;

                /* a failed reopen always forces the OPEN over the wire */
                if (osp->os_failed_reopen) {
                        NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
                            "nfs4_is_otw_open_necessary: os_failed_reopen "
                            "set on osp %p, cr %p, rp %s", (void *)osp,
                            (void *)osp->os_open_owner->oo_cred,
                            rnode4info(rp)));
                        do_otw = 1;
                }

                /*
                 * check access/deny bits
                 *
                 * Read access not yet held on this stream needs an OTW
                 * OPEN unless some delegation is held; write access not
                 * yet held needs one unless a write delegation is held.
                 */
                if (!do_otw && (flag & FREAD))
                        if (osp->os_share_acc_read == 0 &&
                            dt == OPEN_DELEGATE_NONE)
                                do_otw = 1;

                if (!do_otw && (flag & FWRITE))
                        if (osp->os_share_acc_write == 0 &&
                            dt != OPEN_DELEGATE_WRITE)
                                do_otw = 1;

                if (!do_otw) {
                        NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
                            "nfs4_is_otw_open_necessary: can skip this "
                            "open OTW"));
                        /* account for this open on the existing stream */
                        if (!just_been_created) {
                                osp->os_open_ref_count++;
                                if (flag & FREAD)
                                        osp->os_share_acc_read++;
                                if (flag & FWRITE)
                                        osp->os_share_acc_write++;
                                osp->os_share_deny_none++;
                        }

                        /*
                         * Need to reset this bitfield for the possible case
                         * where we were going to OTW CLOSE the file, got a
                         * non-recoverable error, and before we could retry
                         * the CLOSE, OPENed the file again.
                         */
                        ASSERT(osp->os_open_owner->oo_seqid_inuse);
                        osp->os_final_close = 0;
                        osp->os_force_close = 0;

                        mutex_exit(&osp->os_sync_lock);
                        open_stream_rele(osp, rp);

#ifdef  DEBUG
                        bypass_otw[0]++;
#endif

                        *errorp = 0;
                        return (0);
                }
                /* must go OTW: drop the lock and our hold, then fall out */
                mutex_exit(&osp->os_sync_lock);
                open_stream_rele(osp, rp);

        } else if (dt != OPEN_DELEGATE_NONE) {
                /*
                 * Even if there isn't an open_stream yet, we may still be
                 * able to bypass the otw open if the client owns a delegation.
                 *
                 * If you are asking for WRITE, but I only have
                 * a read delegation, then you still have to go otw.
                 */

                if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
                        return (1);

                /*
                 * TODO - evaluate the nfsace4
                 */

                /*
                 * Check the access flags to make sure the caller
                 * had permission.
                 */
                if (flag & FREAD && !(acc & VREAD))
                        return (1);

                if (flag & FWRITE && !(acc & VWRITE))
                        return (1);

                /*
                 * create_open_stream will add a reference to oop,
                 * this will prevent the open_owner_rele done in
                 * nfs4open_otw from destroying the open_owner.
                 */

                /* returns with 'os_sync_lock' held */
                osp = create_open_stream(oop, rp);
                if (osp == NULL)
                        return (1);

                /* the new stream runs on the delegation's stateid */
                osp->open_stateid = rp->r_deleg_stateid;
                osp->os_delegation = 1;

                if (flag & FREAD)
                        osp->os_share_acc_read++;
                if (flag & FWRITE)
                        osp->os_share_acc_write++;

                osp->os_share_deny_none++;
                mutex_exit(&osp->os_sync_lock);

                open_stream_rele(osp, rp);

                /* the open owner now has state; mark it permanent */
                mutex_enter(&oop->oo_lock);
                oop->oo_just_created = NFS4_PERM_CREATED;
                mutex_exit(&oop->oo_lock);

                ASSERT(rsp != NULL);
                if (rsp->rs_sp != NULL) {
                        mutex_enter(&rsp->rs_sp->s_lock);
                        nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
                            VTOMI4(vp));
                        mutex_exit(&rsp->rs_sp->s_lock);
                }
#ifdef  DEBUG
                bypass_otw[1]++;
#endif

                *errorp = 0;
                return (0);
        }

        /* no usable open stream and no delegation shortcut: go OTW */
        return (1);
}
1159 
1160 static open_delegation_type4
1161 get_dtype(rnode4_t *rp)
1162 {
1163         open_delegation_type4 dt;
1164 
1165         mutex_enter(&rp->r_statev4_lock);
1166         ASSERT(!rp->r_deleg_return_inprog);
1167         if (rp->r_deleg_return_pending)
1168                 dt = OPEN_DELEGATE_NONE;
1169         else
1170                 dt = rp->r_deleg_type;
1171         mutex_exit(&rp->r_statev4_lock);
1172 
1173         return (dt);
1174 }
1175 
1176 /*
1177  * Fill in *locker with the lock state arguments for a LOCK call.  If
1178  * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
1179  * Caller must already hold the necessary seqid sync lock(s).
1180  */
1181 
1182 void
1183 nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
1184         nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
1185 {
1186         ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1187         if (lop->lo_just_created == NFS4_JUST_CREATED) {
1188                 /* this is a new lock request */
1189                 open_to_lock_owner4 *nown;
1190 
1191                 ASSERT(oop != NULL);
1192                 ASSERT(osp != NULL);
1193 
1194                 locker->new_lock_owner = TRUE;
1195                 nown = &locker->locker4_u.open_owner;
1196                 nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
1197                 mutex_enter(&osp->os_sync_lock);
1198                 nown->open_stateid = osp->open_stateid;
1199                 mutex_exit(&osp->os_sync_lock);
1200                 nown->lock_seqid = lop->lock_seqid; /* initial, so no +1 */
1201 
1202                 nown->lock_owner.clientid = clientid;
1203                 nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
1204                 nown->lock_owner.owner_val =
1205                     kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
1206                 bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
1207                     nown->lock_owner.owner_len);
1208         } else {
1209                 exist_lock_owner4 *eown;
1210                 /* have an existing lock owner */
1211 
1212                 locker->new_lock_owner = FALSE;
1213                 eown = &locker->locker4_u.lock_owner;
1214                 mutex_enter(&lop->lo_lock);
1215                 eown->lock_stateid = lop->lock_stateid;
1216                 mutex_exit(&lop->lo_lock);
1217                 eown->lock_seqid = lop->lock_seqid + 1;
1218         }
1219 }
1220 
1221 /*
1222  * This starts our use of the lock owner's lock seqid by setting
1223  * the lo_flags to NFS4_LOCK_SEQID_INUSE.  We will wait (forever)
1224  * with a cv_wait() until we are woken up.
1225  *
1226  * Return values:
1227  * 0            no problems
1228  * EAGAIN       caller should retry (like a recovery retry)
1229  */
1230 int
1231 nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
1232 {
1233         int error = 0;
1234 #ifdef DEBUG
1235         static int ops = 0;             /* fault injection */
1236 #endif
1237 
1238 #ifdef DEBUG
1239         if (seqid_sync_faults && curthread != mi->mi_recovthread &&
1240             ++ops % 7 == 0)
1241                 return (EAGAIN);
1242 #endif
1243 
1244         mutex_enter(&mi->mi_lock);
1245         if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1246             curthread != mi->mi_recovthread)
1247                 error = EAGAIN;
1248         mutex_exit(&mi->mi_lock);
1249         if (error != 0)
1250                 goto done;
1251 
1252         mutex_enter(&lop->lo_lock);
1253 
1254         ASSERT(lop->lo_seqid_holder != curthread);
1255         while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
1256                 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1257                     "nfs4_start_lock_seqid_sync: waiting on cv"));
1258 
1259                 cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
1260         }
1261         NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
1262             "NFS4_LOCK_SEQID_INUSE"));
1263 
1264         lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
1265         lop->lo_seqid_holder = curthread;
1266         mutex_exit(&lop->lo_lock);
1267 
1268         mutex_enter(&mi->mi_lock);
1269         if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1270             curthread != mi->mi_recovthread)
1271                 error = EAGAIN;
1272         mutex_exit(&mi->mi_lock);
1273 
1274         if (error == EAGAIN)
1275                 nfs4_end_lock_seqid_sync(lop);
1276 
1277         NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1278             "nfs4_start_lock_seqid_sync: error=%d", error));
1279 
1280 done:
1281         return (error);
1282 }
1283 
1284 /*
1285  * This ends our use of the lock owner's lock seqid by setting
1286  * the appropiate flags and issuing a cv_signal to wake up another
1287  * thread waiting to use the lock seqid.
1288  */
1289 void
1290 nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
1291 {
1292         mutex_enter(&lop->lo_lock);
1293         ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1294         ASSERT(lop->lo_seqid_holder == curthread);
1295         lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
1296         lop->lo_seqid_holder = NULL;
1297         cv_broadcast(&lop->lo_cv_seqid_sync);
1298         mutex_exit(&lop->lo_lock);
1299 }
1300 
1301 /*
1302  * Returns a reference to a lock owner via lopp, which has its lock seqid
1303  * synchronization started.
1304  * If the lock owner is in the 'just_created' state, then we return its open
1305  * owner and open stream and start the open seqid synchronization.
1306  *
1307  * Return value:
1308  * NFS4_OK              no problems
1309  * NFS4ERR_DELAY        there is lost state to recover; caller should retry
1310  * NFS4ERR_IO           no open stream
1311  */
1312 nfsstat4
1313 nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
1314         nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
1315         nfs4_lock_owner_t **lopp)
1316 {
1317         nfs4_lock_owner_t *lop, *next_lop;
1318         mntinfo4_t *mi;
1319         int error = 0;
1320         nfsstat4 stat;
1321 
1322         mi = VTOMI4(RTOV4(rp));
1323 
1324         mutex_enter(&rp->r_statev4_lock);
1325 
1326         lop = rp->r_lo_head.lo_next_rnode;
1327         while (lop != &rp->r_lo_head) {
1328                 mutex_enter(&lop->lo_lock);
1329                 if (lop->lo_pid == pid && lop->lo_valid != 0) {
1330                         /* Found a matching lock owner */
1331                         NFS4_DEBUG(nfs4_client_state_debug,
1332                             (CE_NOTE, "nfs4_find_or_create_lock_owner: "
1333                             "got a match"));
1334                         lop->lo_ref_count++;
1335                         break;
1336                 }
1337                 next_lop = lop->lo_next_rnode;
1338                 mutex_exit(&lop->lo_lock);
1339                 lop = next_lop;
1340         }
1341 
1342         if (lop == &rp->r_lo_head) {
1343                 /* create temporary lock owner */
1344                 lop = create_lock_owner(rp, pid);
1345         }
1346         mutex_exit(&rp->r_statev4_lock);
1347 
1348         /* Have a locked down lock owner struct now */
1349         if (lop->lo_just_created != NFS4_JUST_CREATED) {
1350                 /* This is an existing lock owner */
1351                 *oopp = NULL;
1352                 *ospp = NULL;
1353         } else {
1354                 /* Lock owner doesn't exist yet */
1355 
1356                 /* First grab open owner seqid synchronization */
1357                 mutex_exit(&lop->lo_lock);
1358                 *oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1359                 if (*oopp == NULL)
1360                         goto kill_new_lop;
1361                 error = nfs4_start_open_seqid_sync(*oopp, mi);
1362                 if (error == EAGAIN) {
1363                         stat = NFS4ERR_DELAY;
1364                         goto failed;
1365                 }
1366                 *ospp = find_open_stream(*oopp, rp);
1367                 if (*ospp == NULL) {
1368                         nfs4_end_open_seqid_sync(*oopp);
1369                         goto kill_new_lop;
1370                 }
1371                 if ((*ospp)->os_failed_reopen) {
1372                         mutex_exit(&(*ospp)->os_sync_lock);
1373                         NFS4_DEBUG((nfs4_open_stream_debug ||
1374                             nfs4_client_lock_debug), (CE_NOTE,
1375                             "nfs4_find_or_create_lock_owner: os_failed_reopen;"
1376                             "osp %p, cr %p, rp %s", (void *)(*ospp),
1377                             (void *)cr, rnode4info(rp)));
1378                         nfs4_end_open_seqid_sync(*oopp);
1379                         stat = NFS4ERR_IO;
1380                         goto failed;
1381                 }
1382                 mutex_exit(&(*ospp)->os_sync_lock);
1383 
1384                 /*
1385                  * Now see if the lock owner has become permanent while we
1386                  * had released our lock.
1387                  */
1388                 mutex_enter(&lop->lo_lock);
1389                 if (lop->lo_just_created != NFS4_JUST_CREATED) {
1390                         nfs4_end_open_seqid_sync(*oopp);
1391                         open_stream_rele(*ospp, rp);
1392                         open_owner_rele(*oopp);
1393                         *oopp = NULL;
1394                         *ospp = NULL;
1395                 }
1396         }
1397         mutex_exit(&lop->lo_lock);
1398 
1399         error = nfs4_start_lock_seqid_sync(lop, mi);
1400         if (error == EAGAIN) {
1401                 if (*oopp != NULL)
1402                         nfs4_end_open_seqid_sync(*oopp);
1403                 stat = NFS4ERR_DELAY;
1404                 goto failed;
1405         }
1406         ASSERT(error == 0);
1407 
1408         *lopp = lop;
1409         return (NFS4_OK);
1410 
1411 kill_new_lop:
1412         /*
1413          * A previous CLOSE was attempted but got EINTR, but the application
1414          * continued to use the unspecified state file descriptor.  But now the
1415          * open stream is gone (which could also destroy the open owner), hence
1416          * we can no longer continue.  The calling function should return EIO
1417          * to the application.
1418          */
1419         NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
1420             (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
1421             "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
1422             (void *)(*ospp)));
1423 
1424         nfs4_rnode_remove_lock_owner(rp, lop);
1425         stat = NFS4ERR_IO;
1426 
1427 failed:
1428         lock_owner_rele(lop);
1429         if (*oopp) {
1430                 open_owner_rele(*oopp);
1431                 *oopp = NULL;
1432         }
1433         if (*ospp) {
1434                 open_stream_rele(*ospp, rp);
1435                 *ospp = NULL;
1436         }
1437         return (stat);
1438 }
1439 
1440 /*
1441  * This function grabs a recently freed open owner off of the freed open
1442  * owner list if there is a match on the cred 'cr'.  It returns NULL if no
1443  * such match is found.  It will set the 'oo_ref_count' and 'oo_valid' back
1444  * to both 1 (sane values) in the case a match is found.
1445  */
1446 static nfs4_open_owner_t *
1447 find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
1448         mntinfo4_t *mi)
1449 {
1450         nfs4_open_owner_t               *foop;
1451 
1452         NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
1453             "find_freed_open_owner: cred %p", (void*)cr));
1454 
1455         ASSERT(mutex_owned(&mi->mi_lock));
1456         ASSERT(mutex_owned(&bucketp->b_lock));
1457 
1458         /* got hash bucket, search through freed open owners */
1459         for (foop = list_head(&mi->mi_foo_list); foop != NULL;
1460             foop = list_next(&mi->mi_foo_list, foop)) {
1461                 if (!crcmp(foop->oo_cred, cr)) {
1462                         NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1463                             "find_freed_open_owner: got a match open owner "
1464                             "%p", (void *)foop));
1465                         foop->oo_ref_count = 1;
1466                         foop->oo_valid = 1;
1467                         list_remove(&mi->mi_foo_list, foop);
1468                         mi->mi_foo_num--;
1469 
1470                         /* now add the struct into the cred hash table */
1471                         list_insert_head(&bucketp->b_oo_hash_list, foop);
1472                         return (foop);
1473                 }
1474         }
1475 
1476         return (NULL);
1477 }
1478 
1479 /*
1480  * Insert the newly freed 'oop' into the mi's freed oop list,
1481  * always at the head of the list.  If we've already reached
1482  * our maximum allowed number of freed open owners (mi_foo_max),
1483  * then remove the LRU open owner on the list (namely the tail).
1484  */
1485 static void
1486 nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
1487 {
1488         nfs4_open_owner_t *lru_foop;
1489 
1490         if (mi->mi_foo_num < mi->mi_foo_max) {
1491                 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1492                     "nfs4_free_open_owner: num free %d, max free %d, "
1493                     "insert open owner %p for mntinfo4 %p",
1494                     mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
1495                     (void *)mi));
1496                 list_insert_head(&mi->mi_foo_list, oop);
1497                 mi->mi_foo_num++;
1498                 return;
1499         }
1500 
1501         /* need to replace a freed open owner */
1502 
1503         lru_foop = list_tail(&mi->mi_foo_list);
1504 
1505         NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1506             "nfs4_free_open_owner: destroy %p, insert %p",
1507             (void *)lru_foop, (void *)oop));
1508 
1509         list_remove(&mi->mi_foo_list, lru_foop);
1510         nfs4_destroy_open_owner(lru_foop);
1511 
1512         /* head always has latest freed oop */
1513         list_insert_head(&mi->mi_foo_list, oop);
1514 }
1515 
1516 void
1517 nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
1518 {
1519         ASSERT(oop != NULL);
1520 
1521         crfree(oop->oo_cred);
1522         if (oop->oo_cred_otw)
1523                 crfree(oop->oo_cred_otw);
1524         mutex_destroy(&oop->oo_lock);
1525         cv_destroy(&oop->oo_cv_seqid_sync);
1526         kmem_free(oop, sizeof (*oop));
1527 }
1528 
1529 seqid4
1530 nfs4_get_open_seqid(nfs4_open_owner_t *oop)
1531 {
1532         ASSERT(oop->oo_seqid_inuse);
1533         return (oop->oo_seqid);
1534 }
1535 
1536 /*
1537  * This set's the open seqid for a <open owner/ mntinfo4> pair.
1538  */
1539 void
1540 nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
1541         nfs4_tag_type_t tag_type)
1542 {
1543         ASSERT(oop->oo_seqid_inuse);
1544         oop->oo_seqid = seqid;
1545         oop->oo_last_good_seqid = seqid;
1546         oop->oo_last_good_op = tag_type;
1547 }
1548 
1549 /*
1550  * This bumps the current open seqid for the open owner 'oop'.
1551  */
1552 void
1553 nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
1554     nfs4_tag_type_t tag_type)
1555 {
1556         ASSERT(oop->oo_seqid_inuse);
1557         oop->oo_seqid++;
1558         oop->oo_last_good_seqid = oop->oo_seqid;
1559         oop->oo_last_good_op = tag_type;
1560 }
1561 
1562 /*
1563  * If no open owner was provided, this function takes the cred to find an
1564  * open owner within the given mntinfo4_t.  Either way we return the
1565  * open owner's OTW credential if it exists; otherwise returns the
1566  * supplied 'cr'.
1567  *
1568  * A hold is put on the returned credential, and it is up to the caller
1569  * to free the cred.
1570  */
1571 cred_t *
1572 nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
1573 {
1574         cred_t *ret_cr;
1575         nfs4_open_owner_t *oop = provided_oop;
1576 
1577         if (oop == NULL)
1578                 oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1579         if (oop != NULL) {
1580                 mutex_enter(&oop->oo_lock);
1581                 if (oop->oo_cred_otw)
1582                         ret_cr = oop->oo_cred_otw;
1583                 else
1584                         ret_cr = cr;
1585                 crhold(ret_cr);
1586                 mutex_exit(&oop->oo_lock);
1587                 if (provided_oop == NULL)
1588                         open_owner_rele(oop);
1589         } else {
1590                 ret_cr = cr;
1591                 crhold(ret_cr);
1592         }
1593         return (ret_cr);
1594 }
1595 
/*
 * Retrieves the next open stream in the rnode's list if an open stream
 * is provided; otherwise gets the first open stream in the list.
 * The open owner for that open stream is then retrieved, and if its
 * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
 * is returned.  *osp is set to the 'found' open stream.
 *
 * Note: we don't set *osp to the open stream retrieved via the
 * optimized check since that won't necessarily be at the beginning
 * of the rnode list, and if that osp doesn't work we'd like to
 * check _all_ open streams (starting from the beginning of the
 * rnode list).
 *
 * Arguments:
 * rp           rnode whose open streams are examined
 * cr           caller's cred; must be non-NULL (ASSERTed)
 * osp          in: the open stream returned by the previous call, or NULL
 *              to start at the head of the list.  A non-NULL *osp is
 *              released here when the list walk is taken.
 *              out: the held open stream found by the list walk, or NULL
 * first_time   in: TRUE to try the optimized lookup by 'cr' first;
 *              always FALSE on return
 * last_time    set to TRUE when no (further) valid open stream exists;
 *              left untouched otherwise
 *
 * A hold (crhold()) is placed on the returned cred.
 */
cred_t *
nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
        nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
{
        nfs4_open_stream_t *next_osp = NULL;
        cred_t *ret_cr;

        ASSERT(cr != NULL);
        /*
         * As an optimization, try to find the open owner
         * for the cred provided since that's most likely
         * to work.
         */
        if (*first_time) {
                nfs4_open_owner_t *oop;

                oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
                if (oop) {
                        /* a found stream comes back with os_sync_lock held */
                        next_osp = find_open_stream(oop, rp);
                        if (next_osp)
                                mutex_exit(&next_osp->os_sync_lock);
                        open_owner_rele(oop);
                }
        }
        if (next_osp == NULL) {
                /* set when the caller's *osp must be released afterwards */
                int delay_rele = 0;
                /*
                 * The optimized lookup was skipped or came up empty; from
                 * here on this call behaves like a plain list walk.
                 */
                *first_time = FALSE;

                /* return the next open stream for this rnode */
                mutex_enter(&rp->r_os_lock);
                /* Now, no one can add or delete to rp's open streams list */

                if (*osp) {
                        next_osp = list_next(&rp->r_open_streams, *osp);
                        /*
                         * Delay the rele of *osp until after we drop
                         * r_os_lock to not deadlock with oo_lock
                         * via an open_stream_rele()->open_owner_rele().
                         */
                        delay_rele = 1;
                } else {
                        next_osp = list_head(&rp->r_open_streams);
                }
                if (next_osp) {
                        nfs4_open_stream_t *tmp_osp;

                        /* find the next valid open stream */
                        mutex_enter(&next_osp->os_sync_lock);
                        while (next_osp && !next_osp->os_valid) {
                                tmp_osp =
                                    list_next(&rp->r_open_streams, next_osp);
                                mutex_exit(&next_osp->os_sync_lock);
                                next_osp = tmp_osp;
                                if (next_osp)
                                        mutex_enter(&next_osp->os_sync_lock);
                        }
                        /* take our own reference on the stream we settled on */
                        if (next_osp) {
                                next_osp->os_ref_count++;
                                mutex_exit(&next_osp->os_sync_lock);
                        }
                }
                mutex_exit(&rp->r_os_lock);
                if (delay_rele)
                        open_stream_rele(*osp, rp);
        }

        if (next_osp) {
                nfs4_open_owner_t *oop;

                /* prefer the open owner's OTW cred, read under oo_lock */
                oop = next_osp->os_open_owner;
                mutex_enter(&oop->oo_lock);
                if (oop->oo_cred_otw)
                        ret_cr = oop->oo_cred_otw;
                else
                        ret_cr = cr;
                crhold(ret_cr);
                mutex_exit(&oop->oo_lock);
                if (*first_time) {
                        /*
                         * Stream came from the optimized lookup: release
                         * it (presumably the reference find_open_stream()
                         * returned) and do not hand it back, so the next
                         * call starts from the head of the list.
                         */
                        open_stream_rele(next_osp, rp);
                        *osp = NULL;
                } else
                        *osp = next_osp;
        } else {
                /* just return the cred provided to us */
                *last_time = TRUE;
                *osp = NULL;
                ret_cr = cr;
                crhold(ret_cr);
        }

        *first_time = FALSE;
        return (ret_cr);
}
1702 
1703 void
1704 nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
1705 {
1706         bzero(&sid_tp->d_sid, sizeof (stateid4));
1707         bzero(&sid_tp->l_sid, sizeof (stateid4));
1708         bzero(&sid_tp->o_sid, sizeof (stateid4));
1709         sid_tp->cur_sid_type = NO_SID;
1710 }
1711 
1712 void
1713 nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp)
1714 {
1715         NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1716             "nfs4_save_stateid: saved %s stateid",
1717             sid_tp->cur_sid_type == DEL_SID ? "delegation" :
1718             sid_tp->cur_sid_type == LOCK_SID ? "lock" :
1719             sid_tp->cur_sid_type == OPEN_SID ? "open" : "special"));
1720 
1721         switch (sid_tp->cur_sid_type) {
1722         case DEL_SID:
1723                 sid_tp->d_sid = *s1;
1724                 break;
1725         case LOCK_SID:
1726                 sid_tp->l_sid = *s1;
1727                 break;
1728         case OPEN_SID:
1729                 sid_tp->o_sid = *s1;
1730                 break;
1731         case SPEC_SID:
1732         default:
1733                 cmn_err(CE_PANIC, "nfs4_save_stateid: illegal "
1734                     "stateid type %d", sid_tp->cur_sid_type);
1735         }
1736 }
1737 
1738 /*
1739  * We got NFS4ERR_BAD_SEQID.  Setup some arguments to pass to recovery.
1740  * Caller is responsible for freeing.
1741  */
1742 nfs4_bseqid_entry_t *
1743 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop,
1744     vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid)
1745 {
1746         nfs4_bseqid_entry_t     *bsep;
1747 
1748         bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP);
1749         bsep->bs_oop = oop;
1750         bsep->bs_lop = lop;
1751         bsep->bs_vp = vp;
1752         bsep->bs_pid = pid;
1753         bsep->bs_tag = tag;
1754         bsep->bs_seqid = seqid;
1755 
1756         return (bsep);
1757 }
1758 
1759 void
1760 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1761         nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr,
1762         vnode_t *vp, int access_close, int deny_close)
1763 {
1764         lost_rqstp->lr_putfirst = FALSE;
1765 
1766         ASSERT(vp != NULL);
1767         if (error == ETIMEDOUT || error == EINTR ||
1768             NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1769                 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1770                     "nfs4open_dg_save_lost_rqst: error %d", error));
1771 
1772                 lost_rqstp->lr_op = OP_OPEN_DOWNGRADE;
1773                 /*
1774                  * The vp is held and rele'd via the recovery code.
1775                  * See nfs4_save_lost_rqst.
1776                  */
1777                 lost_rqstp->lr_vp = vp;
1778                 lost_rqstp->lr_dvp = NULL;
1779                 lost_rqstp->lr_oop = oop;
1780                 lost_rqstp->lr_osp = osp;
1781                 lost_rqstp->lr_lop = NULL;
1782                 lost_rqstp->lr_cr = cr;
1783                 lost_rqstp->lr_flk = NULL;
1784                 lost_rqstp->lr_dg_acc = access_close;
1785                 lost_rqstp->lr_dg_deny = deny_close;
1786                 lost_rqstp->lr_putfirst = FALSE;
1787         } else {
1788                 lost_rqstp->lr_op = 0;
1789         }
1790 }
1791 
1792 /*
1793  * Change the access and deny bits of an OPEN.
1794  * If recovery is needed, *recov_credpp is set to the cred used OTW,
1795  * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW.
1796  */
1797 void
1798 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop,
1799         nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp,
1800         nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp)
1801 {
1802         mntinfo4_t              *mi;
1803         int                     downgrade_acc, downgrade_deny;
1804         int                     new_acc, new_deny;
1805         COMPOUND4args_clnt      args;
1806         COMPOUND4res_clnt       res;
1807         OPEN_DOWNGRADE4res      *odg_res;
1808         nfs_argop4              argop[3];
1809         nfs_resop4              *resop;
1810         rnode4_t                *rp;
1811         bool_t                  needrecov = FALSE;
1812         int                     doqueue = 1;
1813         seqid4                  seqid = 0;
1814         cred_t                  *cred_otw;
1815         hrtime_t                t;
1816 
1817         ASSERT(mutex_owned(&osp->os_sync_lock));
1818 #if DEBUG
1819         mutex_enter(&oop->oo_lock);
1820         ASSERT(oop->oo_seqid_inuse);
1821         mutex_exit(&oop->oo_lock);
1822 #endif
1823 
1824 
1825         if (access_close == 0 && deny_close == 0) {
1826                 nfs4_error_zinit(ep);
1827                 return;
1828         }
1829 
1830         cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop);
1831 
1832 cred_retry:
1833         nfs4_error_zinit(ep);
1834         downgrade_acc = 0;
1835         downgrade_deny = 0;
1836         mi = VTOMI4(vp);
1837         rp = VTOR4(vp);
1838 
1839         /*
1840          * Check to see if the open stream got closed before we go OTW,
1841          * now that we have acquired the 'os_sync_lock'.
1842          */
1843         if (!osp->os_valid) {
1844                 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1845                     " open stream has already been closed, return success"));
1846                 /* error has already been set */
1847                 goto no_args_out;
1848         }
1849 
1850         /* If the file failed recovery, just quit. */
1851         mutex_enter(&rp->r_statelock);
1852         if (rp->r_flags & R4RECOVERR) {
1853                 mutex_exit(&rp->r_statelock);
1854                 ep->error = EIO;
1855                 goto no_args_out;
1856         }
1857         mutex_exit(&rp->r_statelock);
1858 
1859         seqid = nfs4_get_open_seqid(oop) + 1;
1860 
1861         NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1862             "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
1863             access_close, osp->os_share_acc_read, osp->os_share_acc_write));
1864 
1865         /* If we're closing the last READ, need to downgrade */
1866         if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
1867                 downgrade_acc |= OPEN4_SHARE_ACCESS_READ;
1868 
1869         /* if we're closing the last WRITE, need to downgrade */
1870         if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
1871                 downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;
1872 
1873         downgrade_deny = OPEN4_SHARE_DENY_NONE;
1874 
1875         new_acc = 0;
1876         new_deny = 0;
1877 
1878         /* set our new access and deny share bits */
1879         if ((osp->os_share_acc_read > 0) &&
1880             !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
1881                 new_acc |= OPEN4_SHARE_ACCESS_READ;
1882         if ((osp->os_share_acc_write > 0) &&
1883             !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
1884                 new_acc |= OPEN4_SHARE_ACCESS_WRITE;
1885 
1886         new_deny = OPEN4_SHARE_DENY_NONE;
1887 
1888         NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1889             "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
1890         NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1891             "new acc 0x%x deny 0x%x", new_acc, new_deny));
1892 
1893         /*
1894          * Check to see if we aren't actually doing any downgrade or
1895          * if this is the last 'close' but the file is still mmapped.
1896          * Skip this if this a lost request resend so we don't decrement
1897          * the osp's share counts more than once.
1898          */
1899         if (!lrp &&
1900             ((downgrade_acc == 0 && downgrade_deny == 0) ||
1901             (new_acc == 0 && new_deny == 0))) {
1902                 /*
1903                  * No downgrade to do, but still need to
1904                  * update osp's os_share_* counts.
1905                  */
1906                 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE,
1907                     "nfs4_open_downgrade: just lower the osp's count by %s",
1908                     (access_close & FREAD) && (access_close & FWRITE) ?
1909                     "read and write" : (access_close & FREAD) ? "read" :
1910                     (access_close & FWRITE) ? "write" : "bogus"));
1911                 if (access_close & FREAD)
1912                         osp->os_share_acc_read--;
1913                 if (access_close & FWRITE)
1914                         osp->os_share_acc_write--;
1915                 osp->os_share_deny_none--;
1916                 nfs4_error_zinit(ep);
1917 
1918                 goto no_args_out;
1919         }
1920 
1921         if (osp->os_orig_oo_name != oop->oo_name) {
1922                 ep->error = EIO;
1923                 goto no_args_out;
1924         }
1925 
1926         /* setup the COMPOUND args */
1927         if (lrp)
1928                 args.ctag = TAG_OPEN_DG_LOST;
1929         else
1930                 args.ctag = TAG_OPEN_DG;
1931 
1932         args.array_len = 3;
1933         args.array = argop;
1934 
1935         /* putfh */
1936         argop[0].argop = OP_CPUTFH;
1937         argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
1938 
1939         argop[1].argop = OP_GETATTR;
1940         argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
1941         argop[1].nfs_argop4_u.opgetattr.mi = mi;
1942 
1943         ASSERT(mutex_owned(&osp->os_sync_lock));
1944         ASSERT(osp->os_delegation == FALSE);
1945 
1946         /* open downgrade */
1947         argop[2].argop = OP_OPEN_DOWNGRADE;
1948         argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid;
1949         argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc;
1950         argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny;
1951         argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid;
1952 
1953         t = gethrtime();
1954 
1955         rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep);
1956 
1957         if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
1958                 nfs4_set_open_seqid(seqid, oop, args.ctag);
1959 
1960         if ((ep->error == EACCES ||
1961             (ep->error == 0 && res.status == NFS4ERR_ACCESS)) &&
1962             cred_otw != cr) {
1963                 crfree(cred_otw);
1964                 cred_otw = cr;
1965                 crhold(cred_otw);
1966                 if (!ep->error)
1967                         (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1968                 goto cred_retry;
1969         }
1970 
1971         needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp);
1972 
1973         if (needrecov && recov_credpp) {
1974                 *recov_credpp = cred_otw;
1975                 crhold(*recov_credpp);
1976                 if (recov_seqidp)
1977                         *recov_seqidp = seqid;
1978         }
1979 
1980         if (!ep->error && !res.status) {
1981                 /* get the open downgrade results */
1982                 resop = &res.array[2];
1983                 odg_res = &resop->nfs_resop4_u.opopen_downgrade;
1984 
1985                 osp->open_stateid = odg_res->open_stateid;
1986 
1987                 /* set the open streams new access/deny bits */
1988                 if (access_close & FREAD)
1989                         osp->os_share_acc_read--;
1990                 if (access_close & FWRITE)
1991                         osp->os_share_acc_write--;
1992                 osp->os_share_deny_none--;
1993                 osp->os_dc_openacc = new_acc;
1994 
1995                 nfs4_attr_cache(vp,
1996                     &res.array[1].nfs_resop4_u.opgetattr.ga_res,
1997                     t, cred_otw, TRUE, NULL);
1998         }
1999 
2000         if (!ep->error)
2001                 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2002 
2003 no_args_out:
2004         crfree(cred_otw);
2005 }
2006 
2007 /*
2008  * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out
2009  * because the filesystem was forcibly unmounted) then we don't know if we
2010  * potentially left state dangling on the server, therefore the recovery
2011  * framework makes this call to resend the OPEN request and then undo it.
2012  */
2013 void
2014 nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
2015         nfs4_error_t *ep)
2016 {
2017         COMPOUND4args_clnt      args;
2018         COMPOUND4res_clnt       res;
2019         nfs_argop4              argop[4];
2020         GETFH4res               *gf_res = NULL;
2021         OPEN4cargs              *open_args;
2022         OPEN4res                *op_res;
2023         char                    *destcfp;
2024         int                     destclen;
2025         nfs4_ga_res_t           *garp;
2026         vnode_t                 *dvp = NULL, *vp = NULL;
2027         rnode4_t                *rp = NULL, *drp = NULL;
2028         cred_t                  *cr = NULL;
2029         seqid4                  seqid;
2030         nfs4_open_owner_t       *oop = NULL;
2031         nfs4_open_stream_t      *osp = NULL;
2032         component4              *srcfp;
2033         open_claim_type4        claim;
2034         mntinfo4_t              *mi;
2035         int                     doqueue = 1;
2036         bool_t                  retry_open = FALSE;
2037         int                     created_osp = 0;
2038         hrtime_t                t;
2039         char                    *failed_msg = "";
2040         int                     fh_different;
2041         int                     reopen = 0;
2042 
2043         nfs4_error_zinit(ep);
2044 
2045         cr = resend_rqstp->lr_cr;
2046         dvp = resend_rqstp->lr_dvp;
2047 
2048         vp = *vpp;
2049         if (vp) {
2050                 ASSERT(nfs4_consistent_type(vp));
2051                 rp = VTOR4(vp);
2052         }
2053 
2054         if (rp) {
2055                 /* If the file failed recovery, just quit. */
2056                 mutex_enter(&rp->r_statelock);
2057                 if (rp->r_flags & R4RECOVERR) {
2058                         mutex_exit(&rp->r_statelock);
2059                         ep->error = EIO;
2060                         return;
2061                 }
2062                 mutex_exit(&rp->r_statelock);
2063         }
2064 
2065         if (dvp) {
2066                 drp = VTOR4(dvp);
2067                 /* If the parent directory failed recovery, just quit. */
2068                 mutex_enter(&drp->r_statelock);
2069                 if (drp->r_flags & R4RECOVERR) {
2070                         mutex_exit(&drp->r_statelock);
2071                         ep->error = EIO;
2072                         return;
2073                 }
2074                 mutex_exit(&drp->r_statelock);
2075         } else
2076                 reopen = 1;     /* NULL dvp means this is a reopen */
2077 
2078         claim = resend_rqstp->lr_oclaim;
2079         ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);
2080 
2081         args.ctag = TAG_OPEN_LOST;
2082         args.array_len = 4;
2083         args.array = argop;
2084 
2085         argop[0].argop = OP_CPUTFH;
2086         if (reopen) {
2087                 ASSERT(vp != NULL);
2088 
2089                 mi = VTOMI4(vp);
2090                 /*
2091                  * if this is a file mount then
2092                  * use the mntinfo parentfh
2093                  */
2094                 argop[0].nfs_argop4_u.opcputfh.sfh =
2095                     (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
2096                     VTOSV(vp)->sv_dfh;
2097                 args.ctag = TAG_REOPEN_LOST;
2098         } else {
2099                 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
2100                 mi = VTOMI4(dvp);
2101         }
2102 
2103         argop[1].argop = OP_COPEN;
2104         open_args = &argop[1].nfs_argop4_u.opcopen;
2105         open_args->claim = claim;
2106 
2107         /*
2108          * If we sent over a OPEN with CREATE then the only
2109          * thing we care about is to not leave dangling state
2110          * on the server, not whether the file we potentially
2111          * created remains on the server.  So even though the
2112          * lost open request specified a CREATE, we only wish
2113          * to do a non-CREATE OPEN.
2114          */
2115         open_args->opentype = OPEN4_NOCREATE;
2116 
2117         srcfp = &resend_rqstp->lr_ofile;
2118         destclen = srcfp->utf8string_len;
2119         destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
2120         bcopy(srcfp->utf8string_val, destcfp, destclen);
2121         destcfp[destclen] = '\0';
2122         if (claim == CLAIM_DELEGATE_CUR) {
2123                 open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
2124                     resend_rqstp->lr_ostateid;
2125                 open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
2126         } else {
2127                 open_args->open_claim4_u.cfile = destcfp;
2128         }
2129 
2130         open_args->share_access = resend_rqstp->lr_oacc;
2131         open_args->share_deny = resend_rqstp->lr_odeny;
2132         oop = resend_rqstp->lr_oop;
2133         ASSERT(oop != NULL);
2134 
2135         open_args->owner.clientid = mi2clientid(mi);
2136         /* this length never changes */
2137         open_args->owner.owner_len = sizeof (oop->oo_name);
2138         open_args->owner.owner_val =
2139             kmem_alloc(open_args->owner.owner_len, KM_SLEEP);
2140 
2141         ep->error = nfs4_start_open_seqid_sync(oop, mi);
2142         ASSERT(ep->error == 0);              /* recov thread always succeeds */
2143         /*
2144          * We can get away with not saving the seqid upon detection
2145          * of a lost request, and now just use the open owner's current
2146          * seqid since we only allow one op OTW per seqid and lost
2147          * requests are saved FIFO.
2148          */
2149         seqid = nfs4_get_open_seqid(oop) + 1;
2150         open_args->seqid = seqid;
2151 
2152         bcopy(&oop->oo_name, open_args->owner.owner_val,
2153             open_args->owner.owner_len);
2154 
2155         /* getfh */
2156         argop[2].argop = OP_GETFH;
2157 
2158         /* Construct the getattr part of the compound */
2159         argop[3].argop = OP_GETATTR;
2160         argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
2161         argop[3].nfs_argop4_u.opgetattr.mi = mi;
2162 
2163         res.array = NULL;
2164 
2165         t = gethrtime();
2166 
2167         rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
2168 
2169         if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
2170                 nfs4_set_open_seqid(seqid, oop, args.ctag);
2171 
2172         NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2173             "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));
2174 
2175         if (ep->error || res.status)
2176                 goto err_out;
2177 
2178         op_res = &res.array[1].nfs_resop4_u.opopen;
2179         gf_res = &res.array[2].nfs_resop4_u.opgetfh;
2180         garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;
2181 
2182         if (!vp) {
2183                 int rnode_err = 0;
2184                 nfs4_sharedfh_t *sfh;
2185 
2186                 /*
2187                  * If we can't decode all the attributes they are not usable,
2188                  * just make the vnode.
2189                  */
2190 
2191                 sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
2192                 *vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
2193                     fn_get(VTOSV(dvp)->sv_name,
2194                     open_args->open_claim4_u.cfile, sfh));
2195                 sfh4_rele(&sfh);
2196                 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2197                     "nfs4_resend_open_otw: made vp %p for file %s",
2198                     (void *)(*vpp), open_args->open_claim4_u.cfile));
2199 
2200                 if (ep->error)
2201                         PURGE_ATTRCACHE4(*vpp);
2202 
2203                 /*
2204                  * For the newly created *vpp case, make sure the rnode
2205                  * isn't bad before using it.
2206                  */
2207                 mutex_enter(&(VTOR4(*vpp))->r_statelock);
2208                 if (VTOR4(*vpp)->r_flags & R4RECOVERR)
2209                         rnode_err = EIO;
2210                 mutex_exit(&(VTOR4(*vpp))->r_statelock);
2211 
2212                 if (rnode_err) {
2213                         NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2214                             "nfs4_resend_open_otw: rp %p is bad",
2215                             (void *)VTOR4(*vpp)));
2216                         ep->error = rnode_err;
2217                         goto err_out;
2218                 }
2219 
2220                 vp = *vpp;
2221                 rp = VTOR4(vp);
2222         }
2223 
2224         if (reopen) {
2225                 /*
2226                  * Check if the path we reopened really is the same
2227                  * file. We could end up in a situation were the file
2228                  * was removed and a new file created with the same name.
2229                  */
2230                 (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
2231                 fh_different =
2232                     (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
2233                 if (fh_different) {
2234                         if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
2235                             mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
2236                                 /* Oops, we don't have the same file */
2237                                 if (mi->mi_fh_expire_type == FH4_PERSISTENT)
2238                                         failed_msg =
2239                                             "Couldn't reopen: Persistant "
2240                                             "file handle changed";
2241                                 else
2242                                         failed_msg =
2243                                             "Couldn't reopen: Volatile "
2244                                             "(no expire on open) file handle "
2245                                             "changed";
2246 
2247                                 nfs4_end_open_seqid_sync(oop);
2248                                 kmem_free(destcfp, destclen + 1);
2249                                 nfs4args_copen_free(open_args);
2250                                 (void) xdr_free(xdr_COMPOUND4res_clnt,
2251                                     (caddr_t)&res);
2252                                 nfs_rw_exit(&mi->mi_fh_lock);
2253                                 nfs4_fail_recov(vp, failed_msg, ep->error,
2254                                     ep->stat);
2255                                 return;
2256                         } else {
2257                                 /*
2258                                  * We have volatile file handles that don't
2259                                  * compare.  If the fids are the same then we
2260                                  * assume that the file handle expired but the
2261                                  * renode still refers to the same file object.
2262                                  *
2263                                  * First check that we have fids or not.
2264                                  * If we don't we have a dumb server so we will
2265                                  * just assume every thing is ok for now.
2266                                  */
2267                                 if (!ep->error &&
2268                                     garp->n4g_va.va_mask & AT_NODEID &&
2269                                     rp->r_attr.va_mask & AT_NODEID &&
2270                                     rp->r_attr.va_nodeid !=
2271                                     garp->n4g_va.va_nodeid) {
2272                                         /*
2273                                          * We have fids, but they don't
2274                                          * compare. So kill the file.
2275                                          */
2276                                         failed_msg =
2277                                             "Couldn't reopen: file handle "
2278                                             "changed due to mismatched fids";
2279                                         nfs4_end_open_seqid_sync(oop);
2280                                         kmem_free(destcfp, destclen + 1);
2281                                         nfs4args_copen_free(open_args);
2282                                         (void) xdr_free(xdr_COMPOUND4res_clnt,
2283                                             (caddr_t)&res);
2284                                         nfs_rw_exit(&mi->mi_fh_lock);
2285                                         nfs4_fail_recov(vp, failed_msg,
2286                                             ep->error, ep->stat);
2287                                         return;
2288                                 } else {
2289                                         /*
2290                                          * We have volatile file handles that
2291                                          * refers to the same file (at least
2292                                          * they have the same fid) or we don't
2293                                          * have fids so we can't tell. :(. We'll
2294                                          * be a kind and accepting client so
2295                                          * we'll update the rnode's file
2296                                          * handle with the otw handle.
2297                                          *
2298                                          * We need to drop mi->mi_fh_lock since
2299                                          * sh4_update acquires it. Since there
2300                                          * is only one recovery thread there is
2301                                          * no race.
2302                                          */
2303                                         nfs_rw_exit(&mi->mi_fh_lock);
2304                                         sfh4_update(rp->r_fh, &gf_res->object);
2305                                 }
2306                         }
2307                 } else {
2308                         nfs_rw_exit(&mi->mi_fh_lock);
2309                 }
2310         }
2311 
2312         ASSERT(nfs4_consistent_type(vp));
2313 
2314         if (op_res->rflags & OPEN4_RESULT_CONFIRM)
2315                 nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
2316                     &retry_open, oop, TRUE, ep, NULL);
2317         if (ep->error || ep->stat) {
2318                 nfs4_end_open_seqid_sync(oop);
2319                 kmem_free(destcfp, destclen + 1);
2320                 nfs4args_copen_free(open_args);
2321                 if (!ep->error)
2322                         (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2323                 return;
2324         }
2325 
2326         if (reopen) {
2327                 /*
2328                  * Doing a reopen here so the osp should already exist.
2329                  * If not, something changed or went very wrong.
2330                  *
2331                  * returns with 'os_sync_lock' held
2332                  */
2333                 osp = find_open_stream(oop, rp);
2334                 if (!osp) {
2335                         NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2336                             "nfs4_resend_open_otw: couldn't find osp"));
2337                         ep->error = EINVAL;
2338                         goto err_out;
2339                 }
2340                 osp->os_open_ref_count++;
2341         } else {
2342                 mutex_enter(&oop->oo_lock);
2343                 oop->oo_just_created = NFS4_PERM_CREATED;
2344                 mutex_exit(&oop->oo_lock);
2345 
2346                 /* returns with 'os_sync_lock' held */
2347                 osp = find_or_create_open_stream(oop, rp, &created_osp);
2348                 if (!osp) {
2349                         NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2350                             "nfs4_resend_open_otw: couldn't create osp"));
2351                         ep->error = EINVAL;
2352                         goto err_out;
2353                 }
2354         }
2355 
2356         osp->open_stateid = op_res->stateid;
2357         osp->os_delegation = FALSE;
2358         /*
2359          * Need to reset this bitfield for the possible case where we were
2360          * going to OTW CLOSE the file, got a non-recoverable error, and before
2361          * we could retry the CLOSE, OPENed the file again.
2362          */
2363         ASSERT(osp->os_open_owner->oo_seqid_inuse);
2364         osp->os_final_close = 0;
2365         osp->os_force_close = 0;
2366 
2367         if (!reopen) {
2368                 if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
2369                         osp->os_share_acc_read++;
2370                 if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
2371                         osp->os_share_acc_write++;
2372                 osp->os_share_deny_none++;
2373         }
2374 
2375         mutex_exit(&osp->os_sync_lock);
2376         if (created_osp)
2377                 nfs4_inc_state_ref_count(mi);
2378         open_stream_rele(osp, rp);
2379 
2380         nfs4_end_open_seqid_sync(oop);
2381 
2382         /* accept delegation, if any */
2383         nfs4_delegation_accept(rp, claim, op_res, garp, cr);
2384 
2385         kmem_free(destcfp, destclen + 1);
2386         nfs4args_copen_free(open_args);
2387 
2388         if (claim == CLAIM_DELEGATE_CUR)
2389                 nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
2390         else
2391                 PURGE_ATTRCACHE4(vp);
2392 
2393         (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2394 
2395         ASSERT(nfs4_consistent_type(vp));
2396 
2397         return;
2398 
2399 err_out:
2400         nfs4_end_open_seqid_sync(oop);
2401         kmem_free(destcfp, destclen + 1);
2402         nfs4args_copen_free(open_args);
2403         if (!ep->error)
2404                 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2405 }