1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 29 /* 30 * rename or exchange identities of virtual device nodes 31 */ 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/debug.h> 36 #include <sys/sysmacros.h> 37 #include <sys/types.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 41 #include <sys/lvm/mdvar.h> 42 #include <sys/lvm/md_rename.h> 43 44 #include <sys/sysevent/eventdefs.h> 45 #include <sys/sysevent/svm.h> 46 47 extern major_t md_major; 48 extern unit_t md_nunits; 49 extern set_t md_nsets; 50 extern md_set_t md_set[]; 51 52 #define ROLE(r) \ 53 ((r) == MDRR_PARENT? "parent": \ 54 (r) == MDRR_SELF? "self": \ 55 (r) == MDRR_CHILD? "child": \ 56 (r) == MDRR_UNK? "<unknown>": "<garbage>") 57 58 #define OP_STR(op) \ 59 (((op) == MDRNOP_UNK)? "<unknown>" : \ 60 ((op) == MDRNOP_RENAME)? "rename" : \ 61 ((op) == MDRNOP_EXCHANGE)? 
"exchange" : \ 62 "<garbage>") 63 int md_rename_debug = 0; 64 65 /* delta guard rails */ 66 const unsigned long long DELTA_BEG = (0xDad08888a110beefull); 67 const unsigned long long DELTA_END = (0xa110Beef88880Dadull); 68 69 const unsigned long long DELTA_BEG_FREED = (0xBad0c0ed0fed0dadull); 70 const unsigned long long DELTA_END_FREED = (0x0Fed0dadbad0c0edull); 71 72 /* transaction guard rails */ 73 const unsigned long long TXN_BEG = (0xDad01eadc0ed2badull); 74 const unsigned long long TXN_END = (0xc0ed2badDad01eadull); 75 76 const unsigned long long TXNUN_BEG = (0xcafe0fedbad0beefull); 77 const unsigned long long TXNUN_END = (0xbad0beefcafe0fedull); 78 79 const unsigned int guard_shift = (sizeof (u_longlong_t) - 3); 80 const md_stackcap_t MD_CAN_DO_ANYTHING = (md_stackcap_t)0; 81 82 typedef struct role_change_mapping_tab_t { 83 const int ord; 84 const md_renrole_t old_role; 85 const md_renrole_t new_role; 86 const char *svc_name; 87 md_ren_roleswap_svc_t * const default_svc; 88 } role_change_tab_t; 89 90 /* 91 * The actual table is at the end of the file, so we don't need 92 * many forward references 93 */ 94 static role_change_tab_t role_swap_tab[]; 95 96 #define ILLEGAL_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(0xA1100BAD)) 97 #define NO_DEFAULT_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(NULL)) 98 #define ILLEGAL_SVC_NAME (NULL) 99 100 /* 101 * 102 * Role swap rule table: 103 * 104 * New Role 105 * +---------------------------------------------------------------| 106 * | | Parent | Self | Child | 107 * +--------+-----------------+----------------+-------------------+ 108 * | Parent | no default | ...no default | illegal | 109 * | | 1 (update kids) | 2 (update to) | 3 | 110 * Old +--------+-----------------+----------------+-------------------+ 111 * Role | Self | ...self update | ...rename self | no default (down | 112 * | | 4 update up | 5 | 6 update from) | 113 * +--------+-----------------+----------------+-------------------+ 114 * | Child | illegal | ...child | 
...update         |
 *      |        | 7               | 8  update to   | 9       parent    |
 *      +---------------------------------------------------------------+
 *
 * and notes:
 *
 * - Boxes 1, 4 and 6 are the most interesting. They are responsible
 *   for updating the from unit's data structures. These may involve
 *   finding (former or future) children, resetting name keys and the like.
 *
 * - The "rename" operation is boxes 1, 5 and 9. Most of the work
 *   is done in box 5, since that contains both the "from" and "to"
 *   unit struct for rename.
 *
 *   (There's got to be an eigen function for this; that diagonal
 *   axis is a role identity operation searching for an expression.)
 *
 * - Almost every transaction will call more than one of these.
 *   (Only a rename of a unit with no relatives will call only
 *   a single box.)
 *
 * - Box 4 "...update from" is the generic self->parent modifier.
 * - Box 8 "...update to" is the generic child->self modifier.
 *   These can be generic because all of the information which
 *   needs to be updated is in the common portion of the unit
 *   structure when changing from their respective roles.
 *
 * - Boxes 1, 2 and 6 ("no default") indicate that per-metadevice
 *   information must be updated. For example, in box 1, children
 *   identities must be updated. Since different metadevice types
 *   detect and manipulate their children differently, there can
 *   be no generic "md_rename" function in this box.
 *
 * In addition to the named services in the table above, there
 * are other named services used by rename/exchange.
 * MDRNM_LIST_URFOLKS, MDRNM_LIST_URSELF, MDRNM_LIST_URKIDS
 * list a device's parents, self and children, respectively.
 * In most cases the default functions can be used for parents
 * and self. Top-level devices are not required to have a
 * "list folks" named service. Likewise, devices which cannot
 * have metadevice children are not required to have the
 * "list kids" named service. The LIST_UR* functions call back into
 * the base driver (md_build_rendelta()) to package the changes to
 * a device for addition onto the tree. The LIST_UR* named service
 * then adds this "rename delta" onto the delta tree itself.
 * This keeps private knowledge appropriately encapsulated.
 * They return the number of devices which will need to be changed,
 * and hence the number of elements they've added to the delta list
 * or -1 for error.
 *
 * Other named services used by rename/exchange are:
 * "lock" (MDRNM_LOCK), "unlock" (MDRNM_UNLOCK) and "check" (MDRNM_CHECK).
 * These (un) write-lock all of the relevant in-core structs,
 * including the unit structs for the device and quiesce i/o as necessary.
 * The "check" named service verifies that this device
 * is in a state where rename could and may occur at this time.
 * Since the role_swap functions themselves cannot be undone
 * (at least in this implementation), it is check()'s job to
 * verify that the device is renamable (sic) or, if not, abort.
 * The check function for the device participating in the role
 * of "self" is usually where rename or exchange validity is verified.
 *
 * All of these functions take two arguments which may be thought
 * of as the collective state changes of the tree of devices
 * (md_rendelta_t *family) and the rename transaction state
 * (md_rentxn_t rtxn or rtxnp).
180 * 181 */ 182 183 184 /* 185 * rename unit lock 186 * (default name service routine MDRNM_LOCK) 187 */ 188 static intptr_t 189 md_rename_lock(md_rendelta_t *delta, md_rentxn_t *rtxnp) 190 { 191 minor_t mnum; 192 md_renop_t op; 193 194 ASSERT(delta); 195 ASSERT(rtxnp); 196 197 if (!delta || !rtxnp) { 198 (void) mdsyserror(&rtxnp->mde, EINVAL); 199 return (EINVAL); 200 } 201 mnum = md_getminor(delta->dev); 202 op = rtxnp->op; 203 204 /* 205 * target doesn't exist if renaming (by definition), 206 * so it need not be locked 207 */ 208 if (op == MDRNOP_RENAME && mnum == rtxnp->to.mnum) { 209 return (0); 210 } 211 212 ASSERT(delta->uip); 213 if (!delta->uip) { 214 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum); 215 return (ENODEV); 216 } 217 218 ASSERT(delta->unp); 219 if (!delta->unp) { 220 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum); 221 return (ENODEV); 222 } 223 224 ASSERT(!UNIT_WRITER_HELD(delta->unp)); 225 226 (void) md_unit_writerlock(delta->uip); 227 228 ASSERT(UNIT_WRITER_HELD(delta->unp)); 229 230 return (0); 231 } 232 233 /* 234 * (default name service routine MDRNM_UNLOCK) 235 */ 236 /* ARGSUSED */ 237 static void 238 md_rename_unlock( 239 md_rendelta_t *delta, 240 md_rentxn_t *rtxnp) 241 { 242 ASSERT(delta); 243 ASSERT(delta->uip); 244 ASSERT(delta->unp); 245 246 ASSERT(UNIT_WRITER_HELD(delta->unp)); 247 248 (void) md_unit_writerexit(delta->uip); 249 250 ASSERT(!UNIT_WRITER_HELD(delta->unp)); 251 } 252 253 /* 254 * This is used by the various MDRNM_LIST* named services. 
255 */ 256 md_rendelta_t * 257 md_build_rendelta( 258 md_renrole_t old_role, 259 md_renrole_t new_role, 260 md_dev64_t dev, 261 md_rendelta_t *prev, 262 md_unit_t *unp, 263 mdi_unit_t *uip, 264 md_error_t *ep) 265 { 266 int err = 0; 267 md_rendelta_t *new; 268 269 new = (md_rendelta_t *)kmem_alloc(sizeof (md_rendelta_t), KM_SLEEP); 270 271 new->beginning = DELTA_BEG; 272 new->dev = dev; 273 new->new_role = new_role; 274 new->old_role = old_role; 275 new->next = NULL; 276 new->prev = prev; 277 new->unp = unp; 278 new->uip = uip; 279 bzero((void *) &new->txn_stat, sizeof (md_rendstat_t)); 280 281 /* 282 * For non-meta devices that are being renamed (in the future, 283 * that is) we would need to pass in default functions to 284 * accommodate them, provided the default function is 285 * truly capable of performing the lock/check/unlock function 286 * on opaque devices. 287 */ 288 289 new->lock = md_get_named_service(dev, /* modindex */ 0, 290 MDRNM_LOCK, md_rename_lock); 291 292 new->unlock = (md_ren_void_svc_t *)md_get_named_service(dev, 293 /* modindex */ 0, MDRNM_UNLOCK, 294 (intptr_t (*)()) md_rename_unlock); 295 296 new->check = md_get_named_service(dev, /* modindex */ 0, 297 MDRNM_CHECK, /* Default */ NULL); 298 299 new->role_swap = NULL; /* set this when the roles are determined */ 300 301 if (!new->lock || !new->unlock || !new->check) { 302 (void) mdmderror(ep, MDE_RENAME_CONFIG_ERROR, md_getminor(dev)); 303 err = EINVAL; 304 goto out; 305 } 306 307 new->end = DELTA_END; 308 309 out: 310 if (err != 0) { 311 if (new) { 312 new->beginning = DELTA_BEG_FREED; 313 new->end = DELTA_END_FREED; 314 315 kmem_free(new, sizeof (md_rendelta_t)); 316 new = NULL; 317 } 318 } 319 320 if (prev) { 321 prev->next = new; 322 } 323 324 return (new); 325 } 326 327 /* 328 * md_store_recid() 329 * used by role swap functions 330 */ 331 void 332 md_store_recid( 333 int *prec_idx, 334 mddb_recid_t *recid_list, 335 md_unit_t *un) 336 { 337 mddb_recid_t *rp; 338 bool_t add_recid; 339 
340 ASSERT(prec_idx); 341 ASSERT(recid_list); 342 ASSERT(recid_list[*prec_idx] == 0); 343 ASSERT(*prec_idx >= 0); 344 345 for (add_recid = TRUE, rp = recid_list; add_recid && rp && *rp; rp++) { 346 if (MD_RECID(un) == *rp) { 347 add_recid = FALSE; 348 } 349 } 350 351 if (add_recid) { 352 recid_list[(*prec_idx)++] = MD_RECID(un); 353 } 354 } 355 356 /* 357 * MDRNM_LIST_URFOLKS: generic named svc entry point 358 * add all parents onto the list pointed to by dlpp 359 * (only weird multi-parented devices need to have their 360 * own named svc to do this.) 361 */ 362 static int 363 md_rename_listfolks(md_rendelta_t **dlpp, md_rentxn_t *rtxnp) 364 { 365 md_rendelta_t *new; 366 367 ASSERT(rtxnp); 368 ASSERT(dlpp); 369 ASSERT(*dlpp == NULL); 370 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME)); 371 ASSERT(rtxnp->from.uip); 372 ASSERT(rtxnp->from.unp); 373 374 if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) { 375 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, 376 rtxnp->from.mnum); 377 return (-1); 378 } 379 380 if (!MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) { 381 return (0); 382 } 383 384 /* 385 * If supporting log renaming (and other multiparented devices) 386 * callout to each misc module to claim this waif and return the 387 * md_dev64_t of its parents. 
388 */ 389 if (MD_PARENT(rtxnp->from.unp) == MD_MULTI_PARENT) { 390 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, 391 rtxnp->from.mnum); 392 return (2); 393 } 394 395 if ((rtxnp->op == MDRNOP_RENAME) || 396 (MD_PARENT(rtxnp->from.unp) != MD_SID(rtxnp->to.unp))) { 397 398 new = md_build_rendelta( 399 MDRR_PARENT, 400 MDRR_PARENT, 401 md_makedevice(md_major, MD_PARENT(rtxnp->from.unp)), 402 NULL, 403 MD_UNIT(MD_PARENT(rtxnp->from.unp)), 404 MDI_UNIT(MD_PARENT(rtxnp->from.unp)), 405 &rtxnp->mde); 406 } else { 407 /* parent is swapping roles with self */ 408 new = md_build_rendelta( 409 MDRR_PARENT, 410 MDRR_SELF, 411 md_makedevice(md_major, MD_SID(rtxnp->to.unp)), 412 NULL, 413 rtxnp->to.unp, 414 rtxnp->to.uip, 415 &rtxnp->mde); 416 } 417 418 if (!new) { 419 if (mdisok(&rtxnp->mde)) { 420 (void) mdsyserror(&rtxnp->mde, ENOMEM); 421 } 422 return (-1); 423 } 424 425 *dlpp = new; 426 427 return (1); 428 } 429 430 /* 431 * MDRNM_LIST_URSELF: named svc entry point 432 * add all delta entries appropriate for ourselves onto the deltalist pointed 433 * to by dlpp 434 */ 435 static int 436 md_rename_listself(md_rendelta_t **dlpp, md_rentxn_t *rtxnp) 437 { 438 md_rendelta_t *new, *p; 439 bool_t exchange_up = FALSE; 440 441 ASSERT(rtxnp); 442 ASSERT(dlpp); 443 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME)); 444 ASSERT(rtxnp->from.unp); 445 ASSERT(rtxnp->from.uip); 446 447 if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) { 448 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, 449 rtxnp->from.mnum); 450 return (-1); 451 } 452 453 for (p = *dlpp; p && p->next != NULL; p = p->next) { 454 /* NULL */ 455 } 456 457 /* 458 * renaming or 459 * from's parent is not to and to's parent is not from 460 */ 461 if (rtxnp->op == MDRNOP_RENAME) { 462 new = md_build_rendelta( 463 MDRR_SELF, 464 MDRR_SELF, 465 md_makedevice(md_major, rtxnp->from.mnum), 466 p, 467 rtxnp->from.unp, 468 rtxnp->from.uip, 469 &rtxnp->mde); 470 } else { 471 472 if (MD_PARENT(rtxnp->from.unp) 
== MD_SID(rtxnp->to.unp)) { 473 exchange_up = TRUE; 474 } 475 476 /* self and parent are flipping */ 477 new = md_build_rendelta( 478 MDRR_SELF, 479 exchange_up? MDRR_PARENT: MDRR_CHILD, 480 md_makedevice(md_major, rtxnp->from.mnum), 481 p, 482 rtxnp->from.unp, 483 rtxnp->from.uip, 484 &rtxnp->mde); 485 } 486 487 if (!new) { 488 if (mdisok(&rtxnp->mde)) { 489 (void) mdsyserror(&rtxnp->mde, ENOMEM); 490 } 491 return (-1); 492 } 493 494 if (!*dlpp) { 495 *dlpp = new; 496 } 497 498 return (1); 499 } 500 501 /* 502 * free the tree of all deltas to devices involved in the rename transaction 503 */ 504 static void 505 free_dtree(md_rendelta_t *family) 506 { 507 md_rendelta_t *next = NULL; 508 int i = 0; 509 md_rendelta_t *r; 510 511 for (r = family; (NULL != r); r = next, i++) { 512 513 next = r->next; 514 515 /* shift << because it makes the resultant pattern readable */ 516 r->beginning = DELTA_BEG_FREED ^ (i << guard_shift); 517 r->end = DELTA_END_FREED ^ (i << guard_shift); 518 519 kmem_free(r, sizeof (md_rendelta_t)); 520 } 521 } 522 523 /* 524 * walk down family tree, calling lock service function 525 */ 526 static int 527 lock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) 528 { 529 md_rendelta_t *r; 530 int rc; 531 532 ASSERT(family); 533 ASSERT(rtxnp); 534 535 if (!family || !rtxnp) { 536 return (EINVAL); 537 } 538 539 for (rc = 0, r = family; r; r = r->next) { 540 541 ASSERT(r->unp); 542 ASSERT(!UNIT_WRITER_HELD(r->unp)); 543 ASSERT(r->lock); 544 545 if ((rc = (int)(*r->lock) (r, rtxnp)) != 0) { 546 return (rc); 547 } 548 r->txn_stat.locked = TRUE; 549 } 550 551 return (0); 552 } 553 554 /* 555 * We rely on check() (MDRNM_CHECK) to make exhaustive checks, 556 * since we don't attempt to undo role_swap() failures. 557 * 558 * To implement an undo() function would require each role_swap() 559 * to store a log of previous state of the structures it changes, 560 * presumably anchored by the rendelta. 
561 * 562 */ 563 static int 564 check_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) 565 { 566 md_rendelta_t *r; 567 int rc; 568 569 ASSERT(family); 570 ASSERT(rtxnp); 571 572 if (!family || !rtxnp) { 573 /* no error packet to set? */ 574 return (EINVAL); 575 } 576 577 for (r = family, rc = 0; r; r = r->next) { 578 579 ASSERT(UNIT_WRITER_HELD(r->unp)); 580 ASSERT(r->txn_stat.locked); 581 582 /* 583 * <to> doesn't exist for rename 584 */ 585 if (!(rtxnp->op == MDRNOP_RENAME && 586 md_getminor(r->dev) == rtxnp->to.mnum)) { 587 ASSERT(r->uip); 588 r->txn_stat.is_open = md_unit_isopen(r->uip); 589 } 590 591 /* 592 * if only allowing offline rename/exchanges, check 593 * for top being trans because it opens its sub-devices 594 */ 595 596 switch (rtxnp->revision) { 597 case MD_RENAME_VERSION_OFFLINE: 598 if ((r->txn_stat.is_open) && 599 (!rtxnp->stat.trans_in_stack)) { 600 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, 601 md_getminor(r->dev)); 602 return (EBUSY); 603 } 604 break; 605 606 case MD_RENAME_VERSION_ONLINE: 607 break; 608 609 default: 610 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 611 md_getminor(r->dev)); 612 return (EINVAL); 613 } 614 615 /* MD_UN_MOD_INPROGRESS includes the MD_UN_RENAMING bit */ 616 617 if (MD_STATUS(r->unp) & MD_UN_MOD_INPROGRESS) { 618 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, 619 md_getminor(r->dev)); 620 return (EBUSY); 621 } 622 623 MD_STATUS(r->unp) |= MD_UN_RENAMING; 624 625 if ((rc = (int)(*r->check)(r, rtxnp)) != 0) { 626 return (rc); 627 } 628 629 /* and be sure we can proceed */ 630 if (!(r->role_swap)) { 631 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 632 md_getminor(r->dev)); 633 return (EINVAL); 634 } 635 r->txn_stat.checked = TRUE; 636 } 637 638 return (0); 639 } 640 641 642 /* 643 * rename role_swap() functions are responsible for updating their 644 * own parent, self and children references in both on-disk 645 * and in-core structures, as well as storing the changed 646 * record ids into 
recids and incrementing rec_idx. 647 */ 648 649 static void 650 role_swap_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) 651 { 652 md_rendelta_t *r; 653 654 ASSERT(family); 655 ASSERT(rtxnp); 656 657 for (r = family; r; r = r->next) { 658 ASSERT(r->role_swap); 659 ASSERT(r->txn_stat.locked); 660 ASSERT(r->txn_stat.checked); 661 662 (*r->role_swap)(r, rtxnp); 663 664 r->txn_stat.role_swapped = TRUE; 665 } 666 667 /* 668 * there's some work to do, but not more than expected 669 */ 670 ASSERT(rtxnp->rec_idx > 0); 671 ASSERT(rtxnp->rec_idx < rtxnp->n_recids); 672 673 if (rtxnp->rec_idx >= rtxnp->n_recids || rtxnp->rec_idx <= 0) { 674 /* 675 * There's no way to indicate error from here, 676 * and even if we could, there's no undo mechanism. 677 * We've already modified the in-core structs, so 678 * We can't continue w/o committing, but we 679 * don't appear to have anything to commit. 680 */ 681 cmn_err(CE_PANIC, 682 "md_rename: role_swap_dtree(family:%p, rtxnp:%p)", 683 (void *) family, (void *) rtxnp); 684 return; 685 } 686 rtxnp->recids[rtxnp->rec_idx] = 0; 687 688 mddb_commitrecs_wrapper(rtxnp->recids); 689 } 690 691 /* 692 * walk down delta tree, calling the unlock service for each device, 693 * provided any of the devices appear to have been locked 694 */ 695 static void 696 unlock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) 697 { 698 md_rendelta_t *r; 699 uint_t any_locked = FALSE; 700 701 ASSERT(family); 702 ASSERT(rtxnp); 703 704 for (r = family; r; r = r->next) { 705 706 ASSERT(!(r->txn_stat.unlocked)); /* "has been unlocked" */ 707 any_locked |= r->txn_stat.locked; 708 } 709 710 if (any_locked) { 711 712 /* unwind in reverse order */ 713 for (r = family; NULL != r->next; r = r->next) { 714 /* NULL */ 715 } 716 717 for (; NULL != r; r = r->prev) { 718 MD_STATUS(r->unp) &= ~MD_UN_RENAMING; 719 ASSERT(r->unlock); 720 r->unlock(r, rtxnp); 721 r->txn_stat.unlocked = TRUE; 722 } 723 } 724 } 725 726 /* 727 * MDRNM_UPDATE_SELF 728 * This role swap function is 
identical for all unit types, 729 * so keep it here. It's also the best example because it 730 * touches all the modified portions of the relevant 731 * in-common structures. 732 */ 733 static void 734 md_rename_update_self( 735 md_rendelta_t *delta, 736 md_rentxn_t *rtxnp) 737 { 738 minor_t from_min, to_min; 739 sv_dev_t sv; 740 mddb_de_ic_t *dep; 741 mddb_rb32_t *rbp; 742 743 ASSERT(rtxnp); 744 ASSERT(rtxnp->op == MDRNOP_RENAME); 745 ASSERT(delta); 746 ASSERT(delta->unp); 747 ASSERT(delta->uip); 748 ASSERT(rtxnp->rec_idx >= 0); 749 ASSERT(rtxnp->recids); 750 ASSERT(delta->old_role == MDRR_SELF); 751 ASSERT(delta->new_role == MDRR_SELF); 752 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum); 753 754 from_min = rtxnp->from.mnum; 755 to_min = rtxnp->to.mnum; 756 757 /* 758 * self id changes in our own unit struct 759 */ 760 MD_SID(delta->unp) = to_min; 761 762 /* 763 * make sure that dest always has correct un_revision 764 * and rb_revision 765 */ 766 delta->unp->c.un_revision |= MD_FN_META_DEV; 767 dep = mddb_getrecdep(MD_RECID(delta->unp)); 768 ASSERT(dep); 769 rbp = dep->de_rb; 770 if (rbp->rb_revision & MDDB_REV_RB) { 771 rbp->rb_revision = MDDB_REV_RBFN; 772 } else if (rbp->rb_revision & MDDB_REV_RB64) { 773 rbp->rb_revision = MDDB_REV_RB64FN; 774 } 775 776 /* 777 * clear old array pointers to unit in-core and unit 778 */ 779 780 MDI_VOIDUNIT(from_min) = NULL; 781 MD_VOIDUNIT(from_min) = NULL; 782 783 /* 784 * and point the new slots at the unit in-core and unit structs 785 */ 786 787 MDI_VOIDUNIT(to_min) = delta->uip; 788 MD_VOIDUNIT(to_min) = delta->unp; 789 790 /* 791 * recreate kstats 792 * - destroy the ones associated with our former identity 793 * - reallocate and associate them with our new identity 794 */ 795 md_kstat_destroy_ui(delta->uip); 796 md_kstat_init_ui(to_min, delta->uip); 797 798 /* 799 * the unit in-core reference to the get next link's id changes 800 */ 801 802 delta->uip->ui_link.ln_id = to_min; 803 804 /* 805 * name space addition of 
new key was done from user-level 806 * remove the old name's key here 807 */ 808 809 sv.setno = MD_MIN2SET(from_min); 810 sv.key = rtxnp->from.key; 811 812 md_rem_names(&sv, 1); 813 814 /* 815 * Remove associated device node as well 816 */ 817 md_remove_minor_node(from_min); 818 819 /* 820 * and store the record id (from the unit struct) into recids 821 * for later commitment by md_rename() 822 */ 823 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); 824 } 825 826 /* 827 * Either one of our siblings and/or our parent changed identities. 828 */ 829 static void 830 md_renexch_update_parent( 831 md_rendelta_t *delta, 832 md_rentxn_t *rtxnp) 833 { 834 ASSERT(rtxnp); 835 ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE)); 836 ASSERT(rtxnp->rec_idx >= 0); 837 ASSERT(rtxnp->recids); 838 ASSERT(delta); 839 ASSERT(delta->unp); 840 ASSERT(delta->old_role == MDRR_CHILD); 841 ASSERT(delta->new_role == MDRR_CHILD); 842 ASSERT((MD_PARENT(delta->unp) == rtxnp->from.mnum) || 843 (MD_PARENT(delta->unp) == rtxnp->to.mnum)); 844 845 if (MD_PARENT(delta->unp) == rtxnp->from.mnum) { 846 MD_PARENT(delta->unp) = rtxnp->to.mnum; 847 } 848 849 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); 850 } 851 852 /* 853 * exchange up (child->self) 854 */ 855 static void 856 md_exchange_child_update_to( 857 md_rendelta_t *delta, 858 md_rentxn_t *rtxnp) 859 { 860 minor_t from_min, to_min; 861 862 ASSERT(rtxnp); 863 ASSERT(rtxnp->op == MDRNOP_EXCHANGE); 864 ASSERT(rtxnp->rec_idx >= 0); 865 ASSERT(rtxnp->recids); 866 ASSERT(delta); 867 ASSERT(delta->unp); 868 ASSERT(delta->uip); 869 ASSERT(delta->old_role == MDRR_CHILD); 870 ASSERT(delta->new_role == MDRR_SELF); 871 ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum); 872 873 from_min = rtxnp->from.mnum; 874 to_min = rtxnp->to.mnum; 875 876 /* 877 * self id changes in our own unit struct 878 * Note: 879 * - Since we're assuming the identity of "from" we use its mnum even 880 * though we're updating the "to" 
structures. 881 */ 882 883 MD_SID(delta->unp) = from_min; 884 885 /* 886 * our parent identifier becomes the new self, who was "to" 887 */ 888 889 MD_PARENT(delta->unp) = to_min; 890 891 /* 892 * point the set array pointers at the "new" unit and unit in-cores 893 * Note: 894 * - The other half of this transfer is done in the "update from" 895 * rename/exchange named service. 896 */ 897 898 MD_VOIDUNIT(from_min) = delta->unp; 899 MDI_VOIDUNIT(from_min) = delta->uip; 900 901 /* 902 * transfer kstats 903 */ 904 905 delta->uip->ui_kstat = rtxnp->from.kstatp; 906 907 /* 908 * the unit in-core reference to the get next link's id changes 909 */ 910 911 delta->uip->ui_link.ln_id = from_min; 912 913 /* 914 * name space additions, if necessary, were done from user-level. 915 * name space deletions, if necessary, were done in "exchange_from" 916 */ 917 918 /* 919 * and store the record id (from the unit struct) into recids 920 * for later comitment by md_rename() 921 */ 922 923 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); 924 } 925 926 /* 927 * exchange up (self->parent) 928 */ 929 static void 930 md_exchange_self_update_from_up( 931 md_rendelta_t *delta, 932 md_rentxn_t *rtxnp) 933 { 934 minor_t from_min, to_min; 935 936 ASSERT(rtxnp); 937 ASSERT(rtxnp->op == MDRNOP_EXCHANGE); 938 ASSERT(rtxnp->rec_idx >= 0); 939 ASSERT(rtxnp->recids); 940 ASSERT(delta); 941 ASSERT(delta->unp); 942 ASSERT(delta->uip); 943 ASSERT(delta->old_role == MDRR_SELF); 944 ASSERT(delta->new_role == MDRR_PARENT); 945 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum); 946 947 from_min = rtxnp->from.mnum; 948 to_min = rtxnp->to.mnum; 949 950 /* 951 * self id changes in our own unit struct 952 * Note: 953 * - Since we're assuming the identity of "to" we use its mnum 954 * while we're updating the "to" structures. 
955 */ 956 957 MD_SID(delta->unp) = to_min; 958 959 /* 960 * our parent identifier becomes the new parent, who was "from" 961 */ 962 963 MD_PARENT(delta->unp) = from_min; 964 965 /* 966 * point the set array pointers at the "new" unit and unit in-cores 967 * Note: 968 * - The other half of this transfer is done in the "update from" 969 * rename/exchange named service. 970 */ 971 972 MD_VOIDUNIT(to_min) = delta->unp; 973 MDI_VOIDUNIT(to_min) = delta->uip; 974 975 /* 976 * transfer kstats 977 */ 978 979 delta->uip->ui_kstat = rtxnp->to.kstatp; 980 981 /* 982 * the unit in-core reference to the get next link's id changes 983 */ 984 985 delta->uip->ui_link.ln_id = to_min; 986 987 /* 988 * name space additions, if necessary, were done from user-level. 989 * name space deletions, if necessary, were done in "exchange_from" 990 */ 991 992 /* 993 * and store the record id (from the unit struct) into recids 994 * for later comitment by md_rename() 995 */ 996 997 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); 998 } 999 1000 /* 1001 * The order of the called role swap functions is critical. 1002 * If they're not ordered as "all parents", then "all self" 1003 * then "all child" transitions, we will almost certainly 1004 * corrupt the data base and the in-core linkages. So, 1005 * verify that the list built by the individual drivers is 1006 * ok here. 1007 * 1008 * We could have done fancy bit encodings of the roles so 1009 * it all fit into a single word and we wouldn't need the 1010 * prev_ord field. But, since cpu power is cheaper than 1011 * than people power, they're all separate for easier 1012 * debugging and maintaining. (In the unlikely event that 1013 * rename/exchange ever becomes cpu-limited, and this 1014 * algorithm is the bottleneck, we should revisit this.) 
1015 */ 1016 1017 static bool_t 1018 role_swap_is_valid( 1019 int previous, 1020 int current, 1021 md_rendelta_t *delta, 1022 md_rentxn_t *rtxnp) 1023 { 1024 bool_t valid = FALSE; 1025 1026 /* 1027 * we've backed up in processing the role table 1028 */ 1029 if ((previous > current) && 1030 (delta->prev && (delta->old_role != delta->prev->old_role))) { 1031 goto out; 1032 } 1033 1034 /* 1035 * we're repeating the same role transition 1036 */ 1037 if (previous == current) { 1038 switch (delta->old_role) { 1039 case MDRR_PARENT: 1040 /* 1041 * require at least one of the devices to 1042 * be multiparented for us to allow another 1043 * parent transition 1044 */ 1045 if ((MD_MULTI_PARENT != MD_PARENT(rtxnp->from.unp)) && 1046 (MD_MULTI_PARENT != MD_PARENT(rtxnp->to.unp))) { 1047 goto out; 1048 } 1049 break; 1050 1051 case MDRR_CHILD: 1052 /* it's ok to have multiple children */ 1053 break; 1054 1055 case MDRR_SELF: 1056 /* it's never ok to have multiple self transitions */ 1057 /* FALLTHROUGH */ 1058 default: 1059 goto out; 1060 } 1061 } 1062 1063 valid = TRUE; 1064 out: 1065 if (!valid) { 1066 if (md_rename_debug != 0) { 1067 cmn_err(CE_NOTE, "previous: %d, current: %d, role: %s", 1068 previous, current, 1069 ROLE(delta->old_role)); 1070 delay(drv_sectohz(3)); 1071 ASSERT(FALSE); 1072 } 1073 } 1074 1075 return (valid); 1076 } 1077 1078 static role_change_tab_t * 1079 lookup_role(md_renrole_t old_role, md_renrole_t new_role) 1080 { 1081 role_change_tab_t *rp; 1082 role_change_tab_t *found = NULL; 1083 1084 for (rp = role_swap_tab; !found && (rp->old_role != MDRR_UNK); rp++) { 1085 1086 if (rp->old_role == old_role && rp->new_role == new_role) { 1087 found = rp; 1088 } 1089 } 1090 /* 1091 * we require a named svc if we've got two devices 1092 * claiming to be changing roles in this manner 1093 */ 1094 ASSERT(found); 1095 ASSERT(found->default_svc != ILLEGAL_ROLESWAP_SVC); 1096 ASSERT(found->svc_name != ILLEGAL_SVC_NAME); 1097 1098 if (!found || 1099 (found->default_svc 
== ILLEGAL_ROLESWAP_SVC) || 1100 (found->svc_name == ILLEGAL_SVC_NAME)) { 1101 return (NULL); 1102 } 1103 1104 return (found); 1105 } 1106 1107 /* 1108 * fill in the role swap named svc., now that we know each device 1109 * and its changing role 1110 */ 1111 static int 1112 valid_roleswap_dtree( 1113 md_rendelta_t *family, 1114 md_rentxn_t *rtxnp 1115 ) 1116 { 1117 md_rendelta_t *r; 1118 role_change_tab_t *rolep; 1119 minor_t from_min, to_min; 1120 int prev_ord = -1; 1121 bool_t found_self = FALSE; 1122 int err = 0; 1123 1124 ASSERT(family); 1125 ASSERT(rtxnp); 1126 1127 from_min = rtxnp->from.mnum; 1128 to_min = rtxnp->to.mnum; 1129 1130 for (r = family; r; r = r->next, prev_ord = rolep->ord) { 1131 1132 if (!(rolep = lookup_role(r->old_role, r->new_role))) { 1133 (void) mdmderror(&rtxnp->mde, 1134 MDE_RENAME_CONFIG_ERROR, from_min); 1135 err = EOPNOTSUPP; 1136 goto out; 1137 } 1138 r->role_swap = (md_ren_roleswap_svc_t *)md_get_named_service( 1139 r->dev, /* modindex */ 0, 1140 (char *)rolep->svc_name, 1141 (intptr_t (*)()) rolep->default_svc); 1142 1143 /* 1144 * someone probably called the ioctl directly and 1145 * incorrectly, rather than via the libmeta wrappers 1146 */ 1147 if (!(r->role_swap)) { 1148 (void) mdmderror(&rtxnp->mde, 1149 MDE_RENAME_TARGET_UNRELATED, to_min); 1150 err = EOPNOTSUPP; 1151 goto out; 1152 } 1153 1154 if (!role_swap_is_valid(prev_ord, rolep->ord, r, rtxnp)) { 1155 (void) mdmderror(&rtxnp->mde, 1156 MDE_RENAME_CONFIG_ERROR, from_min); 1157 err = EINVAL; 1158 goto out; 1159 } 1160 1161 if (rolep->old_role == MDRR_SELF) { 1162 found_self = TRUE; 1163 } 1164 1165 if (MD_PARENT(r->unp) == MD_MULTI_PARENT) { 1166 (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, 1167 md_getminor(r->dev)); 1168 err = EINVAL; 1169 goto out; 1170 } 1171 } 1172 1173 /* 1174 * must be at least one selfish device 1175 */ 1176 ASSERT(found_self); 1177 if (!found_self) { 1178 (void) mdmderror(&rtxnp->mde, 1179 MDE_RENAME_CONFIG_ERROR, from_min); 1180 err = 
EINVAL; 1181 goto out; 1182 } 1183 1184 out: 1185 return (err); 1186 } 1187 1188 /* 1189 * dump contents of rename transaction 1190 */ 1191 static void 1192 dump_txn(md_rentxn_t *rtxnp) { 1193 1194 if (md_rename_debug == 0) { 1195 return; 1196 } 1197 1198 cmn_err(CE_NOTE, "rtxnp: %p", (void *) rtxnp); 1199 if (rtxnp) { 1200 cmn_err(CE_NOTE, "beginning: %llx, op: %s", 1201 rtxnp->beginning, OP_STR(rtxnp->op)); 1202 1203 cmn_err(CE_NOTE, 1204 "revision: %d, uflags: %d, rec_idx: %d, n_recids: %d, rec_ids: %p%s", 1205 rtxnp->revision, rtxnp->uflags, 1206 rtxnp->rec_idx, rtxnp->n_recids, (void *) rtxnp->recids, 1207 rtxnp->stat.trans_in_stack? " (trans in stack)": ""); 1208 cmn_err(CE_NOTE, " from: beginning: %llx", 1209 rtxnp->from.beginning); 1210 cmn_err(CE_NOTE, " minor: %lX, key: %lX", 1211 (ulong_t)rtxnp->from.mnum, (ulong_t)rtxnp->from.key); 1212 cmn_err(CE_NOTE, " unp: %lX, uip: %lX", 1213 (ulong_t)rtxnp->from.unp, (ulong_t)rtxnp->from.uip); 1214 cmn_err(CE_NOTE, " end: %llx", rtxnp->from.end); 1215 cmn_err(CE_NOTE, " to: beginning: %llx", rtxnp->to.beginning); 1216 cmn_err(CE_NOTE, " minor: %lX, key: %lX", 1217 (ulong_t)rtxnp->to.mnum, (ulong_t)rtxnp->to.key); 1218 cmn_err(CE_NOTE, " unp: %lX, uip: %lX", 1219 (ulong_t)rtxnp->to.unp, (ulong_t)rtxnp->to.uip); 1220 cmn_err(CE_NOTE, " end: %llx", rtxnp->to.end); 1221 cmn_err(CE_NOTE, "end: %llx\n", rtxnp->end); 1222 } 1223 delay(drv_sectohz(1)); 1224 } 1225 1226 /* 1227 * dump contents of all deltas 1228 */ 1229 static void 1230 dump_dtree(md_rendelta_t *family) 1231 { 1232 md_rendelta_t *r; 1233 int i; 1234 1235 if (md_rename_debug == 0) { 1236 return; 1237 } 1238 1239 for (r = family, i = 0; r; r = r->next, i++) { 1240 cmn_err(CE_NOTE, "%d. 
beginning: %llx", i, r->beginning); 1241 cmn_err(CE_NOTE, " r: %lX, dev: %lX, next: %lx, prev: %lx", 1242 (ulong_t)r, (ulong_t)r->dev, 1243 (ulong_t)r->next, (ulong_t)r->prev); 1244 1245 cmn_err(CE_NOTE, " role: %s -> %s, unp: %lx, uip: %lx", 1246 ROLE(r->old_role), ROLE(r->new_role), 1247 (ulong_t)r->unp, (ulong_t)r->uip); 1248 cmn_err(CE_NOTE, 1249 " lock: %lx, unlock: %lx\n\t check: %lx, role_swap: %lx", 1250 (ulong_t)r->lock, (ulong_t)r->unlock, 1251 (ulong_t)r->check, (ulong_t)r->role_swap); 1252 if (*((uint_t *)(&r->txn_stat)) != 0) { 1253 cmn_err(CE_NOTE, "status: (0x%x) %s%s%s%s%s", 1254 *((uint_t *)(&r->txn_stat)), 1255 r->txn_stat.is_open? "is_open " : "", 1256 r->txn_stat.locked? "locked " : "", 1257 r->txn_stat.checked? "checked " : "", 1258 r->txn_stat.role_swapped? "role_swapped " : "", 1259 r->txn_stat.unlocked? "unlocked" : ""); 1260 } 1261 cmn_err(CE_NOTE, "end: %llx\n", r->end); 1262 } 1263 delay(drv_sectohz(1)); 1264 } 1265 1266 /* 1267 * validate the rename request parameters 1268 */ 1269 static int 1270 validate_txn_parms(md_rentxn_t *rtxnp) 1271 { 1272 minor_t to_min, from_min; 1273 1274 ASSERT(rtxnp); 1275 1276 from_min = rtxnp->from.mnum; 1277 to_min = rtxnp->to.mnum; 1278 1279 switch (rtxnp->revision) { 1280 case MD_RENAME_VERSION_OFFLINE: 1281 if (rtxnp->uflags != 0) { 1282 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 1283 from_min); 1284 return (ENOTSUP); 1285 } 1286 break; 1287 1288 case MD_RENAME_VERSION_ONLINE: 1289 /* not supported until 5.0 */ 1290 /* FALLTHROUGH */ 1291 1292 default: 1293 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 1294 from_min); 1295 return (EPROTONOSUPPORT); 1296 } 1297 1298 if ((rtxnp->from.uip = MDI_UNIT(from_min)) == NULL) { 1299 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); 1300 return (ENODEV); 1301 } 1302 1303 if (!md_dev_exists(md_makedevice(md_major, from_min))) { 1304 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); 1305 return (ENODEV); 1306 } 1307 1308 if 
((rtxnp->from.key == MD_KEYBAD) || (rtxnp->from.key == MD_KEYWILD)) { 1309 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, from_min); 1310 return (EINVAL); 1311 } 1312 1313 rtxnp->from.kstatp = rtxnp->from.uip->ui_kstat; 1314 rtxnp->from.unp = MD_UNIT(from_min); 1315 1316 if (MD_MIN2SET(to_min) != MD_MIN2SET(from_min)) { 1317 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min); 1318 return (EINVAL); 1319 } 1320 1321 switch (rtxnp->op) { 1322 case MDRNOP_EXCHANGE: 1323 rtxnp->to.unp = MD_UNIT(to_min); 1324 rtxnp->to.uip = MDI_UNIT(to_min); 1325 1326 /* 1327 * exchange requires target to exist 1328 */ 1329 1330 if ((rtxnp->to.uip == NULL) || 1331 (md_dev_exists(md_makedevice(md_major, to_min)) == NULL)) { 1332 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, 1333 to_min); 1334 return (ENODEV); 1335 } 1336 1337 if ((rtxnp->to.key == MD_KEYBAD) || 1338 (rtxnp->to.key == MD_KEYWILD)) { 1339 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min); 1340 return (EINVAL); 1341 } 1342 1343 /* 1344 * <from> is not in the role of <self>, 1345 * that is, 1346 * <from> has a parent, which is <to> and <to> has a parent too 1347 * or 1348 * <to> has a parent, which is <from> and <to> can have a child 1349 */ 1350 if ((MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) && 1351 (MD_PARENT(rtxnp->from.unp) == to_min) && 1352 MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) { 1353 (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER, 1354 from_min); 1355 return (EINVAL); 1356 } 1357 1358 if ((MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) && 1359 (MD_PARENT(rtxnp->to.unp) == from_min) && 1360 (MD_CAPAB(rtxnp->to.unp) & MD_CAN_META_CHILD)) { 1361 (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER, 1362 from_min); 1363 return (EINVAL); 1364 } 1365 1366 rtxnp->to.kstatp = rtxnp->to.uip->ui_kstat; 1367 break; 1368 1369 case MDRNOP_RENAME: 1370 1371 /* 1372 * rename requires <to> not to exist 1373 */ 1374 1375 if (MDI_UNIT(to_min) || 1376 md_dev_exists(md_makedevice(md_major, to_min))) { 1377 1378 (void) 
mdmderror(&rtxnp->mde, MDE_UNIT_ALREADY_SETUP, 1379 to_min); 1380 return (EEXIST); 1381 } 1382 1383 /* 1384 * and to be within valid ranges for the current 1385 * limits on number of sets and metadevices 1386 */ 1387 if ((MD_MIN2SET(to_min) >= md_nsets) || 1388 (MD_MIN2UNIT(to_min) >= md_nunits)) { 1389 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min); 1390 return (EINVAL); 1391 } 1392 1393 rtxnp->to.unp = NULL; 1394 rtxnp->to.uip = NULL; 1395 rtxnp->to.kstatp = NULL; 1396 break; 1397 1398 default: 1399 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 1400 from_min); 1401 return (EINVAL); 1402 } 1403 1404 /* 1405 * install guard rails 1406 */ 1407 rtxnp->beginning = TXN_BEG; 1408 1409 rtxnp->from.beginning = TXNUN_BEG; 1410 rtxnp->from.end = TXNUN_END; 1411 1412 rtxnp->to.beginning = TXNUN_BEG; 1413 rtxnp->to.end = TXNUN_END; 1414 1415 rtxnp->end = TXN_END; 1416 1417 return (0); 1418 } 1419 1420 /* 1421 * If the device being changed exhibits this capability, set the list 1422 * relatives function pointer to the named service that lists the 1423 * appropriate relatives for this capability. 
1424 */ 1425 static int 1426 set_list_rels_funcp( 1427 md_rentxn_t *rtxnp, 1428 md_stackcap_t capability, 1429 char *svc_name, 1430 md_ren_list_svc_t default_svc_func, 1431 md_ren_list_svc_t **list_relatives_funcp 1432 ) 1433 { 1434 int err; 1435 minor_t from_min; 1436 md_dev64_t from_dev; 1437 md_unit_t *from_un; 1438 mdi_unit_t *from_ui; 1439 1440 ASSERT(rtxnp); 1441 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); 1442 ASSERT(list_relatives_funcp); 1443 1444 from_min = rtxnp->from.mnum; 1445 from_dev = md_makedevice(md_major, from_min); 1446 from_un = MD_UNIT(from_min); 1447 from_ui = MDI_UNIT(from_min); 1448 err = 0; 1449 1450 if (!from_ui || !from_un) { 1451 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); 1452 err = EINVAL; 1453 goto out; 1454 } 1455 1456 if ((capability == MD_CAN_DO_ANYTHING) || 1457 ((MD_CAPAB(from_un) & capability) == capability)) { 1458 1459 *list_relatives_funcp = (md_ren_list_svc_t *) 1460 md_get_named_service(from_dev, 1461 /* modindex */ 0, svc_name, 1462 (intptr_t (*)()) default_svc_func); 1463 1464 ASSERT(*list_relatives_funcp); 1465 if (!(*list_relatives_funcp)) { 1466 (void) mdmderror(&rtxnp->mde, 1467 MDE_RENAME_CONFIG_ERROR, from_min); 1468 err = EINVAL; 1469 goto out; 1470 } 1471 } else { 1472 *list_relatives_funcp = (md_ren_list_svc_t *)NULL; 1473 } 1474 1475 out: 1476 return (err); 1477 } 1478 1479 /* 1480 * call list relations function, bump recid counter 1481 * by number of members added to the delta list. 1482 * Validate that the number of members added is within bounds. 
1483 */ 1484 static int 1485 list_relations( 1486 md_rendelta_t **family, 1487 md_rentxn_t *rtxnp, 1488 md_ren_list_svc_t *add_relatives_funcp, 1489 int valid_min, 1490 int valid_max 1491 ) 1492 { 1493 int n_added; 1494 int err = 0; 1495 1496 ASSERT(family); 1497 ASSERT(rtxnp); 1498 1499 if (!family || !rtxnp) { 1500 err = EINVAL; 1501 goto out; 1502 } 1503 1504 n_added = 0; 1505 1506 /* no relations of this type */ 1507 if (!add_relatives_funcp) { 1508 goto out; 1509 } 1510 1511 n_added = (*add_relatives_funcp) (family, rtxnp); 1512 1513 if ((n_added < valid_min) || (n_added > valid_max)) { 1514 if (mdisok(&rtxnp->mde)) { 1515 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 1516 rtxnp->from.mnum); 1517 } 1518 err = EINVAL; 1519 goto out; 1520 } 1521 1522 rtxnp->n_recids += n_added; 1523 1524 out: 1525 return (err); 1526 } 1527 1528 /* 1529 * build recid array 1530 */ 1531 static int 1532 alloc_recids(md_rendelta_t *family, md_rentxn_t *rtxnp) 1533 { 1534 int err = 0; 1535 1536 if (!family || !rtxnp) { 1537 err = ENOMEM; 1538 goto out; 1539 } 1540 1541 rtxnp->rec_idx = 0; 1542 1543 if (rtxnp->n_recids == 0) { 1544 err = EINVAL; 1545 goto out; 1546 } 1547 1548 rtxnp->n_recids += 1; /* terminator */ 1549 1550 rtxnp->recids = kmem_alloc(sizeof (mddb_recid_t) * rtxnp->n_recids, 1551 KM_SLEEP); 1552 if (!(rtxnp->recids)) { 1553 err = ENOMEM; 1554 goto out; 1555 } 1556 1557 bzero((void *) rtxnp->recids, 1558 (sizeof (mddb_recid_t) * rtxnp->n_recids)); 1559 out: 1560 if (err != 0) { 1561 (void) mdsyserror(&rtxnp->mde, err); 1562 } 1563 1564 return (err); 1565 } 1566 1567 /* 1568 * build family tree (parent(s), self, children) 1569 * The order of the resultant list is important, as it governs 1570 * the order of locking, checking and changing the unit structures. 1571 * Since we'll be changing them, we may not use the MD_UNIT, MDI_UNIT, 1572 * and other pointer which depend on the array being correct. 1573 * Use only the cached pointers (in rtxnp.) 
1574 */ 1575 static md_rendelta_t * 1576 build_dtree(md_rentxn_t *rtxnp) 1577 { 1578 md_ren_list_svc_t *add_folks, *add_self, *add_kids; 1579 int err; 1580 md_rendelta_t *family = NULL; 1581 1582 ASSERT(rtxnp); 1583 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); 1584 1585 err = set_list_rels_funcp(rtxnp, MD_CAN_PARENT, MDRNM_LIST_URFOLKS, 1586 md_rename_listfolks, &add_folks); 1587 1588 if (err) { 1589 goto out; 1590 } 1591 1592 err = set_list_rels_funcp(rtxnp, MD_CAN_DO_ANYTHING, MDRNM_LIST_URSELF, 1593 md_rename_listself, &add_self); 1594 if (err) { 1595 goto out; 1596 } 1597 1598 err = set_list_rels_funcp(rtxnp, MD_CAN_META_CHILD, MDRNM_LIST_URKIDS, 1599 /* no default list func */ ((int (*)()) NULL), 1600 &add_kids); 1601 if (err) { 1602 goto out; 1603 } 1604 1605 rtxnp->n_recids = 0; /* accumulated by list_relations() */ 1606 1607 if ((err = list_relations(&family, rtxnp, add_folks, 0, 1)) != 0) { 1608 goto out; 1609 } 1610 1611 if ((err = list_relations(&family, rtxnp, add_self, 1, 1)) != 0) { 1612 goto out; 1613 } 1614 1615 err = list_relations(&family, rtxnp, add_kids, 0, md_nunits); 1616 if (err != 0) { 1617 goto out; 1618 } 1619 1620 /* 1621 * delta tree is still empty? 
1622 */ 1623 if ((!family) || (rtxnp->n_recids == 0)) { 1624 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 1625 rtxnp->from.mnum); 1626 err = EINVAL; 1627 goto out; 1628 } 1629 1630 /* 1631 * verify role change interactions 1632 */ 1633 if ((err = valid_roleswap_dtree(family, rtxnp)) != 0) { 1634 goto out; 1635 } 1636 1637 if ((err = alloc_recids(family, rtxnp)) != 0) { 1638 goto out; 1639 } 1640 1641 out: 1642 if (err != 0) { 1643 free_dtree(family); 1644 dump_dtree(family); /* yes, after freeing it */ 1645 family = NULL; 1646 } 1647 1648 return (family); 1649 } 1650 1651 1652 /* 1653 * (MD_IOCRENAME) rename/exchange ioctl entry point 1654 * calls individual driver named service entry points 1655 * to build a list of devices which need state changed, 1656 * to verify that they're in a state where renames may occur, 1657 * and to modify themselves into their new identities 1658 */ 1659 1660 int 1661 md_rename( 1662 md_rename_t *mrp, 1663 IOLOCK *iolockp) 1664 { 1665 md_rendelta_t *family = NULL; 1666 md_rentxn_t rtxn; 1667 int err = 0; 1668 set_t setno; 1669 mdc_unit_t *mdc; 1670 1671 ASSERT(iolockp); 1672 if (mrp == NULL) 1673 return (EINVAL); 1674 1675 setno = MD_MIN2SET(mrp->from.mnum); 1676 if (setno >= md_nsets) { 1677 return (EINVAL); 1678 } 1679 1680 /* 1681 * Early exit if top is eof trans 1682 */ 1683 mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT(mrp->from.mnum)]; 1684 while (mdc != NULL) { 1685 if (!MD_HAS_PARENT(mdc->un_parent)) { 1686 break; 1687 } else { 1688 mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT 1689 (mdc->un_parent)]; 1690 } 1691 } 1692 1693 if (mdc && mdc->un_type == MD_METATRANS) { 1694 return (EINVAL); 1695 } 1696 1697 1698 mdclrerror(&mrp->mde); 1699 1700 bzero((void *) &rtxn, sizeof (md_rentxn_t)); 1701 mdclrerror(&rtxn.mde); 1702 1703 /* 1704 * encapsulate user parameters 1705 */ 1706 rtxn.from.key = mrp->from.key; 1707 rtxn.to.key = mrp->to.key; 1708 rtxn.from.mnum = mrp->from.mnum; 1709 rtxn.to.mnum = mrp->to.mnum; 
1710 rtxn.op = mrp->op; 1711 rtxn.uflags = mrp->flags; 1712 rtxn.revision = mrp->revision; 1713 1714 if (MD_MIN2UNIT(mrp->to.mnum) >= md_nunits) { 1715 err = EINVAL; 1716 goto cleanup; 1717 } 1718 1719 /* 1720 * catch this early, before taking any locks 1721 */ 1722 if (md_get_setstatus(setno) & MD_SET_STALE) { 1723 (void) (mdmddberror(&rtxn.mde, MDE_DB_STALE, rtxn.from.mnum, 1724 MD_MIN2SET(rtxn.from.mnum))); 1725 err = EROFS; 1726 goto cleanup; 1727 } 1728 1729 /* 1730 * Locking and re-validation (of the per-unit state) is 1731 * done by the rename lock/unlock service, for now only take 1732 * the array lock. 1733 */ 1734 md_array_writer(iolockp); 1735 1736 /* 1737 * validate the rename/exchange parameters 1738 * rtxn is filled in on succesful completion of validate_txn_parms() 1739 */ 1740 if ((err = validate_txn_parms(&rtxn)) != 0) { 1741 goto cleanup; 1742 } 1743 1744 /* 1745 * build list of work to do, the "delta tree" for related devices 1746 */ 1747 if (!(family = build_dtree(&rtxn))) { 1748 err = ENOMEM; 1749 goto cleanup; 1750 } 1751 dump_txn(&rtxn); 1752 dump_dtree(family); 1753 1754 if ((err = lock_dtree(family, &rtxn)) != 0) { 1755 goto cleanup; 1756 } 1757 1758 if ((err = check_dtree(family, &rtxn)) != 0) { 1759 goto cleanup; 1760 } 1761 dump_txn(&rtxn); 1762 1763 role_swap_dtree(family, &rtxn); /* commits the recids */ 1764 1765 /* 1766 * let folks know 1767 */ 1768 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_SRC, SVM_TAG_METADEVICE, 1769 MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum); 1770 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_DST, SVM_TAG_METADEVICE, 1771 MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum); 1772 1773 cleanup: 1774 1775 if (err != 0 && mdisok(&rtxn.mde)) { 1776 (void) mdsyserror(&rtxn.mde, EINVAL); 1777 } 1778 1779 if (family) { 1780 unlock_dtree(family, &rtxn); 1781 free_dtree(family); 1782 dump_dtree(family); 1783 family = NULL; 1784 } 1785 1786 if (rtxn.recids && (rtxn.n_recids > 0)) { 1787 kmem_free(rtxn.recids, sizeof (mddb_recid_t) * 
rtxn.n_recids); 1788 } 1789 1790 if (!mdisok(&rtxn.mde)) { 1791 (void) mdstealerror(&mrp->mde, &rtxn.mde); 1792 } 1793 1794 return (0); /* success/failure will be communicated via rtxn.mde */ 1795 } 1796 1797 static role_change_tab_t 1798 role_swap_tab[] = 1799 { 1800 { 1801 1, /* ordinal */ 1802 MDRR_PARENT, /* old role */ 1803 MDRR_PARENT, /* new role */ 1804 MDRNM_UPDATE_KIDS, /* named service */ 1805 NO_DEFAULT_ROLESWAP_SVC /* default role swap function */ 1806 }, 1807 { 1808 2, 1809 MDRR_PARENT, 1810 MDRR_SELF, 1811 MDRNM_PARENT_UPDATE_TO, 1812 NO_DEFAULT_ROLESWAP_SVC 1813 }, 1814 { 1815 3, 1816 MDRR_PARENT, 1817 MDRR_CHILD, 1818 ILLEGAL_SVC_NAME, 1819 ILLEGAL_ROLESWAP_SVC 1820 }, 1821 { 1822 4, 1823 MDRR_SELF, 1824 MDRR_PARENT, 1825 MDRNM_SELF_UPDATE_FROM_UP, 1826 md_exchange_self_update_from_up 1827 }, 1828 { 1829 5, 1830 MDRR_SELF, 1831 MDRR_SELF, 1832 MDRNM_UPDATE_SELF, 1833 md_rename_update_self 1834 }, 1835 { 1836 6, 1837 MDRR_SELF, 1838 MDRR_CHILD, 1839 MDRNM_SELF_UPDATE_FROM_DOWN, 1840 NO_DEFAULT_ROLESWAP_SVC 1841 }, 1842 { 1843 7, 1844 MDRR_CHILD, 1845 MDRR_PARENT, 1846 ILLEGAL_SVC_NAME, 1847 ILLEGAL_ROLESWAP_SVC 1848 }, 1849 { 1850 8, 1851 MDRR_CHILD, 1852 MDRR_SELF, 1853 MDRNM_CHILD_UPDATE_TO, 1854 md_exchange_child_update_to 1855 }, 1856 { 1857 9, 1858 MDRR_CHILD, 1859 MDRR_CHILD, 1860 MDRNM_UPDATE_FOLKS, 1861 md_renexch_update_parent 1862 }, 1863 1864 /* terminator is old_role == MDRR_UNK */ 1865 { 1866 0, 1867 MDRR_UNK, 1868 MDRR_UNK, 1869 ILLEGAL_SVC_NAME, 1870 NO_DEFAULT_ROLESWAP_SVC 1871 } 1872 };