1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * AVL - generic AVL tree implementation for kernel use
  28  *
  29  * A complete description of AVL trees can be found in many CS textbooks.
  30  *
  31  * Here is a very brief overview. An AVL tree is a binary search tree that is
  32  * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
  33  * any given node, the left and right subtrees are allowed to differ in height
  34  * by at most 1 level.
  35  *
  36  * This relaxation from a perfectly balanced binary tree allows doing
  37  * insertion and deletion relatively efficiently. Searching the tree is
  38  * still a fast operation, roughly O(log(N)).
  39  *
  40  * The key to insertion and deletion is a set of tree manipulations called
  41  * rotations, which bring unbalanced subtrees back into the semi-balanced state.
  42  *
  43  * This implementation of AVL trees has the following peculiarities:
  44  *
  45  *      - The AVL specific data structures are physically embedded as fields
  46  *        in the "using" data structures.  To maintain generality the code
  47  *        must constantly translate between "avl_node_t *" and containing
  48  *        data structure "void *"s by adding/subtracting the avl_offset.
  49  *
  50  *      - Since the AVL data is always embedded in other structures, there is
  51  *        no locking or memory allocation in the AVL routines. This must be
  52  *        provided for by the enclosing data structure's semantics. Typically,
  53  *        avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
  54  *        exclusive write lock. Other operations require a read lock.
  55  *
  56  *      - The implementation uses iteration instead of explicit recursion,
  57  *        since it is intended to run on limited size kernel stacks. Since
  58  *        there is no recursion stack present to move "up" in the tree,
  59  *        there is an explicit "parent" link in the avl_node_t.
  60  *
  61  *      - The left/right children pointers of a node are in an array.
  62  *        In the code, variables (instead of constants) are used to represent
  63  *        left and right indices.  The implementation is written as if it only
  64  *        dealt with left handed manipulations.  By changing the value assigned
  65  *        to "left", the code also works for right handed trees.  The
  66  *        following variables/terms are frequently used:
  67  *
  68  *              int left;       // 0 when dealing with left children,
  69  *                              // 1 for dealing with right children
  70  *
  71  *              int left_heavy; // -1 when left subtree is taller at some node,
  72  *                              // +1 when right subtree is taller
  73  *
  74  *              int right;      // will be the opposite of left (0 or 1)
  75  *              int right_heavy;// will be the opposite of left_heavy (-1 or 1)
  76  *
  77  *              int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
  78  *
  79  *        Though it is a little more confusing to read the code, the approach
  80  *        allows using half as much code (and hence cache footprint) for tree
  81  *        manipulations and eliminates many conditional branches.
  82  *
  83  *      - The avl_index_t is an opaque "cookie" used to find nodes at or
  84  *        adjacent to where a new value would be inserted in the tree. The value
  85  *        is a modified "avl_node_t *".  The bottom bit (normally 0 for a
  86  *        pointer) is set to indicate that the new node has a value greater
  87  *        than the value of the indicated "avl_node_t *".
  88  */
  89 
  90 #include <sys/types.h>
  91 #include <sys/param.h>
  92 #include <sys/debug.h>
  93 #include <sys/avl.h>
  94 #include <sys/cmn_err.h>
  95 
  96 /*
  97  * Small arrays to translate between balance (or diff) values and child indeces.
  98  *
  99  * Code that deals with binary tree data structures will randomly use
 100  * left and right children when examining a tree.  C "if()" statements
 101  * which evaluate randomly suffer from very poor hardware branch prediction.
 102  * In this code we avoid some of the branch mispredictions by using the
 103  * following translation arrays. They replace random branches with an
 104  * additional memory reference. Since the translation arrays are both very
 105  * small the data should remain efficiently in cache.
 106  */
 107 static const int  avl_child2balance[2]  = {-1, 1};
 108 static const int  avl_balance2child[]   = {0, 0, 1};
 109 
 110 
 111 /*
 112  * Walk from one node to the previous valued node (ie. an infix walk
 113  * towards the left). At any given node we do one of 2 things:
 114  *
 115  * - If there is a left child, go to it, then to it's rightmost descendant.
 116  *
 117  * - otherwise we return thru parent nodes until we've come from a right child.
 118  *
 119  * Return Value:
 120  * NULL - if at the end of the nodes
 121  * otherwise next node
 122  */
 123 void *
 124 avl_walk(avl_tree_t *tree, void *oldnode, int left)
 125 {
 126         size_t off = tree->avl_offset;
 127         avl_node_t *node = AVL_DATA2NODE(oldnode, off);
 128         int right = 1 - left;
 129         int was_child;
 130 
 131 
 132         /*
 133          * nowhere to walk to if tree is empty
 134          */
 135         if (node == NULL)
 136                 return (NULL);
 137 
 138         /*
 139          * Visit the previous valued node. There are two possibilities:
 140          *
 141          * If this node has a left child, go down one left, then all
 142          * the way right.
 143          */
 144         if (node->avl_child[left] != NULL) {
 145                 for (node = node->avl_child[left];
 146                     node->avl_child[right] != NULL;
 147                     node = node->avl_child[right])
 148                         ;
 149         /*
 150          * Otherwise, return thru left children as far as we can.
 151          */
 152         } else {
 153                 for (;;) {
 154                         was_child = AVL_XCHILD(node);
 155                         node = AVL_XPARENT(node);
 156                         if (node == NULL)
 157                                 return (NULL);
 158                         if (was_child == right)
 159                                 break;
 160                 }
 161         }
 162 
 163         return (AVL_NODE2DATA(node, off));
 164 }
 165 
 166 /*
 167  * Return the lowest valued node in a tree or NULL.
 168  * (leftmost child from root of tree)
 169  */
 170 void *
 171 avl_first(avl_tree_t *tree)
 172 {
 173         avl_node_t *node;
 174         avl_node_t *prev = NULL;
 175         size_t off = tree->avl_offset;
 176 
 177         for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
 178                 prev = node;
 179 
 180         if (prev != NULL)
 181                 return (AVL_NODE2DATA(prev, off));
 182         return (NULL);
 183 }
 184 
 185 /*
 186  * Return the highest valued node in a tree or NULL.
 187  * (rightmost child from root of tree)
 188  */
 189 void *
 190 avl_last(avl_tree_t *tree)
 191 {
 192         avl_node_t *node;
 193         avl_node_t *prev = NULL;
 194         size_t off = tree->avl_offset;
 195 
 196         for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
 197                 prev = node;
 198 
 199         if (prev != NULL)
 200                 return (AVL_NODE2DATA(prev, off));
 201         return (NULL);
 202 }
 203 
 204 /*
 205  * Access the node immediately before or after an insertion point.
 206  *
 207  * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
 208  *
 209  * Return value:
 210  *      NULL: no node in the given direction
 211  *      "void *"  of the found tree node
 212  */
 213 void *
 214 avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
 215 {
 216         int child = AVL_INDEX2CHILD(where);
 217         avl_node_t *node = AVL_INDEX2NODE(where);
 218         void *data;
 219         size_t off = tree->avl_offset;
 220 
 221         if (node == NULL) {
 222                 ASSERT(tree->avl_root == NULL);
 223                 return (NULL);
 224         }
 225         data = AVL_NODE2DATA(node, off);
 226         if (child != direction)
 227                 return (data);
 228 
 229         return (avl_walk(tree, data, direction));
 230 }
 231 
 232 
 233 /*
 234  * Search for the node which contains "value".  The algorithm is a
 235  * simple binary tree search.
 236  *
 237  * return value:
 238  *      NULL: the value is not in the AVL tree
 239  *              *where (if not NULL)  is set to indicate the insertion point
 240  *      "void *"  of the found tree node
 241  */
 242 void *
 243 avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
 244 {
 245         avl_node_t *node;
 246         avl_node_t *prev = NULL;
 247         int child = 0;
 248         int diff;
 249         size_t off = tree->avl_offset;
 250 
 251         for (node = tree->avl_root; node != NULL;
 252             node = node->avl_child[child]) {
 253 
 254                 prev = node;
 255 
 256                 diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
 257                 ASSERT(-1 <= diff && diff <= 1);
 258                 if (diff == 0) {
 259 #ifdef DEBUG
 260                         if (where != NULL)
 261                                 *where = 0;
 262 #endif
 263                         return (AVL_NODE2DATA(node, off));
 264                 }
 265                 child = avl_balance2child[1 + diff];
 266 
 267         }
 268 
 269         if (where != NULL)
 270                 *where = AVL_MKINDEX(prev, child);
 271 
 272         return (NULL);
 273 }
 274 
 275 
 276 /*
 277  * Perform a rotation to restore balance at the subtree given by depth.
 278  *
 279  * This routine is used by both insertion and deletion. The return value
 280  * indicates:
 281  *       0 : subtree did not change height
 282  *      !0 : subtree was reduced in height
 283  *
 284  * The code is written as if handling left rotations, right rotations are
 285  * symmetric and handled by swapping values of variables right/left[_heavy]
 286  *
 287  * On input balance is the "new" balance at "node". This value is either
 288  * -2 or +2.
 289  */
 290 static int
 291 avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
 292 {
 293         int left = !(balance < 0);   /* when balance = -2, left will be 0 */
 294         int right = 1 - left;
 295         int left_heavy = balance >> 1;
 296         int right_heavy = -left_heavy;
 297         avl_node_t *parent = AVL_XPARENT(node);
 298         avl_node_t *child = node->avl_child[left];
 299         avl_node_t *cright;
 300         avl_node_t *gchild;
 301         avl_node_t *gright;
 302         avl_node_t *gleft;
 303         int which_child = AVL_XCHILD(node);
 304         int child_bal = AVL_XBALANCE(child);
 305 
 306         /* BEGIN CSTYLED */
 307         /*
 308          * case 1 : node is overly left heavy, the left child is balanced or
 309          * also left heavy. This requires the following rotation.
 310          *
 311          *                   (node bal:-2)
 312          *                    /           \
 313          *                   /             \
 314          *              (child bal:0 or -1)
 315          *              /    \
 316          *             /      \
 317          *                     cright
 318          *
 319          * becomes:
 320          *
 321          *              (child bal:1 or 0)
 322          *              /        \
 323          *             /          \
 324          *                        (node bal:-1 or 0)
 325          *                         /     \
 326          *                        /       \
 327          *                     cright
 328          *
 329          * we detect this situation by noting that child's balance is not
 330          * right_heavy.
 331          */
 332         /* END CSTYLED */
 333         if (child_bal != right_heavy) {
 334 
 335                 /*
 336                  * compute new balance of nodes
 337                  *
 338                  * If child used to be left heavy (now balanced) we reduced
 339                  * the height of this sub-tree -- used in "return...;" below
 340                  */
 341                 child_bal += right_heavy; /* adjust towards right */
 342 
 343                 /*
 344                  * move "cright" to be node's left child
 345                  */
 346                 cright = child->avl_child[right];
 347                 node->avl_child[left] = cright;
 348                 if (cright != NULL) {
 349                         AVL_SETPARENT(cright, node);
 350                         AVL_SETCHILD(cright, left);
 351                 }
 352 
 353                 /*
 354                  * move node to be child's right child
 355                  */
 356                 child->avl_child[right] = node;
 357                 AVL_SETBALANCE(node, -child_bal);
 358                 AVL_SETCHILD(node, right);
 359                 AVL_SETPARENT(node, child);
 360 
 361                 /*
 362                  * update the pointer into this subtree
 363                  */
 364                 AVL_SETBALANCE(child, child_bal);
 365                 AVL_SETCHILD(child, which_child);
 366                 AVL_SETPARENT(child, parent);
 367                 if (parent != NULL)
 368                         parent->avl_child[which_child] = child;
 369                 else
 370                         tree->avl_root = child;
 371 
 372                 return (child_bal == 0);
 373         }
 374 
 375         /* BEGIN CSTYLED */
 376         /*
 377          * case 2 : When node is left heavy, but child is right heavy we use
 378          * a different rotation.
 379          *
 380          *                   (node b:-2)
 381          *                    /   \
 382          *                   /     \
 383          *                  /       \
 384          *             (child b:+1)
 385          *              /     \
 386          *             /       \
 387          *                   (gchild b: != 0)
 388          *                     /  \
 389          *                    /    \
 390          *                 gleft   gright
 391          *
 392          * becomes:
 393          *
 394          *              (gchild b:0)
 395          *              /       \
 396          *             /         \
 397          *            /           \
 398          *        (child b:?)   (node b:?)
 399          *         /  \          /   \
 400          *        /    \        /     \
 401          *            gleft   gright
 402          *
 403          * computing the new balances is more complicated. As an example:
 404          *       if gchild was right_heavy, then child is now left heavy
 405          *              else it is balanced
 406          */
 407         /* END CSTYLED */
 408         gchild = child->avl_child[right];
 409         gleft = gchild->avl_child[left];
 410         gright = gchild->avl_child[right];
 411 
 412         /*
 413          * move gright to left child of node and
 414          *
 415          * move gleft to right child of node
 416          */
 417         node->avl_child[left] = gright;
 418         if (gright != NULL) {
 419                 AVL_SETPARENT(gright, node);
 420                 AVL_SETCHILD(gright, left);
 421         }
 422 
 423         child->avl_child[right] = gleft;
 424         if (gleft != NULL) {
 425                 AVL_SETPARENT(gleft, child);
 426                 AVL_SETCHILD(gleft, right);
 427         }
 428 
 429         /*
 430          * move child to left child of gchild and
 431          *
 432          * move node to right child of gchild and
 433          *
 434          * fixup parent of all this to point to gchild
 435          */
 436         balance = AVL_XBALANCE(gchild);
 437         gchild->avl_child[left] = child;
 438         AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
 439         AVL_SETPARENT(child, gchild);
 440         AVL_SETCHILD(child, left);
 441 
 442         gchild->avl_child[right] = node;
 443         AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
 444         AVL_SETPARENT(node, gchild);
 445         AVL_SETCHILD(node, right);
 446 
 447         AVL_SETBALANCE(gchild, 0);
 448         AVL_SETPARENT(gchild, parent);
 449         AVL_SETCHILD(gchild, which_child);
 450         if (parent != NULL)
 451                 parent->avl_child[which_child] = gchild;
 452         else
 453                 tree->avl_root = gchild;
 454 
 455         return (1);     /* the new tree is always shorter */
 456 }
 457 
 458 
 459 /*
 460  * Insert a new node into an AVL tree at the specified (from avl_find()) place.
 461  *
 462  * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
 463  * searches out to the leaf positions.  The avl_index_t indicates the node
 464  * which will be the parent of the new node.
 465  *
 466  * After the node is inserted, a single rotation further up the tree may
 467  * be necessary to maintain an acceptable AVL balance.
 468  */
 469 void
 470 avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
 471 {
 472         avl_node_t *node;
 473         avl_node_t *parent = AVL_INDEX2NODE(where);
 474         int old_balance;
 475         int new_balance;
 476         int which_child = AVL_INDEX2CHILD(where);
 477         size_t off = tree->avl_offset;
 478 
 479         ASSERT(tree);
 480 #ifdef _LP64
 481         ASSERT(((uintptr_t)new_data & 0x7) == 0);
 482 #endif
 483 
 484         node = AVL_DATA2NODE(new_data, off);
 485 
 486         /*
 487          * First, add the node to the tree at the indicated position.
 488          */
 489         ++tree->avl_numnodes;
 490 
 491         node->avl_child[0] = NULL;
 492         node->avl_child[1] = NULL;
 493 
 494         AVL_SETCHILD(node, which_child);
 495         AVL_SETBALANCE(node, 0);
 496         AVL_SETPARENT(node, parent);
 497         if (parent != NULL) {
 498                 ASSERT(parent->avl_child[which_child] == NULL);
 499                 parent->avl_child[which_child] = node;
 500         } else {
 501                 ASSERT(tree->avl_root == NULL);
 502                 tree->avl_root = node;
 503         }
 504         /*
 505          * Now, back up the tree modifying the balance of all nodes above the
 506          * insertion point. If we get to a highly unbalanced ancestor, we
 507          * need to do a rotation.  If we back out of the tree we are done.
 508          * If we brought any subtree into perfect balance (0), we are also done.
 509          */
 510         for (;;) {
 511                 node = parent;
 512                 if (node == NULL)
 513                         return;
 514 
 515                 /*
 516                  * Compute the new balance
 517                  */
 518                 old_balance = AVL_XBALANCE(node);
 519                 new_balance = old_balance + avl_child2balance[which_child];
 520 
 521                 /*
 522                  * If we introduced equal balance, then we are done immediately
 523                  */
 524                 if (new_balance == 0) {
 525                         AVL_SETBALANCE(node, 0);
 526                         return;
 527                 }
 528 
 529                 /*
 530                  * If both old and new are not zero we went
 531                  * from -1 to -2 balance, do a rotation.
 532                  */
 533                 if (old_balance != 0)
 534                         break;
 535 
 536                 AVL_SETBALANCE(node, new_balance);
 537                 parent = AVL_XPARENT(node);
 538                 which_child = AVL_XCHILD(node);
 539         }
 540 
 541         /*
 542          * perform a rotation to fix the tree and return
 543          */
 544         (void) avl_rotation(tree, node, new_balance);
 545 }
 546 
 547 /*
 548  * Insert "new_data" in "tree" in the given "direction" either after or
 549  * before (AVL_AFTER, AVL_BEFORE) the data "here".
 550  *
 551  * Insertions can only be done at empty leaf points in the tree, therefore
 552  * if the given child of the node is already present we move to either
 553  * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
 554  * every other node in the tree is a leaf, this always works.
 555  *
 556  * To help developers using this interface, we assert that the new node
 557  * is correctly ordered at every step of the way in DEBUG kernels.
 558  */
 559 void
 560 avl_insert_here(
 561         avl_tree_t *tree,
 562         void *new_data,
 563         void *here,
 564         int direction)
 565 {
 566         avl_node_t *node;
 567         int child = direction;  /* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
 568 #ifdef DEBUG
 569         int diff;
 570 #endif
 571 
 572         ASSERT(tree != NULL);
 573         ASSERT(new_data != NULL);
 574         ASSERT(here != NULL);
 575         ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
 576 
 577         /*
 578          * If corresponding child of node is not NULL, go to the neighboring
 579          * node and reverse the insertion direction.
 580          */
 581         node = AVL_DATA2NODE(here, tree->avl_offset);
 582 
 583 #ifdef DEBUG
 584         diff = tree->avl_compar(new_data, here);
 585         ASSERT(-1 <= diff && diff <= 1);
 586         ASSERT(diff != 0);
 587         ASSERT(diff > 0 ? child == 1 : child == 0);
 588 #endif
 589 
 590         if (node->avl_child[child] != NULL) {
 591                 node = node->avl_child[child];
 592                 child = 1 - child;
 593                 while (node->avl_child[child] != NULL) {
 594 #ifdef DEBUG
 595                         diff = tree->avl_compar(new_data,
 596                             AVL_NODE2DATA(node, tree->avl_offset));
 597                         ASSERT(-1 <= diff && diff <= 1);
 598                         ASSERT(diff != 0);
 599                         ASSERT(diff > 0 ? child == 1 : child == 0);
 600 #endif
 601                         node = node->avl_child[child];
 602                 }
 603 #ifdef DEBUG
 604                 diff = tree->avl_compar(new_data,
 605                     AVL_NODE2DATA(node, tree->avl_offset));
 606                 ASSERT(-1 <= diff && diff <= 1);
 607                 ASSERT(diff != 0);
 608                 ASSERT(diff > 0 ? child == 1 : child == 0);
 609 #endif
 610         }
 611         ASSERT(node->avl_child[child] == NULL);
 612 
 613         avl_insert(tree, new_data, AVL_MKINDEX(node, child));
 614 }
 615 
 616 /*
 617  * Add a new node to an AVL tree.
 618  */
 619 void
 620 avl_add(avl_tree_t *tree, void *new_node)
 621 {
 622         avl_index_t where;
 623 
 624         /*
 625          * This is unfortunate.  We want to call panic() here, even for
 626          * non-DEBUG kernels.  In userland, however, we can't depend on anything
 627          * in libc or else the rtld build process gets confused.  So, all we can
 628          * do in userland is resort to a normal ASSERT().
 629          */
 630         if (avl_find(tree, new_node, &where) != NULL)
 631 #ifdef _KERNEL
 632                 panic("avl_find() succeeded inside avl_add()");
 633 #else
 634                 ASSERT(0);
 635 #endif
 636         avl_insert(tree, new_node, where);
 637 }
 638 
 639 /*
 640  * Delete a node from the AVL tree.  Deletion is similar to insertion, but
 641  * with 2 complications.
 642  *
 643  * First, we may be deleting an interior node. Consider the following subtree:
 644  *
 645  *     d           c            c
 646  *    / \         / \          / \
 647  *   b   e       b   e        b   e
 648  *  / \         / \          /
 649  * a   c       a            a
 650  *
 651  * When we are deleting node (d), we find and bring up an adjacent valued leaf
 652  * node, say (c), to take the interior node's place. In the code this is
 653  * handled by temporarily swapping (d) and (c) in the tree and then using
 654  * common code to delete (d) from the leaf position.
 655  *
 656  * Secondly, an interior deletion from a deep tree may require more than one
 657  * rotation to fix the balance. This is handled by moving up the tree through
 658  * parents and applying rotations as needed. The return value from
 659  * avl_rotation() is used to detect when a subtree did not change overall
 660  * height due to a rotation.
 661  */
 662 void
 663 avl_remove(avl_tree_t *tree, void *data)
 664 {
 665         avl_node_t *delete;
 666         avl_node_t *parent;
 667         avl_node_t *node;
 668         avl_node_t tmp;
 669         int old_balance;
 670         int new_balance;
 671         int left;
 672         int right;
 673         int which_child;
 674         size_t off = tree->avl_offset;
 675 
 676         ASSERT(tree);
 677 
 678         delete = AVL_DATA2NODE(data, off);
 679 
 680         /*
 681          * Deletion is easiest with a node that has at most 1 child.
 682          * We swap a node with 2 children with a sequentially valued
 683          * neighbor node. That node will have at most 1 child. Note this
 684          * has no effect on the ordering of the remaining nodes.
 685          *
 686          * As an optimization, we choose the greater neighbor if the tree
 687          * is right heavy, otherwise the left neighbor. This reduces the
 688          * number of rotations needed.
 689          */
 690         if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
 691 
 692                 /*
 693                  * choose node to swap from whichever side is taller
 694                  */
 695                 old_balance = AVL_XBALANCE(delete);
 696                 left = avl_balance2child[old_balance + 1];
 697                 right = 1 - left;
 698 
 699                 /*
 700                  * get to the previous value'd node
 701                  * (down 1 left, as far as possible right)
 702                  */
 703                 for (node = delete->avl_child[left];
 704                     node->avl_child[right] != NULL;
 705                     node = node->avl_child[right])
 706                         ;
 707 
 708                 /*
 709                  * create a temp placeholder for 'node'
 710                  * move 'node' to delete's spot in the tree
 711                  */
 712                 tmp = *node;
 713 
 714                 *node = *delete;
 715                 if (node->avl_child[left] == node)
 716                         node->avl_child[left] = &tmp;
 717 
 718                 parent = AVL_XPARENT(node);
 719                 if (parent != NULL)
 720                         parent->avl_child[AVL_XCHILD(node)] = node;
 721                 else
 722                         tree->avl_root = node;
 723                 AVL_SETPARENT(node->avl_child[left], node);
 724                 AVL_SETPARENT(node->avl_child[right], node);
 725 
 726                 /*
 727                  * Put tmp where node used to be (just temporary).
 728                  * It always has a parent and at most 1 child.
 729                  */
 730                 delete = &tmp;
 731                 parent = AVL_XPARENT(delete);
 732                 parent->avl_child[AVL_XCHILD(delete)] = delete;
 733                 which_child = (delete->avl_child[1] != 0);
 734                 if (delete->avl_child[which_child] != NULL)
 735                         AVL_SETPARENT(delete->avl_child[which_child], delete);
 736         }
 737 
 738 
 739         /*
 740          * Here we know "delete" is at least partially a leaf node. It can
 741          * be easily removed from the tree.
 742          */
 743         ASSERT(tree->avl_numnodes > 0);
 744         --tree->avl_numnodes;
 745         parent = AVL_XPARENT(delete);
 746         which_child = AVL_XCHILD(delete);
 747         if (delete->avl_child[0] != NULL)
 748                 node = delete->avl_child[0];
 749         else
 750                 node = delete->avl_child[1];
 751 
 752         /*
 753          * Connect parent directly to node (leaving out delete).
 754          */
 755         if (node != NULL) {
 756                 AVL_SETPARENT(node, parent);
 757                 AVL_SETCHILD(node, which_child);
 758         }
 759         if (parent == NULL) {
 760                 tree->avl_root = node;
 761                 return;
 762         }
 763         parent->avl_child[which_child] = node;
 764 
 765 
 766         /*
 767          * Since the subtree is now shorter, begin adjusting parent balances
 768          * and performing any needed rotations.
 769          */
 770         do {
 771 
 772                 /*
 773                  * Move up the tree and adjust the balance
 774                  *
 775                  * Capture the parent and which_child values for the next
 776                  * iteration before any rotations occur.
 777                  */
 778                 node = parent;
 779                 old_balance = AVL_XBALANCE(node);
 780                 new_balance = old_balance - avl_child2balance[which_child];
 781                 parent = AVL_XPARENT(node);
 782                 which_child = AVL_XCHILD(node);
 783 
 784                 /*
 785                  * If a node was in perfect balance but isn't anymore then
 786                  * we can stop, since the height didn't change above this point
 787                  * due to a deletion.
 788                  */
 789                 if (old_balance == 0) {
 790                         AVL_SETBALANCE(node, new_balance);
 791                         break;
 792                 }
 793 
 794                 /*
 795                  * If the new balance is zero, we don't need to rotate
 796                  * else
 797                  * need a rotation to fix the balance.
 798                  * If the rotation doesn't change the height
 799                  * of the sub-tree we have finished adjusting.
 800                  */
 801                 if (new_balance == 0)
 802                         AVL_SETBALANCE(node, new_balance);
 803                 else if (!avl_rotation(tree, node, new_balance))
 804                         break;
 805         } while (parent != NULL);
 806 }
 807 
 808 #define AVL_REINSERT(tree, obj)         \
 809         avl_remove((tree), (obj));      \
 810         avl_add((tree), (obj))
 811 
 812 boolean_t
 813 avl_update_lt(avl_tree_t *t, void *obj)
 814 {
 815         void *neighbor;
 816 
 817         ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
 818             (t->avl_compar(obj, neighbor) <= 0));
 819 
 820         neighbor = AVL_PREV(t, obj);
 821         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
 822                 AVL_REINSERT(t, obj);
 823                 return (B_TRUE);
 824         }
 825 
 826         return (B_FALSE);
 827 }
 828 
 829 boolean_t
 830 avl_update_gt(avl_tree_t *t, void *obj)
 831 {
 832         void *neighbor;
 833 
 834         ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
 835             (t->avl_compar(obj, neighbor) >= 0));
 836 
 837         neighbor = AVL_NEXT(t, obj);
 838         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
 839                 AVL_REINSERT(t, obj);
 840                 return (B_TRUE);
 841         }
 842 
 843         return (B_FALSE);
 844 }
 845 
 846 boolean_t
 847 avl_update(avl_tree_t *t, void *obj)
 848 {
 849         void *neighbor;
 850 
 851         neighbor = AVL_PREV(t, obj);
 852         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
 853                 AVL_REINSERT(t, obj);
 854                 return (B_TRUE);
 855         }
 856 
 857         neighbor = AVL_NEXT(t, obj);
 858         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
 859                 AVL_REINSERT(t, obj);
 860                 return (B_TRUE);
 861         }
 862 
 863         return (B_FALSE);
 864 }
 865 
 866 /*
 867  * initialize a new AVL tree
 868  */
 869 void
 870 avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
 871     size_t size, size_t offset)
 872 {
 873         ASSERT(tree);
 874         ASSERT(compar);
 875         ASSERT(size > 0);
 876         ASSERT(size >= offset + sizeof (avl_node_t));
 877 #ifdef _LP64
 878         ASSERT((offset & 0x7) == 0);
 879 #endif
 880 
 881         tree->avl_compar = compar;
 882         tree->avl_root = NULL;
 883         tree->avl_numnodes = 0;
 884         tree->avl_size = size;
 885         tree->avl_offset = offset;
 886 }
 887 
/*
 * Delete a tree.
 *
 * Nothing is freed or modified here: the function only asserts that the
 * tree is already empty (node count of zero and no root).  Nodes have to
 * be removed beforehand, e.g. with the avl_destroy_nodes() loop shown in
 * that function's comment.
 */
/* ARGSUSED */
void
avl_destroy(avl_tree_t *tree)
{
        ASSERT(tree);
        ASSERT(tree->avl_numnodes == 0);
        ASSERT(tree->avl_root == NULL);
}
 899 
 900 
 901 /*
 902  * Return the number of nodes in an AVL tree.
 903  */
 904 ulong_t
 905 avl_numnodes(avl_tree_t *tree)
 906 {
 907         ASSERT(tree);
 908         return (tree->avl_numnodes);
 909 }
 910 
 911 boolean_t
 912 avl_is_empty(avl_tree_t *tree)
 913 {
 914         ASSERT(tree);
 915         return (tree->avl_numnodes == 0);
 916 }
 917 
/* Low bit of the destroy cookie: which child (0 or 1) was visited last. */
#define CHILDBIT        (1L)

/*
 * Post-order tree walk used to visit all tree nodes and destroy the tree
 * in post order. This is used for destroying a tree w/o paying any cost
 * for rebalancing it.
 *
 * example:
 *
 *      void *cookie = NULL;
 *      my_data_t *node;
 *
 *      while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
 *              free(node);
 *      avl_destroy(tree);
 *
 * The cookie is really an avl_node_t pointer to the current node's parent,
 * with the low bit (CHILDBIT) holding an indication of which child you
 * looked at last.
 *
 *      tree    - the tree being torn down
 *      cookie  - walk state; *cookie must be NULL on the first call and is
 *                rewritten on each call that returns a node
 *
 * Returns the data pointer of the next node, or NULL when there is nothing
 * left to visit.  The node returned by one call is unlinked from its parent
 * (and the node count decremented) at the start of the following call.
 *
 * On input, a cookie value of CHILDBIT indicates the tree is done.
 */
void *
avl_destroy_nodes(avl_tree_t *tree, void **cookie)
{
        avl_node_t      *node;
        avl_node_t      *parent;
        int             child;
        void            *first;
        size_t          off = tree->avl_offset;

        /*
         * Initial calls go to the first node or its right descendant.
         */
        if (*cookie == NULL) {
                first = avl_first(tree);

                /*
                 * deal with an empty tree
                 */
                if (first == NULL) {
                        *cookie = (void *)CHILDBIT;
                        return (NULL);
                }

                node = AVL_DATA2NODE(first, off);
                parent = AVL_XPARENT(node);
                goto check_right_side;
        }

        /*
         * If there is no parent to return to we are done.
         * The node handed out last time (if any) was the root, so clear
         * the root pointer and the node count before reporting the end.
         */
        parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
        if (parent == NULL) {
                if (tree->avl_root != NULL) {
                        ASSERT(tree->avl_numnodes == 1);
                        tree->avl_root = NULL;
                        tree->avl_numnodes = 0;
                }
                return (NULL);
        }

        /*
         * Remove the child pointer we just visited from the parent and tree.
         * The parent itself is still counted, hence more than one node.
         */
        child = (uintptr_t)(*cookie) & CHILDBIT;
        parent->avl_child[child] = NULL;
        ASSERT(tree->avl_numnodes > 1);
        --tree->avl_numnodes;

        /*
         * If we just did a right child or there isn't one, go up to parent.
         */
        if (child == 1 || parent->avl_child[1] == NULL) {
                node = parent;
                parent = AVL_XPARENT(parent);
                goto done;
        }

        /*
         * Do parent's right child, then leftmost descendent.
         */
        node = parent->avl_child[1];
        while (node->avl_child[0] != NULL) {
                parent = node;
                node = node->avl_child[0];
        }

        /*
         * If here, we moved to a left child. It may have one
         * child on the right (when balance == +1).
         */
check_right_side:
        if (node->avl_child[1] != NULL) {
                ASSERT(AVL_XBALANCE(node) == 1);
                parent = node;
                node = node->avl_child[1];
                ASSERT(node->avl_child[0] == NULL &&
                    node->avl_child[1] == NULL);
        } else {
                ASSERT(AVL_XBALANCE(node) <= 0);
        }

        /*
         * Record where to resume: the parent pointer tagged with which
         * child `node` is.  A NULL parent means `node` is the root, so the
         * cookie becomes CHILDBIT and the next call ends the walk.
         */
done:
        if (parent == NULL) {
                *cookie = (void *)CHILDBIT;
                ASSERT(node == tree->avl_root);
        } else {
                *cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
        }

        return (AVL_NODE2DATA(node, off));
}