1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Data-Link Services Module
  28  */
  29 
  30 #include        <sys/sysmacros.h>
  31 #include        <sys/strsubr.h>
  32 #include        <sys/strsun.h>
  33 #include        <sys/vlan.h>
  34 #include        <sys/dld_impl.h>
  35 #include        <sys/sdt.h>
  36 #include        <sys/atomic.h>
  37 
  /* kmem cache from which every dls_link_t in this file is allocated. */
  static kmem_cache_t     *i_dls_link_cachep;
  /* Global hash of dls_link_t structures, keyed by the link name string. */
  mod_hash_t              *i_dls_link_hash;
  /* Number of dls_link_t structures currently held in i_dls_link_hash. */
  static uint_t           i_dls_link_count;

  /* Hash table size constants for the mod_hash tables created in this file. */
  #define         LINK_HASHSZ     67      /* prime */
  #define         IMPL_HASHSZ     67      /* prime */
  44 
  /*
   * Construct a hash key from a DLSAP value.  The SAP is shifted left by
   * VLAN_ID_SIZE bits, so the low-order VLAN_ID_SIZE bits of the key are
   * always zero (presumably the space once reserved for a VLAN identifier;
   * no VLAN identifier is folded into the key here).
   */
  #define MAKE_KEY(_sap)                                          \
          ((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE))
  50 
  /*
   * Remove trailing padding from the message p.
   *
   * pktsize is the size the packet is supposed to have; zero means "nothing
   * to strip" and the message is left alone.  When the message holds more
   * data (msgdsize()) than pktsize, the (negative) difference is handed to
   * adjmsg(), which trims that many bytes from the end of the message.
   *
   * Both arguments are parenthesized so that expression arguments expand
   * correctly.  Note that each argument is still evaluated more than once,
   * so arguments must be free of side effects.
   */
  #define DLS_STRIP_PADDING(pktsize, p) {                         \
          if ((pktsize) != 0) {                                   \
                  ssize_t delta = (pktsize) - msgdsize((p));      \
                                                                  \
                  if (delta < 0)                                  \
                          (void) adjmsg((p), delta);              \
          }                                                       \
  }
  59 
  60 /*
  61  * Private functions.
  62  */
  63 
  64 /*ARGSUSED*/
  65 static int
  66 i_dls_link_constructor(void *buf, void *arg, int kmflag)
  67 {
  68         dls_link_t      *dlp = buf;
  69         char            name[MAXNAMELEN];
  70 
  71         bzero(buf, sizeof (dls_link_t));
  72 
  73         (void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
  74         dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
  75             mod_hash_null_valdtor);
  76 
  77         return (0);
  78 }
  79 
  80 /*ARGSUSED*/
  81 static void
  82 i_dls_link_destructor(void *buf, void *arg)
  83 {
  84         dls_link_t      *dlp = buf;
  85 
  86         ASSERT(dlp->dl_ref == 0);
  87         ASSERT(dlp->dl_mh == NULL);
  88         ASSERT(dlp->dl_mah == NULL);
  89         ASSERT(dlp->dl_unknowns == 0);
  90 
  91         mod_hash_destroy_idhash(dlp->dl_str_hash);
  92         dlp->dl_str_hash = NULL;
  93 
  94 }
  95 
  /*
   * Prepare a single received packet for delivery:
   *
   * - Parse the MAC header of mp into *mhip with mac_vlan_header_info().
   *   If that fails, err is set to its non-zero return value and the packet
   *   is left untouched.
   * - Strip any trailing padding (DLS_STRIP_PADDING).
   * - Skip over the MAC header by advancing b_rptr by mhi_hdrsize.  If the
   *   first mblk is shorter than the header, the whole message is first
   *   pulled up into a single mblk with msgpullup().  Because the original
   *   message is freed as the result of pulling up, mac_vlan_header_info()
   *   is called again to update the mhi_saddr and mhi_daddr pointers in
   *   mhip, and the saved b_next link is restored on the new message.
   *   Per the original author's note, mac_vlan_header_info() ensures that
   *   the message is larger than the MAC header, so b_rptr can be advanced
   *   directly to the payload.
   * - If msgpullup() fails, err is set to EINVAL and mp still refers to the
   *   original, unmodified-in-position message.
   *
   * mp must be an lvalue: the macro may assign a new message to it.  The
   * macro declares block-local variables named nextp and newmp, so the
   * arguments must not be expressions that refer to caller variables with
   * those names.
   *
   * We choose to use a macro for performance reasons.
   */
  #define DLS_PREPARE_PKT(mh, mp, mhip, err) {                            \
          mblk_t *nextp = (mp)->b_next;                                   \
          if (((err) = mac_vlan_header_info((mh), (mp), (mhip))) == 0) {  \
                  DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));           \
                  if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {                \
                          mblk_t *newmp;                                  \
                          if ((newmp = msgpullup((mp), -1)) == NULL) {    \
                                  (err) = EINVAL;                         \
                          } else {                                        \
                                  (mp)->b_next = NULL;                    \
                                  freemsg((mp));                          \
                                  (mp) = newmp;                           \
                                  VERIFY(mac_vlan_header_info((mh),       \
                                      (mp), (mhip)) == 0);                \
                                  (mp)->b_next = nextp;                   \
                                  (mp)->b_rptr += (mhip)->mhi_hdrsize;    \
                          }                                               \
                  } else {                                                \
                          (mp)->b_rptr += (mhip)->mhi_hdrsize;            \
                  }                                                       \
          }                                                               \
  }
 131 
 132 /*
 133  * Truncate the chain starting at mp such that all packets in the chain
 134  * have identical source and destination addresses, saps, and tag types
 135  * (see below).  It returns a pointer to the mblk following the chain,
 136  * NULL if there is no further packet following the processed chain.
 137  * The countp argument is set to the number of valid packets in the chain.
 138  * Note that the whole MAC header (including the VLAN tag if any) in each
 139  * packet will be stripped.
 140  */
 141 static mblk_t *
 142 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
 143     uint_t *countp)
 144 {
 145         mblk_t          *prevp;
 146         uint_t          npacket = 1;
 147         size_t          addr_size = dlp->dl_mip->mi_addr_length;
 148         uint16_t        vid = VLAN_ID(mhip->mhi_tci);
 149         uint16_t        pri = VLAN_PRI(mhip->mhi_tci);
 150 
 151         /*
 152          * Compare with subsequent headers until we find one that has
 153          * differing header information. After checking each packet
 154          * strip padding and skip over the header.
 155          */
 156         for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
 157                 mac_header_info_t cmhi;
 158                 uint16_t cvid, cpri;
 159                 int err;
 160 
 161                 DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err);
 162                 if (err != 0)
 163                         break;
 164 
 165                 prevp->b_next = mp;
 166 
 167                 /*
 168                  * The source, destination, sap, vlan tag must all match in
 169                  * a given subchain.
 170                  */
 171                 if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL ||
 172                     memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
 173                     memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
 174                     mhip->mhi_bindsap != cmhi.mhi_bindsap) {
 175                         /*
 176                          * Note that we don't need to restore the padding.
 177                          */
 178                         mp->b_rptr -= cmhi.mhi_hdrsize;
 179                         break;
 180                 }
 181 
 182                 cvid = VLAN_ID(cmhi.mhi_tci);
 183                 cpri = VLAN_PRI(cmhi.mhi_tci);
 184 
 185                 /*
 186                  * There are several types of packets. Packets don't match
 187                  * if they are classified to different type or if they are
 188                  * VLAN packets but belong to different VLANs:
 189                  *
 190                  * packet type          tagged          vid             pri
 191                  * ---------------------------------------------------------
 192                  * untagged             No              zero            zero
 193                  * VLAN packets         Yes             non-zero        -
 194                  * priority tagged      Yes             zero            non-zero
 195                  * 0 tagged             Yes             zero            zero
 196                  */
 197                 if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
 198                     (vid != cvid) || ((vid == VLAN_ID_NONE) &&
 199                     (((pri == 0) && (cpri != 0)) ||
 200                     ((pri != 0) && (cpri == 0))))) {
 201                         mp->b_rptr -= cmhi.mhi_hdrsize;
 202                         break;
 203                 }
 204 
 205                 npacket++;
 206         }
 207 
 208         /*
 209          * Break the chain at this point and return a pointer to the next
 210          * sub-chain.
 211          */
 212         prevp->b_next = NULL;
 213         *countp = npacket;
 214         return (mp);
 215 }
 216 
 217 /* ARGSUSED */
 218 static int
 219 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
 220 {
 221         dls_head_t *dhp = (dls_head_t *)val;
 222 
 223         /*
 224          * The lock order is  mod_hash's internal lock -> dh_lock as in the
 225          * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
 226          */
 227         mutex_enter(&dhp->dh_lock);
 228         if (dhp->dh_removing) {
 229                 mutex_exit(&dhp->dh_lock);
 230                 return (-1);
 231         }
 232         dhp->dh_ref++;
 233         mutex_exit(&dhp->dh_lock);
 234         return (0);
 235 }
 236 
 237 void
 238 i_dls_head_rele(dls_head_t *dhp)
 239 {
 240         mutex_enter(&dhp->dh_lock);
 241         dhp->dh_ref--;
 242         if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
 243                 cv_broadcast(&dhp->dh_cv);
 244         mutex_exit(&dhp->dh_lock);
 245 }
 246 
 247 static dls_head_t *
 248 i_dls_head_alloc(mod_hash_key_t key)
 249 {
 250         dls_head_t      *dhp;
 251 
 252         dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
 253         dhp->dh_key = key;
 254         return (dhp);
 255 }
 256 
 257 static void
 258 i_dls_head_free(dls_head_t *dhp)
 259 {
 260         ASSERT(dhp->dh_ref == 0);
 261         kmem_free(dhp, sizeof (dls_head_t));
 262 }
 263 
 264 /*
 265  * Try to send mp up to the streams of the given sap and vid. Return B_TRUE
 266  * if this message is sent to any streams.
 267  * Note that this function will copy the message chain and the original
 268  * mp will remain valid after this function
 269  */
 270 static uint_t
 271 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
 272     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
 273     boolean_t (*acceptfunc)())
 274 {
 275         mod_hash_t      *hash = dlp->dl_str_hash;
 276         mod_hash_key_t  key;
 277         dls_head_t      *dhp;
 278         dld_str_t       *dsp;
 279         mblk_t          *nmp;
 280         dls_rx_t        ds_rx;
 281         void            *ds_rx_arg;
 282         uint_t          naccepted = 0;
 283         int             rval;
 284 
 285         /*
 286          * Construct a hash key from the VLAN identifier and the
 287          * DLSAP that represents dld_str_t in promiscuous mode.
 288          */
 289         key = MAKE_KEY(sap);
 290 
 291         /*
 292          * Search the hash table for dld_str_t eligible to receive
 293          * a packet chain for this DLSAP/VLAN combination. The mod hash's
 294          * internal lock serializes find/insert/remove from the mod hash list.
 295          * Incrementing the dh_ref (while holding the mod hash lock) ensures
 296          * dls_link_remove will wait for the upcall to finish.
 297          */
 298         if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
 299             i_dls_head_hold, &rval) != 0 || (rval != 0)) {
 300                 return (B_FALSE);
 301         }
 302 
 303         /*
 304          * Find dld_str_t that will accept the sub-chain.
 305          */
 306         for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
 307                 if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
 308                         continue;
 309 
 310                 /*
 311                  * We have at least one acceptor.
 312                  */
 313                 naccepted++;
 314 
 315                 /*
 316                  * There will normally be at least more dld_str_t
 317                  * (since we've yet to check for non-promiscuous
 318                  * dld_str_t) so dup the sub-chain.
 319                  */
 320                 if ((nmp = copymsgchain(mp)) != NULL)
 321                         ds_rx(ds_rx_arg, mrh, nmp, mhip);
 322         }
 323 
 324         /*
 325          * Release the hold on the dld_str_t chain now that we have
 326          * finished walking it.
 327          */
 328         i_dls_head_rele(dhp);
 329         return (naccepted);
 330 }
 331 
 /*
  * Receive entry point for a link; arg is the dls_link_t.
  *
  * Walks the b_next chain starting at mp.  Each packet is parsed and
  * stripped of its MAC header (DLS_PREPARE_PKT); packets that cannot be
  * parsed are counted in dl_unknowns and freed.  Otherwise the longest run
  * of packets sharing the same header information is split off with
  * i_dls_link_subchain() and delivered as a unit:
  *
  *  - a tagged sub-chain is first offered, as copies, to the dld_str_t
  *    hashed under ETHERTYPE_VLAN (i_dls_link_rx_func);
  *  - the sub-chain is then delivered to every dld_str_t hashed under the
  *    packet's bind SAP that dls_accept() accepts, copying the chain for
  *    all but the last acceptor, which receives the original.
  *
  * A sub-chain that nobody accepted is freed and its packet count is added
  * to dl_unknowns.  mrh is passed through to the receive callbacks; the
  * loopback argument is not used.
  */
 /* ARGSUSED */
 void
 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
     boolean_t loopback)
 {
         dls_link_t                      *dlp = arg;
         mod_hash_t                      *hash = dlp->dl_str_hash;
         mblk_t                          *nextp;
         mac_header_info_t               mhi;
         dls_head_t                      *dhp;
         dld_str_t                       *dsp;
         dld_str_t                       *ndsp;
         mblk_t                          *nmp;
         mod_hash_key_t                  key;
         uint_t                          npacket;
         boolean_t                       accepted;
         dls_rx_t                        ds_rx, nds_rx;
         void                            *ds_rx_arg, *nds_rx_arg;
         uint16_t                        vid;
         int                             err, rval;

         /*
          * Walk the packet chain.
          */
         for (; mp != NULL; mp = nextp) {
                 /*
                  * Wipe the accepted state.
                  */
                 accepted = B_FALSE;

                 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
                 if (err != 0) {
                         /*
                          * The header could not be parsed: count and drop
                          * just this packet, then carry on with the rest
                          * of the chain.
                          */
                         atomic_add_32(&(dlp->dl_unknowns), 1);
                         nextp = mp->b_next;
                         mp->b_next = NULL;
                         freemsg(mp);
                         continue;
                 }

                 /*
                  * Grab the longest sub-chain we can process as a single
                  * unit.
                  */
                 nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
                 ASSERT(npacket != 0);

                 vid = VLAN_ID(mhi.mhi_tci);

                 if (mhi.mhi_istagged) {
                         /*
                          * If it is tagged traffic, send it upstream to
                          * all dld_str_t which are attached to the physical
                          * link and bound to SAP 0x8100.
                          */
                         if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
                             ETHERTYPE_VLAN, dls_accept) > 0) {
                                 accepted = B_TRUE;
                         }

                         /*
                          * Don't pass the packets up if they are tagged
                          * packets and:
                          *  - their VID and priority are both zero and the
                          *    original packet isn't using the PVID (invalid
                          *    packets).
                          *  - their sap is ETHERTYPE_VLAN and their VID is
                          *    zero as they have already been sent upstreams.
                          */
                         if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid &&
                             VLAN_PRI(mhi.mhi_tci) == 0) ||
                             (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
                             vid == VLAN_ID_NONE)) {
                                 freemsgchain(mp);
                                 goto loop;
                         }
                 }

                 /*
                  * Construct a hash key from the DLSAP.
                  */
                 key = MAKE_KEY(mhi.mhi_bindsap);

                 /*
                  * Search the hash table for dld_str_t eligible to receive
                  * a packet chain for this DLSAP.  On success a hold has
                  * been taken on dhp (i_dls_head_hold) that must be dropped
                  * with i_dls_head_rele().
                  */
                 if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
                     i_dls_head_hold, &rval) != 0 || (rval != 0)) {
                         freemsgchain(mp);
                         goto loop;
                 }

                 /*
                  * Find the first dld_str_t that will accept the sub-chain.
                  */
                 for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
                         if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
                                 break;

                 /*
                  * If we did not find any dld_str_t willing to accept the
                  * sub-chain then throw it away.
                  */
                 if (dsp == NULL) {
                         i_dls_head_rele(dhp);
                         freemsgchain(mp);
                         goto loop;
                 }

                 /*
                  * We have at least one acceptor.
                  */
                 accepted = B_TRUE;
                 for (;;) {
                         /*
                          * Find the next dld_str_t that will accept the
                          * sub-chain.
                          */
                         for (ndsp = dsp->ds_next; ndsp != NULL;
                             ndsp = ndsp->ds_next)
                                 if (dls_accept(ndsp, &mhi, &nds_rx,
                                     &nds_rx_arg))
                                         break;

                         /*
                          * If there are no more dld_str_t that are willing
                          * to accept the sub-chain then we don't need to dup
                          * it before handing it to the current one.
                          */
                         if (ndsp == NULL) {
                                 ds_rx(ds_rx_arg, mrh, mp, &mhi);

                                 /*
                                  * Since there are no more dld_str_t, we're
                                  * done.
                                  */
                                 break;
                         }

                         /*
                          * There are more dld_str_t so dup the sub-chain.
                          * If the copy fails the current acceptor simply
                          * misses this sub-chain.
                          */
                         if ((nmp = copymsgchain(mp)) != NULL)
                                 ds_rx(ds_rx_arg, mrh, nmp, &mhi);

                         /*
                          * Advance to the acceptor found above, reusing the
                          * callback and argument dls_accept() returned.
                          */
                         dsp = ndsp;
                         ds_rx = nds_rx;
                         ds_rx_arg = nds_rx_arg;
                 }

                 /*
                  * Release the hold on the dld_str_t chain now that we have
                  * finished walking it.
                  */
                 i_dls_head_rele(dhp);

 loop:
                 /*
                  * If there were no acceptors then add the packet count to the
                  * 'unknown' count.
                  */
                 if (!accepted)
                         atomic_add_32(&(dlp->dl_unknowns), npacket);
         }
 }
 498 
 499 /* ARGSUSED */
 500 void
 501 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
 502     boolean_t loopback)
 503 {
 504         dld_str_t                       *dsp = arg;
 505         dls_link_t                      *dlp = dsp->ds_dlp;
 506         mac_header_info_t               mhi;
 507         dls_rx_t                        ds_rx;
 508         void                            *ds_rx_arg;
 509         int                             err;
 510 
 511         DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
 512         if (err != 0)
 513                 goto drop;
 514 
 515         /*
 516          * If there is promiscuous handle for vlan, we filter out the untagged
 517          * pkts and pkts that are not for the primary unicast address.
 518          */
 519         if (dsp->ds_vlan_mph != NULL) {
 520                 uint8_t prim_addr[MAXMACADDRLEN];
 521                 size_t  addr_length = dsp->ds_mip->mi_addr_length;
 522 
 523                 if (!(mhi.mhi_istagged))
 524                         goto drop;
 525                 ASSERT(dsp->ds_mh != NULL);
 526                 mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
 527                 if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
 528                         goto drop;
 529 
 530                 if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
 531                         goto drop;
 532 
 533                 ds_rx(ds_rx_arg, NULL, mp, &mhi);
 534                 return;
 535         }
 536 
 537 drop:
 538         atomic_add_32(&dlp->dl_unknowns, 1);
 539         freemsg(mp);
 540 }
 541 
 542 /* ARGSUSED */
 543 void
 544 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
 545     boolean_t loopback)
 546 {
 547         dld_str_t                       *dsp = arg;
 548         dls_link_t                      *dlp = dsp->ds_dlp;
 549         mac_header_info_t               mhi;
 550         dls_rx_t                        ds_rx;
 551         void                            *ds_rx_arg;
 552         int                             err;
 553         dls_head_t                      *dhp;
 554         mod_hash_key_t                  key;
 555 
 556         DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
 557         if (err != 0)
 558                 goto drop;
 559 
 560         /*
 561          * In order to filter out sap pkt that no dls channel listens, search
 562          * the hash table trying to find a dld_str_t eligible to receive the pkt
 563          */
 564         if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
 565                 key = MAKE_KEY(mhi.mhi_bindsap);
 566                 if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
 567                     (mod_hash_val_t *)&dhp) != 0)
 568                         goto drop;
 569         }
 570 
 571         if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
 572                 goto drop;
 573 
 574         ds_rx(ds_rx_arg, NULL, mp, &mhi);
 575         return;
 576 
 577 drop:
 578         atomic_add_32(&dlp->dl_unknowns, 1);
 579         freemsg(mp);
 580 }
 581 
 582 static void
 583 i_dls_link_destroy(dls_link_t *dlp)
 584 {
 585         ASSERT(dlp->dl_nactive == 0);
 586         ASSERT(dlp->dl_impl_count == 0);
 587         ASSERT(dlp->dl_zone_ref == 0);
 588 
 589         /*
 590          * Free the structure back to the cache.
 591          */
 592         if (dlp->dl_mch != NULL)
 593                 mac_client_close(dlp->dl_mch, 0);
 594 
 595         if (dlp->dl_mh != NULL) {
 596                 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 597                 mac_close(dlp->dl_mh);
 598         }
 599 
 600         dlp->dl_mh = NULL;
 601         dlp->dl_mch = NULL;
 602         dlp->dl_mip = NULL;
 603         dlp->dl_unknowns = 0;
 604         dlp->dl_nonip_cnt = 0;
 605         kmem_cache_free(i_dls_link_cachep, dlp);
 606 }
 607 
 608 static int
 609 i_dls_link_create(const char *name, dls_link_t **dlpp)
 610 {
 611         dls_link_t              *dlp;
 612         int                     err;
 613 
 614         /*
 615          * Allocate a new dls_link_t structure.
 616          */
 617         dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
 618 
 619         /*
 620          * Name the dls_link_t after the MAC interface it represents.
 621          */
 622         (void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
 623 
 624         /*
 625          * First reference; hold open the MAC interface.
 626          */
 627         ASSERT(dlp->dl_mh == NULL);
 628         err = mac_open(dlp->dl_name, &dlp->dl_mh);
 629         if (err != 0)
 630                 goto bail;
 631 
 632         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 633         dlp->dl_mip = mac_info(dlp->dl_mh);
 634 
 635         /* DLS is the "primary" MAC client */
 636         ASSERT(dlp->dl_mch == NULL);
 637 
 638         err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
 639             MAC_OPEN_FLAGS_USE_DATALINK_NAME);
 640         if (err != 0)
 641                 goto bail;
 642 
 643         DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
 644             dlp->dl_mch);
 645 
 646         *dlpp = dlp;
 647         return (0);
 648 
 649 bail:
 650         i_dls_link_destroy(dlp);
 651         return (err);
 652 }
 653 
 654 /*
 655  * Module initialization functions.
 656  */
 657 
 658 void
 659 dls_link_init(void)
 660 {
 661         /*
 662          * Create a kmem_cache of dls_link_t structures.
 663          */
 664         i_dls_link_cachep = kmem_cache_create("dls_link_cache",
 665             sizeof (dls_link_t), 0, i_dls_link_constructor,
 666             i_dls_link_destructor, NULL, NULL, NULL, 0);
 667         ASSERT(i_dls_link_cachep != NULL);
 668 
 669         /*
 670          * Create a dls_link_t hash table and associated lock.
 671          */
 672         i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
 673             IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
 674             mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
 675         i_dls_link_count = 0;
 676 }
 677 
 678 int
 679 dls_link_fini(void)
 680 {
 681         if (i_dls_link_count > 0)
 682                 return (EBUSY);
 683 
 684         /*
 685          * Destroy the kmem_cache.
 686          */
 687         kmem_cache_destroy(i_dls_link_cachep);
 688 
 689         /*
 690          * Destroy the hash table and associated lock.
 691          */
 692         mod_hash_destroy_hash(i_dls_link_hash);
 693         return (0);
 694 }
 695 
 696 /*
 697  * Exported functions.
 698  */
 699 
 700 static int
 701 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
 702 {
 703         dls_link_t              *dlp;
 704         int                     err;
 705 
 706         /*
 707          * Look up a dls_link_t corresponding to the given macname in the
 708          * global hash table. The i_dls_link_hash itself is protected by the
 709          * mod_hash package's internal lock which synchronizes
 710          * find/insert/remove into the global mod_hash list. Assumes that
 711          * inserts and removes are single threaded on a per mac end point
 712          * by the mac perimeter.
 713          */
 714         if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
 715             (mod_hash_val_t *)&dlp)) == 0)
 716                 goto done;
 717 
 718         ASSERT(err == MH_ERR_NOTFOUND);
 719         if (!create)
 720                 return (ENOENT);
 721 
 722         /*
 723          * We didn't find anything so we need to create one.
 724          */
 725         if ((err = i_dls_link_create(name, &dlp)) != 0)
 726                 return (err);
 727 
 728         /*
 729          * Insert the dls_link_t.
 730          */
 731         err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
 732             (mod_hash_val_t)dlp);
 733         ASSERT(err == 0);
 734 
 735         atomic_add_32(&i_dls_link_count, 1);
 736         ASSERT(i_dls_link_count != 0);
 737 
 738 done:
 739         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 740         /*
 741          * Bump the reference count and hand back the reference.
 742          */
 743         dlp->dl_ref++;
 744         *dlpp = dlp;
 745         return (0);
 746 }
 747 
 748 int
 749 dls_link_hold_create(const char *name, dls_link_t **dlpp)
 750 {
 751         return (dls_link_hold_common(name, dlpp, B_TRUE));
 752 }
 753 
 754 int
 755 dls_link_hold(const char *name, dls_link_t **dlpp)
 756 {
 757         return (dls_link_hold_common(name, dlpp, B_FALSE));
 758 }
 759 
 760 dev_info_t *
 761 dls_link_devinfo(dev_t dev)
 762 {
 763         dls_link_t      *dlp;
 764         dev_info_t      *dip;
 765         char    macname[MAXNAMELEN];
 766         char    *drv;
 767         mac_perim_handle_t      mph;
 768 
 769         if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
 770                 return (NULL);
 771         (void) snprintf(macname, MAXNAMELEN, "%s%d", drv,
 772             DLS_MINOR2INST(getminor(dev)));
 773 
 774         /*
 775          * The code below assumes that the name constructed above is the
 776          * macname. This is not the case for legacy devices. Currently this
 777          * is ok because this function is only called in the getinfo(9e) path,
 778          * which for a legacy device would directly end up in the driver's
 779          * getinfo, rather than here
 780          */
 781         if (mac_perim_enter_by_macname(macname, &mph) != 0)
 782                 return (NULL);
 783 
 784         if (dls_link_hold(macname, &dlp) != 0) {
 785                 mac_perim_exit(mph);
 786                 return (NULL);
 787         }
 788 
 789         dip = mac_devinfo_get(dlp->dl_mh);
 790         dls_link_rele(dlp);
 791         mac_perim_exit(mph);
 792 
 793         return (dip);
 794 }
 795 
 796 dev_t
 797 dls_link_dev(dls_link_t *dlp)
 798 {
 799         return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
 800             mac_minor(dlp->dl_mh)));
 801 }
 802 
 803 void
 804 dls_link_rele(dls_link_t *dlp)
 805 {
 806         mod_hash_val_t  val;
 807 
 808         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 809         /*
 810          * Check if there are any more references.
 811          */
 812         if (--dlp->dl_ref == 0) {
 813                 (void) mod_hash_remove(i_dls_link_hash,
 814                     (mod_hash_key_t)dlp->dl_name, &val);
 815                 ASSERT(dlp == (dls_link_t *)val);
 816 
 817                 /*
 818                  * Destroy the dls_link_t.
 819                  */
 820                 i_dls_link_destroy(dlp);
 821                 ASSERT(i_dls_link_count > 0);
 822                 atomic_add_32(&i_dls_link_count, -1);
 823         }
 824 }
 825 
 826 int
 827 dls_link_rele_by_name(const char *name)
 828 {
 829         dls_link_t              *dlp;
 830 
 831         if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
 832             (mod_hash_val_t *)&dlp) != 0)
 833                 return (ENOENT);
 834 
 835         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 836 
 837         /*
 838          * Must fail detach if mac client is busy.
 839          */
 840         ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
 841         if (mac_link_has_flows(dlp->dl_mch))
 842                 return (ENOTEMPTY);
 843 
 844         dls_link_rele(dlp);
 845         return (0);
 846 }
 847 
 848 int
 849 dls_link_setzid(const char *name, zoneid_t zid)
 850 {
 851         dls_link_t      *dlp;
 852         int             err = 0;
 853         zoneid_t        old_zid;
 854 
 855         if ((err = dls_link_hold_create(name, &dlp)) != 0)
 856                 return (err);
 857 
 858         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 859 
 860         if ((old_zid = dlp->dl_zid) == zid)
 861                 goto done;
 862 
 863         /*
 864          * Check whether this dlp is used by its own zone.  If yes, we cannot
 865          * change its zoneid.
 866          */
 867         if (dlp->dl_zone_ref != 0) {
 868                 err = EBUSY;
 869                 goto done;
 870         }
 871 
 872         dlp->dl_zid = zid;
 873 
 874         if (zid == GLOBAL_ZONEID) {
 875                 /*
 876                  * The link is moving from a non-global zone to the global
 877                  * zone, so we need to release the reference that was held
 878                  * when the link was originally assigned to the non-global
 879                  * zone.
 880                  */
 881                 dls_link_rele(dlp);
 882         }
 883 
 884 done:
 885         /*
 886          * We only keep the reference to this link open if the link has
 887          * successfully moved from the global zone to a non-global zone.
 888          */
 889         if (err != 0 || old_zid != GLOBAL_ZONEID)
 890                 dls_link_rele(dlp);
 891         return (err);
 892 }
 893 
 894 int
 895 dls_link_getzid(const char *name, zoneid_t *zidp)
 896 {
 897         dls_link_t      *dlp;
 898         int             err = 0;
 899 
 900         if ((err = dls_link_hold(name, &dlp)) != 0)
 901                 return (err);
 902 
 903         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 904 
 905         *zidp = dlp->dl_zid;
 906 
 907         dls_link_rele(dlp);
 908         return (0);
 909 }
 910 
 911 void
 912 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
 913 {
 914         mod_hash_t      *hash = dlp->dl_str_hash;
 915         mod_hash_key_t  key;
 916         dls_head_t      *dhp;
 917         dld_str_t       *p;
 918         int             err;
 919 
 920         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 921 
 922         /*
 923          * Generate a hash key based on the sap.
 924          */
 925         key = MAKE_KEY(sap);
 926 
 927         /*
 928          * Search the table for a list head with this key.
 929          */
 930         if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
 931                 ASSERT(err == MH_ERR_NOTFOUND);
 932 
 933                 dhp = i_dls_head_alloc(key);
 934                 err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
 935                 ASSERT(err == 0);
 936         }
 937 
 938         /*
 939          * Add the dld_str_t to the head of the list. List walkers in
 940          * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
 941          * while they walk the list. The membar below ensures that list walkers
 942          * see exactly the old list or the new list.
 943          */
 944         ASSERT(dsp->ds_next == NULL);
 945         p = dhp->dh_list;
 946         dsp->ds_next = p;
 947 
 948         membar_producer();
 949 
 950         dhp->dh_list = dsp;
 951 
 952         /*
 953          * Save a pointer to the list head.
 954          */
 955         dsp->ds_head = dhp;
 956         dlp->dl_impl_count++;
 957 }
 958 
 959 void
 960 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
 961 {
 962         mod_hash_t      *hash = dlp->dl_str_hash;
 963         dld_str_t       **pp;
 964         dld_str_t       *p;
 965         dls_head_t      *dhp;
 966 
 967         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 968 
 969         /*
 970          * We set dh_removing here to tell the receive callbacks not to pass
 971          * up packets anymore. Then wait till the current callbacks are done.
 972          * This happens either in the close path or in processing the
 973          * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
 974          * The dh_ref ensures there aren't and there won't be any upcalls
 975          * walking or using the dh_list. The mod hash internal lock ensures
 976          * that the insert/remove of the dls_head_t itself synchronizes with
 977          * any i_dls_link_rx trying to locate it. The perimeter ensures that
 978          * there isn't another simultaneous dls_link_add/remove.
 979          */
 980         dhp = dsp->ds_head;
 981 
 982         mutex_enter(&dhp->dh_lock);
 983         dhp->dh_removing = B_TRUE;
 984         while (dhp->dh_ref != 0)
 985                 cv_wait(&dhp->dh_cv, &dhp->dh_lock);
 986         mutex_exit(&dhp->dh_lock);
 987 
 988         /*
 989          * Walk the list and remove the dld_str_t.
 990          */
 991         for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
 992                 if (p == dsp)
 993                         break;
 994         }
 995         ASSERT(p != NULL);
 996         *pp = p->ds_next;
 997         p->ds_next = NULL;
 998         p->ds_head = NULL;
 999 
1000         ASSERT(dlp->dl_impl_count != 0);
1001         dlp->dl_impl_count--;
1002 
1003         if (dhp->dh_list == NULL) {
1004                 mod_hash_val_t  val = NULL;
1005 
1006                 /*
1007                  * The list is empty so remove the hash table entry.
1008                  */
1009                 (void) mod_hash_remove(hash, dhp->dh_key, &val);
1010                 ASSERT(dhp == (dls_head_t *)val);
1011                 i_dls_head_free(dhp);
1012         } else {
1013                 mutex_enter(&dhp->dh_lock);
1014                 dhp->dh_removing = B_FALSE;
1015                 mutex_exit(&dhp->dh_lock);
1016         }
1017 }