1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /* Copyright (c) 1990 Mentat Inc. */
  25 
  26 /*
  27  * Internet Group Management Protocol (IGMP) routines.
  28  * Multicast Listener Discovery Protocol (MLD) routines.
  29  *
  30  * Written by Steve Deering, Stanford, May 1988.
  31  * Modified by Rosen Sharma, Stanford, Aug 1994.
  32  * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
  33  *
  34  * MULTICAST 3.5.1.1
  35  */
  36 
  37 #include <sys/types.h>
  38 #include <sys/stream.h>
  39 #include <sys/stropts.h>
  40 #include <sys/strlog.h>
  41 #include <sys/strsun.h>
  42 #include <sys/systm.h>
  43 #include <sys/ddi.h>
  44 #include <sys/sunddi.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/atomic.h>
  47 #include <sys/zone.h>
  48 #include <sys/callb.h>
  49 #include <sys/param.h>
  50 #include <sys/socket.h>
  51 #include <inet/ipclassifier.h>
  52 #include <net/if.h>
  53 #include <net/route.h>
  54 #include <netinet/in.h>
  55 #include <netinet/igmp_var.h>
  56 #include <netinet/ip6.h>
  57 #include <netinet/icmp6.h>
  58 #include <inet/ipsec_impl.h>
  59 
  60 #include <inet/common.h>
  61 #include <inet/mi.h>
  62 #include <inet/nd.h>
  63 #include <inet/tunables.h>
  64 #include <inet/ip.h>
  65 #include <inet/ip6.h>
  66 #include <inet/ip_multi.h>
  67 #include <inet/ip_listutils.h>
  68 
  69 #include <netinet/igmp.h>
  70 #include <inet/ip_ndp.h>
  71 #include <inet/ip_if.h>
  72 
  73 static uint_t   igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
  74 static uint_t   igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
  75 static uint_t   mld_query_in(mld_hdr_t *mldh, ill_t *ill);
  76 static uint_t   mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
  77 static void     igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
  78 static void     mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
  79 static void     igmpv3_sendrpt(ill_t *ill, mrec_t *reclist);
  80 static void     mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
  81 static mrec_t   *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
  82                     slist_t *srclist, mrec_t *next);
  83 static void     mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
  84                     mcast_record_t rtype, slist_t *flist);
  85 static mrec_t   *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
  86 
  87 /*
  88  * Macros used to do timer len conversions.  Timer values are always
  89  * stored and passed to the timer functions as milliseconds; but the
  90  * default values and values from the wire may not be.
  91  *
  92  * And yes, it's obscure, but decisecond is easier to abbreviate than
  93  * "tenths of a second".
  94  */
  95 #define DSEC_TO_MSEC(dsec)      ((dsec) * 100)
  96 #define SEC_TO_MSEC(sec)        ((sec) * 1000)
  97 
  98 /*
  99  * A running timer (scheduled thru timeout) can be cancelled if another
 100  * timer with a shorter timeout value is scheduled before it has timed
 101  * out.  When the shorter timer expires, the original timer is updated
 102  * to account for the time elapsed while the shorter timer ran; but this
 103  * does not take into account the amount of time already spent in timeout
 104  * state before being preempted by the shorter timer, that is the time
 105  * interval between time scheduled to time cancelled.  This can cause
 106  * delays in sending out multicast membership reports.  To resolve this
 107  * problem, wallclock time (absolute time) is used instead of deltas
 108  * (relative time) to track timers.
 109  *
 110  * The MACRO below gets the lbolt value, used for proper timer scheduling
 111  * and firing. Therefore multicast membership reports are sent on time.
 * The timer does not exactly fire at the time it was scheduled to fire,
 113  * there is a difference of a few milliseconds observed. An offset is used
 114  * to take care of the difference.
 115  */
 116 
 117 #define CURRENT_MSTIME  ((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
 118 #define CURRENT_OFFSET  (999)
 119 
 120 /*
 121  * The first multicast join will trigger the igmp timers / mld timers
 122  * The unit for next is milliseconds.
 123  */
 124 void
 125 igmp_start_timers(unsigned next, ip_stack_t *ipst)
 126 {
 127         int     time_left;
 128         int     ret;
 129         timeout_id_t id;
 130 
 131         ASSERT(next != 0 && next != INFINITY);
 132 
 133         mutex_enter(&ipst->ips_igmp_timer_lock);
 134 
 135         if (ipst->ips_igmp_timer_setter_active) {
 136                 /*
 137                  * Serialize timer setters, one at a time. If the
 138                  * timer is currently being set by someone,
 139                  * just record the next time when it has to be
 140                  * invoked and return. The current setter will
 141                  * take care.
 142                  */
 143                 ipst->ips_igmp_time_to_next =
 144                     MIN(ipst->ips_igmp_time_to_next, next);
 145                 mutex_exit(&ipst->ips_igmp_timer_lock);
 146                 return;
 147         } else {
 148                 ipst->ips_igmp_timer_setter_active = B_TRUE;
 149         }
 150         if (ipst->ips_igmp_timeout_id == 0) {
 151                 /*
 152                  * The timer is inactive. We need to start a timer
 153                  */
 154                 ipst->ips_igmp_time_to_next = next;
 155                 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
 156                     (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
 157                 ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
 158                 ipst->ips_igmp_timer_setter_active = B_FALSE;
 159                 mutex_exit(&ipst->ips_igmp_timer_lock);
 160                 return;
 161         }
 162 
 163         /*
 164          * The timer was scheduled sometime back for firing in
 165          * 'igmp_time_to_next' ms and is active. We need to
 166          * reschedule the timeout if the new 'next' will happen
 167          * earlier than the currently scheduled timeout
 168          */
 169         time_left = ipst->ips_igmp_timer_scheduled_last +
 170             MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
 171         if (time_left < MSEC_TO_TICK(next)) {
 172                 ipst->ips_igmp_timer_setter_active = B_FALSE;
 173                 mutex_exit(&ipst->ips_igmp_timer_lock);
 174                 return;
 175         }
 176         id = ipst->ips_igmp_timeout_id;
 177 
 178         mutex_exit(&ipst->ips_igmp_timer_lock);
 179         ret = untimeout(id);
 180         mutex_enter(&ipst->ips_igmp_timer_lock);
 181         /*
 182          * The timeout was cancelled, or the timeout handler
 183          * completed, while we were blocked in the untimeout.
 184          * No other thread could have set the timer meanwhile
 185          * since we serialized all the timer setters. Thus
 186          * no timer is currently active nor executing nor will
 187          * any timer fire in the future. We start the timer now
 188          * if needed.
 189          */
 190         if (ret == -1) {
 191                 ASSERT(ipst->ips_igmp_timeout_id == 0);
 192         } else {
 193                 ASSERT(ipst->ips_igmp_timeout_id != 0);
 194                 ipst->ips_igmp_timeout_id = 0;
 195         }
 196         if (ipst->ips_igmp_time_to_next != 0) {
 197                 ipst->ips_igmp_time_to_next =
 198                     MIN(ipst->ips_igmp_time_to_next, next);
 199                 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
 200                     (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
 201                 ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
 202         }
 203         ipst->ips_igmp_timer_setter_active = B_FALSE;
 204         mutex_exit(&ipst->ips_igmp_timer_lock);
 205 }
 206 
 207 /*
 208  * mld_start_timers:
 209  * The unit for next is milliseconds.
 210  */
 211 void
 212 mld_start_timers(unsigned next, ip_stack_t *ipst)
 213 {
 214         int     time_left;
 215         int     ret;
 216         timeout_id_t id;
 217 
 218         ASSERT(next != 0 && next != INFINITY);
 219 
 220         mutex_enter(&ipst->ips_mld_timer_lock);
 221         if (ipst->ips_mld_timer_setter_active) {
 222                 /*
 223                  * Serialize timer setters, one at a time. If the
 224                  * timer is currently being set by someone,
 225                  * just record the next time when it has to be
 226                  * invoked and return. The current setter will
 227                  * take care.
 228                  */
 229                 ipst->ips_mld_time_to_next =
 230                     MIN(ipst->ips_mld_time_to_next, next);
 231                 mutex_exit(&ipst->ips_mld_timer_lock);
 232                 return;
 233         } else {
 234                 ipst->ips_mld_timer_setter_active = B_TRUE;
 235         }
 236         if (ipst->ips_mld_timeout_id == 0) {
 237                 /*
 238                  * The timer is inactive. We need to start a timer
 239                  */
 240                 ipst->ips_mld_time_to_next = next;
 241                 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
 242                     (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
 243                 ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
 244                 ipst->ips_mld_timer_setter_active = B_FALSE;
 245                 mutex_exit(&ipst->ips_mld_timer_lock);
 246                 return;
 247         }
 248 
 249         /*
 250          * The timer was scheduled sometime back for firing in
 251          * 'igmp_time_to_next' ms and is active. We need to
 252          * reschedule the timeout if the new 'next' will happen
 253          * earlier than the currently scheduled timeout
 254          */
 255         time_left = ipst->ips_mld_timer_scheduled_last +
 256             MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
 257         if (time_left < MSEC_TO_TICK(next)) {
 258                 ipst->ips_mld_timer_setter_active = B_FALSE;
 259                 mutex_exit(&ipst->ips_mld_timer_lock);
 260                 return;
 261         }
 262         id = ipst->ips_mld_timeout_id;
 263 
 264         mutex_exit(&ipst->ips_mld_timer_lock);
 265         ret = untimeout(id);
 266         mutex_enter(&ipst->ips_mld_timer_lock);
 267         /*
 268          * The timeout was cancelled, or the timeout handler
 269          * completed, while we were blocked in the untimeout.
 270          * No other thread could have set the timer meanwhile
 271          * since we serialized all the timer setters. Thus
 272          * no timer is currently active nor executing nor will
 273          * any timer fire in the future. We start the timer now
 274          * if needed.
 275          */
 276         if (ret == -1) {
 277                 ASSERT(ipst->ips_mld_timeout_id == 0);
 278         } else {
 279                 ASSERT(ipst->ips_mld_timeout_id != 0);
 280                 ipst->ips_mld_timeout_id = 0;
 281         }
 282         if (ipst->ips_mld_time_to_next != 0) {
 283                 ipst->ips_mld_time_to_next =
 284                     MIN(ipst->ips_mld_time_to_next, next);
 285                 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
 286                     (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
 287                 ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
 288         }
 289         ipst->ips_mld_timer_setter_active = B_FALSE;
 290         mutex_exit(&ipst->ips_mld_timer_lock);
 291 }
 292 
 293 /*
 294  * igmp_input:
 295  * Return NULL for a bad packet that is discarded here.
 296  * Return mp if the message is OK and should be handed to "raw" receivers.
 297  * Callers of igmp_input() may need to reinitialize variables that were copied
 298  * from the mblk as this calls pullupmsg().
 299  */
 300 mblk_t *
 301 igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
 302 {
 303         igmpa_t         *igmpa;
 304         ipha_t          *ipha = (ipha_t *)(mp->b_rptr);
 305         int             iphlen, igmplen, mblklen;
 306         ilm_t           *ilm;
 307         uint32_t        src, dst;
 308         uint32_t        group;
 309         in6_addr_t      v6group;
 310         uint_t          next;
 311         ipif_t          *ipif;
 312         ill_t           *ill = ira->ira_ill;
 313         ip_stack_t      *ipst = ill->ill_ipst;
 314 
 315         ASSERT(!ill->ill_isv6);
 316         ++ipst->ips_igmpstat.igps_rcv_total;
 317 
 318         mblklen = MBLKL(mp);
 319         iphlen = ira->ira_ip_hdr_length;
 320         if (mblklen < 1 || mblklen < iphlen) {
 321                 ++ipst->ips_igmpstat.igps_rcv_tooshort;
 322                 goto bad_pkt;
 323         }
 324         igmplen = ira->ira_pktlen - iphlen;
 325         /*
 326          * Since msg sizes are more variable with v3, just pullup the
 327          * whole thing now.
 328          */
 329         if (MBLKL(mp) < (igmplen + iphlen)) {
 330                 mblk_t *mp1;
 331                 if ((mp1 = msgpullup(mp, -1)) == NULL) {
 332                         ++ipst->ips_igmpstat.igps_rcv_tooshort;
 333                         goto bad_pkt;
 334                 }
 335                 freemsg(mp);
 336                 mp = mp1;
 337                 ipha = (ipha_t *)(mp->b_rptr);
 338         }
 339 
 340         /*
 341          * Validate lengths
 342          */
 343         if (igmplen < IGMP_MINLEN) {
 344                 ++ipst->ips_igmpstat.igps_rcv_tooshort;
 345                 goto bad_pkt;
 346         }
 347 
 348         igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
 349         src = ipha->ipha_src;
 350         dst = ipha->ipha_dst;
 351         if (ip_debug > 1)
 352                 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
 353                     "igmp_input: src 0x%x, dst 0x%x on %s\n",
 354                     (int)ntohl(src), (int)ntohl(dst),
 355                     ill->ill_name);
 356 
 357         switch (igmpa->igmpa_type) {
 358         case IGMP_MEMBERSHIP_QUERY:
 359                 /*
 360                  * packet length differentiates between v1/v2 and v3
 361                  * v1/v2 should be exactly 8 octets long; v3 is >= 12
 362                  */
 363                 if ((igmplen == IGMP_MINLEN) ||
 364                     (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
 365                         next = igmp_query_in(ipha, igmpa, ill);
 366                 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
 367                         next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
 368                             igmplen);
 369                 } else {
 370                         ++ipst->ips_igmpstat.igps_rcv_tooshort;
 371                         goto bad_pkt;
 372                 }
 373                 if (next == 0)
 374                         goto bad_pkt;
 375 
 376                 if (next != INFINITY)
 377                         igmp_start_timers(next, ipst);
 378 
 379                 break;
 380 
 381         case IGMP_V1_MEMBERSHIP_REPORT:
 382         case IGMP_V2_MEMBERSHIP_REPORT:
 383                 /*
 384                  * For fast leave to work, we have to know that we are the
 385                  * last person to send a report for this group. Reports
 386                  * generated by us are looped back since we could potentially
 387                  * be a multicast router, so discard reports sourced by me.
 388                  */
 389                 mutex_enter(&ill->ill_lock);
 390                 for (ipif = ill->ill_ipif; ipif != NULL;
 391                     ipif = ipif->ipif_next) {
 392                         if (ipif->ipif_lcl_addr == src) {
 393                                 if (ip_debug > 1) {
 394                                         (void) mi_strlog(ill->ill_rq,
 395                                             1,
 396                                             SL_TRACE,
 397                                             "igmp_input: we are only "
 398                                             "member src 0x%x\n",
 399                                             (int)ntohl(src));
 400                                 }
 401                                 mutex_exit(&ill->ill_lock);
 402                                 return (mp);
 403                         }
 404                 }
 405                 mutex_exit(&ill->ill_lock);
 406 
 407                 ++ipst->ips_igmpstat.igps_rcv_reports;
 408                 group = igmpa->igmpa_group;
 409                 if (!CLASSD(group)) {
 410                         ++ipst->ips_igmpstat.igps_rcv_badreports;
 411                         goto bad_pkt;
 412                 }
 413 
 414                 /*
 415                  * KLUDGE: if the IP source address of the report has an
 416                  * unspecified (i.e., zero) subnet number, as is allowed for
 417                  * a booting host, replace it with the correct subnet number
 418                  * so that a process-level multicast routing demon can
 419                  * determine which subnet it arrived from.  This is necessary
 420                  * to compensate for the lack of any way for a process to
 421                  * determine the arrival interface of an incoming packet.
 422                  *
 423                  * Requires that a copy of *this* message it passed up
 424                  * to the raw interface which is done by our caller.
 425                  */
 426                 if ((src & htonl(0xFF000000U)) == 0) {      /* Minimum net mask */
 427                         /* Pick the first ipif on this ill */
 428                         mutex_enter(&ill->ill_lock);
 429                         src = ill->ill_ipif->ipif_subnet;
 430                         mutex_exit(&ill->ill_lock);
 431                         ip1dbg(("igmp_input: changed src to 0x%x\n",
 432                             (int)ntohl(src)));
 433                         ipha->ipha_src = src;
 434                 }
 435 
 436                 /*
 437                  * If our ill has ILMs that belong to the group being
 438                  * reported, and we are a 'Delaying Member' in the RFC
 439                  * terminology, stop our timer for that group and 'clear
 440                  * flag' i.e. mark as IGMP_OTHERMEMBER.
 441                  */
 442                 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
 443                 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
 444                 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
 445                         if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
 446                                 continue;
 447 
 448                         ++ipst->ips_igmpstat.igps_rcv_ourreports;
 449                         ilm->ilm_timer = INFINITY;
 450                         ilm->ilm_state = IGMP_OTHERMEMBER;
 451                 } /* for */
 452                 rw_exit(&ill->ill_mcast_lock);
 453                 ill_mcast_timer_start(ill->ill_ipst);
 454                 break;
 455 
 456         case IGMP_V3_MEMBERSHIP_REPORT:
 457                 /*
 458                  * Currently nothing to do here; IGMP router is not
 459                  * implemented in ip, and v3 hosts don't pay attention
 460                  * to membership reports.
 461                  */
 462                 break;
 463         }
 464         /*
 465          * Pass all valid IGMP packets up to any process(es) listening
 466          * on a raw IGMP socket. Do not free the packet.
 467          */
 468         return (mp);
 469 
 470 bad_pkt:
 471         freemsg(mp);
 472         return (NULL);
 473 }
 474 
 475 static uint_t
 476 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
 477 {
 478         ilm_t   *ilm;
 479         int     timer;
 480         uint_t  next, current;
 481         ip_stack_t       *ipst;
 482 
 483         ipst = ill->ill_ipst;
 484         ++ipst->ips_igmpstat.igps_rcv_queries;
 485 
 486         rw_enter(&ill->ill_mcast_lock, RW_WRITER);
 487         /*
 488          * In the IGMPv2 specification, there are 3 states and a flag.
 489          *
 490          * In Non-Member state, we simply don't have a membership record.
 491          * In Delaying Member state, our timer is running (ilm->ilm_timer
 492          * < INFINITY).  In Idle Member state, our timer is not running
 493          * (ilm->ilm_timer == INFINITY).
 494          *
 495          * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
 496          * we have heard a report from another member, or IGMP_IREPORTEDLAST
 497          * if I sent the last report.
 498          */
 499         if ((igmpa->igmpa_code == 0) ||
 500             (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
 501                 /*
 502                  * Query from an old router.
 503                  * Remember that the querier on this interface is old,
 504                  * and set the timer to the value in RFC 1112.
 505                  */
 506                 ill->ill_mcast_v1_time = 0;
 507                 ill->ill_mcast_v1_tset = 1;
 508                 if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
 509                         ip1dbg(("Received IGMPv1 Query on %s, switching mode "
 510                             "to IGMP_V1_ROUTER\n", ill->ill_name));
 511                         atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
 512                         ill->ill_mcast_type = IGMP_V1_ROUTER;
 513                 }
 514 
 515                 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);
 516 
 517                 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
 518                     igmpa->igmpa_group != 0) {
 519                         ++ipst->ips_igmpstat.igps_rcv_badqueries;
 520                         rw_exit(&ill->ill_mcast_lock);
 521                         ill_mcast_timer_start(ill->ill_ipst);
 522                         return (0);
 523                 }
 524 
 525         } else {
 526                 in_addr_t group;
 527 
 528                 /*
 529                  * Query from a new router
 530                  * Simply do a validity check
 531                  */
 532                 group = igmpa->igmpa_group;
 533                 if (group != 0 && (!CLASSD(group))) {
 534                         ++ipst->ips_igmpstat.igps_rcv_badqueries;
 535                         rw_exit(&ill->ill_mcast_lock);
 536                         ill_mcast_timer_start(ill->ill_ipst);
 537                         return (0);
 538                 }
 539 
 540                 /*
 541                  * Switch interface state to v2 on receipt of a v2 query
 542                  * ONLY IF current state is v3.  Let things be if current
 543                  * state if v1 but do reset the v2-querier-present timer.
 544                  */
 545                 if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
 546                         ip1dbg(("Received IGMPv2 Query on %s, switching mode "
 547                             "to IGMP_V2_ROUTER", ill->ill_name));
 548                         atomic_inc_16(&ill->ill_ifptr->illif_mcast_v2);
 549                         ill->ill_mcast_type = IGMP_V2_ROUTER;
 550                 }
 551                 ill->ill_mcast_v2_time = 0;
 552                 ill->ill_mcast_v2_tset = 1;
 553 
 554                 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
 555         }
 556 
 557         if (ip_debug > 1) {
 558                 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
 559                     "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
 560                     (int)ntohs(igmpa->igmpa_code),
 561                     (int)ntohs(igmpa->igmpa_type));
 562         }
 563 
 564         /*
 565          * -Start the timers in all of our membership records
 566          *  for the physical interface on which the query
 567          *  arrived, excluding those that belong to the "all
 568          *  hosts" group (224.0.0.1).
 569          *
 570          * -Restart any timer that is already running but has
 571          *  a value longer than the requested timeout.
 572          *
 573          * -Use the value specified in the query message as
 574          *  the maximum timeout.
 575          */
 576         next = (unsigned)INFINITY;
 577 
 578         current = CURRENT_MSTIME;
 579         for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
 580 
 581                 /*
 582                  * A multicast router joins INADDR_ANY address
 583                  * to enable promiscuous reception of all
 584                  * mcasts from the interface. This INADDR_ANY
 585                  * is stored in the ilm_v6addr as V6 unspec addr
 586                  */
 587                 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
 588                         continue;
 589                 if (ilm->ilm_addr == htonl(INADDR_ANY))
 590                         continue;
 591                 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
 592                     (igmpa->igmpa_group == 0) ||
 593                     (igmpa->igmpa_group == ilm->ilm_addr)) {
 594                         if (ilm->ilm_timer > timer) {
 595                                 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
 596                                 if (ilm->ilm_timer < next)
 597                                         next = ilm->ilm_timer;
 598                                 ilm->ilm_timer += current;
 599                         }
 600                 }
 601         }
 602         rw_exit(&ill->ill_mcast_lock);
 603         /*
 604          * No packets have been sent above - no
 605          * ill_mcast_send_queued is needed.
 606          */
 607         ill_mcast_timer_start(ill->ill_ipst);
 608 
 609         return (next);
 610 }
 611 
 612 static uint_t
 613 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
 614 {
 615         uint_t          i, next, mrd, qqi, timer, delay, numsrc;
 616         uint_t          current;
 617         ilm_t           *ilm;
 618         ipaddr_t        *src_array;
 619         uint8_t         qrv;
 620         ip_stack_t       *ipst;
 621 
 622         ipst = ill->ill_ipst;
 623         /* make sure numsrc matches packet size */
 624         numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
 625         if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
 626                 ++ipst->ips_igmpstat.igps_rcv_tooshort;
 627                 return (0);
 628         }
 629         src_array = (ipaddr_t *)&igmp3qa[1];
 630 
 631         ++ipst->ips_igmpstat.igps_rcv_queries;
 632 
 633         rw_enter(&ill->ill_mcast_lock, RW_WRITER);
 634 
 635         if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
 636                 uint_t hdrval, mant, exp;
 637                 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
 638                 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
 639                 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
 640                 mrd = (mant | 0x10) << (exp + 3);
 641         }
 642         if (mrd == 0)
 643                 mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
 644         timer = DSEC_TO_MSEC(mrd);
 645         MCAST_RANDOM_DELAY(delay, timer);
 646         next = (unsigned)INFINITY;
 647         current = CURRENT_MSTIME;
 648 
 649         if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
 650                 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
 651         else
 652                 ill->ill_mcast_rv = qrv;
 653 
 654         if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
 655                 uint_t hdrval, mant, exp;
 656                 hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
 657                 mant = hdrval & IGMP_V3_QQI_MANT_MASK;
 658                 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
 659                 qqi = (mant | 0x10) << (exp + 3);
 660         }
 661         ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
 662 
 663         /*
 664          * If we have a pending general query response that's scheduled
 665          * sooner than the delay we calculated for this response, then
 666          * no action is required (RFC3376 section 5.2 rule 1)
 667          */
 668         if (ill->ill_global_timer < (current + delay)) {
 669                 rw_exit(&ill->ill_mcast_lock);
 670                 ill_mcast_timer_start(ill->ill_ipst);
 671                 return (next);
 672         }
 673 
 674         /*
 675          * Now take action depending upon query type:
 676          * general, group specific, or group/source specific.
 677          */
 678         if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
 679                 /*
 680                  * general query
 681                  * We know global timer is either not running or is
 682                  * greater than our calculated delay, so reset it to
 683                  * our delay (random value in range [0, response time]).
 684                  */
 685                 ill->ill_global_timer =  current + delay;
 686                 next = delay;
 687         } else {
 688                 /* group or group/source specific query */
 689                 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
 690                         if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
 691                             (ilm->ilm_addr == htonl(INADDR_ANY)) ||
 692                             (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
 693                             (igmp3qa->igmp3qa_group != ilm->ilm_addr))
 694                                 continue;
 695                         /*
 696                          * If the query is group specific or we have a
 697                          * pending group specific query, the response is
 698                          * group specific (pending sources list should be
 699                          * empty).  Otherwise, need to update the pending
 700                          * sources list for the group and source specific
 701                          * response.
 702                          */
 703                         if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
 704                             SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
 705 group_query:
 706                                 FREE_SLIST(ilm->ilm_pendsrcs);
 707                                 ilm->ilm_pendsrcs = NULL;
 708                         } else {
 709                                 boolean_t overflow;
 710                                 slist_t *pktl;
 711                                 if (numsrc > MAX_FILTER_SIZE ||
 712                                     (ilm->ilm_pendsrcs == NULL &&
 713                                     (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
 714                                         /*
 715                                          * We've been sent more sources than
 716                                          * we can deal with; or we can't deal
 717                                          * with a source list at all.  Revert
 718                                          * to a group specific query.
 719                                          */
 720                                         goto group_query;
 721                                 }
 722                                 if ((pktl = l_alloc()) == NULL)
 723                                         goto group_query;
 724                                 pktl->sl_numsrc = numsrc;
 725                                 for (i = 0; i < numsrc; i++)
 726                                         IN6_IPADDR_TO_V4MAPPED(src_array[i],
 727                                             &(pktl->sl_addr[i]));
 728                                 l_union_in_a(ilm->ilm_pendsrcs, pktl,
 729                                     &overflow);
 730                                 l_free(pktl);
 731                                 if (overflow)
 732                                         goto group_query;
 733                         }
 734 
 735                         ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
 736                             INFINITY : (ilm->ilm_timer - current);
 737                         /* choose soonest timer */
 738                         ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
 739                         if (ilm->ilm_timer < next)
 740                                 next = ilm->ilm_timer;
 741                         ilm->ilm_timer += current;
 742                 }
 743         }
 744         rw_exit(&ill->ill_mcast_lock);
 745         /*
 746          * No packets have been sent above - no
 747          * ill_mcast_send_queued is needed.
 748          */
 749         ill_mcast_timer_start(ill->ill_ipst);
 750 
 751         return (next);
 752 }
 753 
 754 /*
 755  * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 756  * and it gets sent after the lock is dropped.
 757  */
 758 void
 759 igmp_joingroup(ilm_t *ilm)
 760 {
 761         uint_t  timer;
 762         ill_t   *ill;
 763         ip_stack_t      *ipst = ilm->ilm_ipst;
 764 
 765         ill = ilm->ilm_ill;
 766 
 767         ASSERT(!ill->ill_isv6);
 768         ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
 769 
 770         if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
 771                 ilm->ilm_rtx.rtx_timer = INFINITY;
 772                 ilm->ilm_state = IGMP_OTHERMEMBER;
 773         } else {
 774                 ip1dbg(("Querier mode %d, sending report, group %x\n",
 775                     ill->ill_mcast_type, htonl(ilm->ilm_addr)));
 776                 if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
 777                         igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
 778                 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
 779                         igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
 780                 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
 781                         mrec_t *rp;
 782                         mcast_record_t rtype;
 783                         /*
 784                          * The possible state changes we need to handle here:
 785                          *   Old State  New State       Report
 786                          *
 787                          *   INCLUDE(0) INCLUDE(X)      ALLOW(X),BLOCK(0)
 788                          *   INCLUDE(0) EXCLUDE(X)      TO_EX(X)
 789                          *
 790                          * No need to send the BLOCK(0) report; ALLOW(X)
 791                          * is enough.
 792                          */
 793                         rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
 794                             ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
 795                         rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
 796                             ilm->ilm_filter, NULL);
 797                         igmpv3_sendrpt(ill, rp);
 798                         /*
 799                          * Set up retransmission state.  Timer is set below,
 800                          * for both v3 and older versions.
 801                          */
 802                         mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
 803                             ilm->ilm_filter);
 804                 }
 805 
 806                 /* Set the ilm timer value */
 807                 ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
 808                 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
 809                     SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
 810                 timer = ilm->ilm_rtx.rtx_timer;
 811                 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
 812                 ilm->ilm_state = IGMP_IREPORTEDLAST;
 813 
 814                 /*
 815                  * We are holding ill_mcast_lock here and the timeout
 816                  * handler (igmp_timeout_handler_per_ill) acquires that
 817                  * lock. Hence we can't call igmp_start_timers since it could
 818                  * deadlock in untimeout().
 819                  * Instead the thread which drops ill_mcast_lock will have
 820                  * to call ill_mcast_timer_start().
 821                  */
 822                 mutex_enter(&ipst->ips_igmp_timer_lock);
 823                 ipst->ips_igmp_deferred_next = MIN(timer,
 824                     ipst->ips_igmp_deferred_next);
 825                 mutex_exit(&ipst->ips_igmp_timer_lock);
 826         }
 827 
 828         if (ip_debug > 1) {
 829                 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
 830                     "igmp_joingroup: multicast_type %d timer %d",
 831                     (ilm->ilm_ill->ill_mcast_type),
 832                     (int)ntohl(timer));
 833         }
 834 }
 835 
 836 /*
 837  * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 838  * and it gets sent after the lock is dropped.
 839  */
 840 void
 841 mld_joingroup(ilm_t *ilm)
 842 {
 843         uint_t  timer;
 844         ill_t   *ill;
 845         ip_stack_t      *ipst = ilm->ilm_ipst;
 846 
 847         ill = ilm->ilm_ill;
 848 
 849         ASSERT(ill->ill_isv6);
 850 
 851         ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
 852 
 853         if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
 854                 ilm->ilm_rtx.rtx_timer = INFINITY;
 855                 ilm->ilm_state = IGMP_OTHERMEMBER;
 856         } else {
 857                 if (ill->ill_mcast_type == MLD_V1_ROUTER) {
 858                         mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
 859                 } else {
 860                         mrec_t *rp;
 861                         mcast_record_t rtype;
 862                         /*
 863                          * The possible state changes we need to handle here:
 864                          *      Old State   New State   Report
 865                          *
 866                          *      INCLUDE(0)  INCLUDE(X)  ALLOW(X),BLOCK(0)
 867                          *      INCLUDE(0)  EXCLUDE(X)  TO_EX(X)
 868                          *
 869                          * No need to send the BLOCK(0) report; ALLOW(X)
 870                          * is enough
 871                          */
 872                         rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
 873                             ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
 874                         rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
 875                             ilm->ilm_filter, NULL);
 876                         mldv2_sendrpt(ill, rp);
 877                         /*
 878                          * Set up retransmission state.  Timer is set below,
 879                          * for both v2 and v1.
 880                          */
 881                         mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
 882                             ilm->ilm_filter);
 883                 }
 884 
 885                 /* Set the ilm timer value */
 886                 ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
 887                     ilm->ilm_rtx.rtx_cnt > 0);
 888 
 889                 ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
 890                 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
 891                     SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
 892                 timer = ilm->ilm_rtx.rtx_timer;
 893                 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
 894                 ilm->ilm_state = IGMP_IREPORTEDLAST;
 895 
 896                 /*
 897                  * We are holding ill_mcast_lock here and the timeout
 898                  * handler (mld_timeout_handler_per_ill) acquires that
 899                  * lock. Hence we can't call mld_start_timers since it could
 900                  * deadlock in untimeout().
 901                  * Instead the thread which drops ill_mcast_lock will have
 902                  * to call ill_mcast_timer_start().
 903                  */
 904                 mutex_enter(&ipst->ips_mld_timer_lock);
 905                 ipst->ips_mld_deferred_next = MIN(timer,
 906                     ipst->ips_mld_deferred_next);
 907                 mutex_exit(&ipst->ips_mld_timer_lock);
 908         }
 909 
 910         if (ip_debug > 1) {
 911                 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
 912                     "mld_joingroup: multicast_type %d timer %d",
 913                     (ilm->ilm_ill->ill_mcast_type),
 914                     (int)ntohl(timer));
 915         }
 916 }
 917 
 918 /*
 919  * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 920  * and it gets sent after the lock is dropped.
 921  */
 922 void
 923 igmp_leavegroup(ilm_t *ilm)
 924 {
 925         ill_t *ill = ilm->ilm_ill;
 926 
 927         ASSERT(!ill->ill_isv6);
 928 
 929         ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
 930         if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
 931             ill->ill_mcast_type == IGMP_V2_ROUTER &&
 932             (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
 933                 igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
 934                     (htonl(INADDR_ALLRTRS_GROUP)));
 935                 return;
 936         }
 937         if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
 938             (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
 939                 mrec_t *rp;
 940                 /*
 941                  * The possible state changes we need to handle here:
 942                  *      Old State       New State       Report
 943                  *
 944                  *      INCLUDE(X)      INCLUDE(0)      ALLOW(0),BLOCK(X)
 945                  *      EXCLUDE(X)      INCLUDE(0)      TO_IN(0)
 946                  *
 947                  * No need to send the ALLOW(0) report; BLOCK(X) is enough
 948                  */
 949                 if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
 950                         rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
 951                             ilm->ilm_filter, NULL);
 952                 } else {
 953                         rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
 954                             NULL, NULL);
 955                 }
 956                 igmpv3_sendrpt(ill, rp);
 957                 return;
 958         }
 959 }
 960 
 961 /*
 962  * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 963  * and it gets sent after the lock is dropped.
 964  */
 965 void
 966 mld_leavegroup(ilm_t *ilm)
 967 {
 968         ill_t *ill = ilm->ilm_ill;
 969 
 970         ASSERT(ill->ill_isv6);
 971 
 972         ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
 973         if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
 974             ill->ill_mcast_type == MLD_V1_ROUTER &&
 975             (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
 976                 mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
 977                 return;
 978         }
 979         if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
 980             (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
 981                 mrec_t *rp;
 982                 /*
 983                  * The possible state changes we need to handle here:
 984                  *      Old State       New State       Report
 985                  *
 986                  *      INCLUDE(X)      INCLUDE(0)      ALLOW(0),BLOCK(X)
 987                  *      EXCLUDE(X)      INCLUDE(0)      TO_IN(0)
 988                  *
 989                  * No need to send the ALLOW(0) report; BLOCK(X) is enough
 990                  */
 991                 if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
 992                         rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
 993                             ilm->ilm_filter, NULL);
 994                 } else {
 995                         rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
 996                             NULL, NULL);
 997                 }
 998                 mldv2_sendrpt(ill, rp);
 999                 return;
1000         }
1001 }
1002 
1003 /*
1004  * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
1005  * and it gets sent after the lock is dropped.
1006  */
1007 void
1008 igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
1009 {
1010         ill_t *ill;
1011         mrec_t *rp;
1012         ip_stack_t      *ipst = ilm->ilm_ipst;
1013 
1014         ASSERT(ilm != NULL);
1015 
1016         /* state change reports should only be sent if the router is v3 */
1017         if (ilm->ilm_ill->ill_mcast_type != IGMP_V3_ROUTER)
1018                 return;
1019 
1020         ill = ilm->ilm_ill;
1021         ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1022 
1023         /*
1024          * Compare existing(old) state with the new state and prepare
1025          * State Change Report, according to the rules in RFC 3376:
1026          *
1027          *      Old State       New State       State Change Report
1028          *
1029          *      INCLUDE(A)      INCLUDE(B)      ALLOW(B-A),BLOCK(A-B)
1030          *      EXCLUDE(A)      EXCLUDE(B)      ALLOW(A-B),BLOCK(B-A)
1031          *      INCLUDE(A)      EXCLUDE(B)      TO_EX(B)
1032          *      EXCLUDE(A)      INCLUDE(B)      TO_IN(B)
1033          */
1034 
1035         if (ilm->ilm_fmode == fmode) {
1036                 slist_t *a_minus_b = NULL, *b_minus_a = NULL;
1037                 slist_t *allow, *block;
1038                 if (((a_minus_b = l_alloc()) == NULL) ||
1039                     ((b_minus_a = l_alloc()) == NULL)) {
1040                         l_free(a_minus_b);
1041                         if (ilm->ilm_fmode == MODE_IS_INCLUDE)
1042                                 goto send_to_ex;
1043                         else
1044                                 goto send_to_in;
1045                 }
1046                 l_difference(ilm->ilm_filter, flist, a_minus_b);
1047                 l_difference(flist, ilm->ilm_filter, b_minus_a);
1048                 if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1049                         allow = b_minus_a;
1050                         block = a_minus_b;
1051                 } else {
1052                         allow = a_minus_b;
1053                         block = b_minus_a;
1054                 }
1055                 rp = NULL;
1056                 if (!SLIST_IS_EMPTY(allow))
1057                         rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
1058                             allow, rp);
1059                 if (!SLIST_IS_EMPTY(block))
1060                         rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1061                             block, rp);
1062                 l_free(a_minus_b);
1063                 l_free(b_minus_a);
1064         } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1065 send_to_ex:
1066                 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
1067                     NULL);
1068         } else {
1069 send_to_in:
1070                 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
1071                     NULL);
1072         }
1073 
1074         /*
1075          * Need to set up retransmission state; merge the new info with the
1076          * current state (which may be null).  If the timer is not currently
1077          * running, the caller will start it when dropping ill_mcast_lock.
1078          */
1079         rp = mcast_merge_rtx(ilm, rp, flist);
1080         if (ilm->ilm_rtx.rtx_timer == INFINITY) {
1081                 ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
1082                 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
1083                     SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
1084                 mutex_enter(&ipst->ips_igmp_timer_lock);
1085                 ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
1086                     ilm->ilm_rtx.rtx_timer);
1087                 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
1088                 mutex_exit(&ipst->ips_igmp_timer_lock);
1089         }
1090 
1091         igmpv3_sendrpt(ill, rp);
1092 }
1093 
1094 /*
1095  * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
1096  * and it gets sent after the lock is dropped.
1097  */
1098 void
1099 mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
1100 {
1101         ill_t *ill;
1102         mrec_t *rp = NULL;
1103         ip_stack_t      *ipst = ilm->ilm_ipst;
1104 
1105         ASSERT(ilm != NULL);
1106 
1107         ill = ilm->ilm_ill;
1108         ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1109 
1110         /* only need to send if we have an mldv2-capable router */
1111         if (ill->ill_mcast_type != MLD_V2_ROUTER) {
1112                 return;
1113         }
1114 
1115         /*
1116          * Compare existing (old) state with the new state passed in
1117          * and send appropriate MLDv2 State Change Report.
1118          *
1119          *      Old State       New State       State Change Report
1120          *
1121          *      INCLUDE(A)      INCLUDE(B)      ALLOW(B-A),BLOCK(A-B)
1122          *      EXCLUDE(A)      EXCLUDE(B)      ALLOW(A-B),BLOCK(B-A)
1123          *      INCLUDE(A)      EXCLUDE(B)      TO_EX(B)
1124          *      EXCLUDE(A)      INCLUDE(B)      TO_IN(B)
1125          */
1126         if (ilm->ilm_fmode == fmode) {
1127                 slist_t *a_minus_b = NULL, *b_minus_a = NULL;
1128                 slist_t *allow, *block;
1129                 if (((a_minus_b = l_alloc()) == NULL) ||
1130                     ((b_minus_a = l_alloc()) == NULL)) {
1131                         l_free(a_minus_b);
1132                         if (ilm->ilm_fmode == MODE_IS_INCLUDE)
1133                                 goto send_to_ex;
1134                         else
1135                                 goto send_to_in;
1136                 }
1137                 l_difference(ilm->ilm_filter, flist, a_minus_b);
1138                 l_difference(flist, ilm->ilm_filter, b_minus_a);
1139                 if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1140                         allow = b_minus_a;
1141                         block = a_minus_b;
1142                 } else {
1143                         allow = a_minus_b;
1144                         block = b_minus_a;
1145                 }
1146                 if (!SLIST_IS_EMPTY(allow))
1147                         rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
1148                             allow, rp);
1149                 if (!SLIST_IS_EMPTY(block))
1150                         rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1151                             block, rp);
1152                 l_free(a_minus_b);
1153                 l_free(b_minus_a);
1154         } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1155 send_to_ex:
1156                 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
1157                     NULL);
1158         } else {
1159 send_to_in:
1160                 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
1161                     NULL);
1162         }
1163 
1164         /*
1165          * Need to set up retransmission state; merge the new info with the
1166          * current state (which may be null).  If the timer is not currently
1167          * running, the caller will start it when dropping ill_mcast_lock.
1168          */
1169         rp = mcast_merge_rtx(ilm, rp, flist);
1170         ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
1171         if (ilm->ilm_rtx.rtx_timer == INFINITY) {
1172                 ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
1173                 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
1174                     SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
1175                 mutex_enter(&ipst->ips_mld_timer_lock);
1176                 ipst->ips_mld_deferred_next =
1177                     MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
1178                 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
1179                 mutex_exit(&ipst->ips_mld_timer_lock);
1180         }
1181 
1182         mldv2_sendrpt(ill, rp);
1183 }
1184 
/*
 * Service all pending IGMP timers on a single ill, in three phases under
 * ill_mcast_lock (writer):
 *   1. the interface-wide general-query response timer (ill_global_timer),
 *   2. each ilm's group/source-specific query response timer (ilm_timer),
 *   3. each ilm's state-change report retransmit timer (ilm_rtx).
 * Timer values are absolute millisecond timestamps; INFINITY means "not
 * running".  Queued reports are transmitted via ill_mcast_send_queued()
 * after the lock is dropped.  Returns the number of milliseconds until
 * the soonest remaining event on this ill (INFINITY if none); the caller
 * is responsible for rescheduling the global timeout.
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
	uint_t	next = INFINITY, current;
	ilm_t	*ilm;
	mrec_t	*rp = NULL;
	mrec_t	*rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);

	current = CURRENT_MSTIME;
	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	/*
	 * CURRENT_OFFSET slop treats a timer due "almost now" as expired,
	 * so we don't reschedule for a sub-tick residue.
	 */
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			/* Accumulate one record per group into rp. */
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		igmpv3_sendrpt(ill, rp);
		rp = NULL;
	} else {
		/* Not yet due; it bounds our return value instead. */
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	/*
	 * Phase 2 and 3 share this loop: each iteration first handles the
	 * ilm's query-response timer, then falls through (or jumps) to its
	 * retransmit timer at per_ilm_rtxtimer.
	 */
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			/* Not yet due; just track the soonest deadline. */
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
		} else {
			/* IGMPv3: answer the pending group/source query. */
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			igmpv3_sendrpt(ill, rp);
			rp = NULL;
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			/* Not yet due; just track the soonest deadline. */
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			continue;
		}
		if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* More retransmits to go; re-arm with a new delay. */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			/* Retransmission complete; drop the rtx lists. */
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		igmpv3_sendrpt(ill, rtxrp);
		rtxrp = NULL;
	}

	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	/* Defer ill_mcast_timer_start() until the caller is done */

	return (next);
}
1363 
1364 /*
1365  * igmp_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
1367  * Returns number of ticks to next event (or 0 if none).
1368  *
1369  * As part of multicast join and leave igmp we may need to send out an
1370  * igmp request. The igmp related state variables in the ilm are protected
1371  * by ill_mcast_lock. A single global igmp timer is used to track igmp timeouts.
1372  * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
1373  * starts the igmp timer if needed. It serializes multiple threads trying to
1374  * simultaneously start the timer using the igmp_timer_setter_active flag.
1375  *
1376  * igmp_input() receives igmp queries and responds to the queries
1377  * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
1379  * performs the action exclusively after acquiring ill_mcast_lock.
1380  *
1381  * The igmp_slowtimeo() function is called thru another timer.
1382  * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
1383  */
1384 void
1385 igmp_timeout_handler(void *arg)
1386 {
1387         ill_t   *ill;
1388         uint_t  global_next = INFINITY;
1389         uint_t  next;
1390         ill_walk_context_t ctx;
1391         ip_stack_t *ipst = arg;
1392 
1393         ASSERT(arg != NULL);
1394         mutex_enter(&ipst->ips_igmp_timer_lock);
1395         ASSERT(ipst->ips_igmp_timeout_id != 0);
1396         ipst->ips_igmp_timeout_id = 0;
1397         ipst->ips_igmp_timer_scheduled_last = 0;
1398         ipst->ips_igmp_time_to_next = 0;
1399         mutex_exit(&ipst->ips_igmp_timer_lock);
1400 
1401         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1402         ill = ILL_START_WALK_V4(&ctx, ipst);
1403         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
1404                 ASSERT(!ill->ill_isv6);
1405                 /* Make sure the ill isn't going away. */
1406                 if (!ill_check_and_refhold(ill))
1407                         continue;
1408                 rw_exit(&ipst->ips_ill_g_lock);
1409                 next = igmp_timeout_handler_per_ill(ill);
1410                 if (next < global_next)
1411                         global_next = next;
1412                 ill_refrele(ill);
1413                 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1414         }
1415         rw_exit(&ipst->ips_ill_g_lock);
1416         if (global_next != INFINITY)
1417                 igmp_start_timers(global_next, ipst);
1418 }
1419 
1420 /*
1421  * mld_timeout_handler:
1422  * Called when there are timeout events, every next (tick).
1423  * Returns number of ticks to next event (or 0 if none).
1424  */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY, current;	/* next: ticks to next event */
	mrec_t	*rp, *rtxrp;	/* report and retransmit record lists */
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	/* Writer lock: we modify per-ilm timer and filter state below. */
	rw_enter(&ill->ill_mcast_lock, RW_WRITER);

	current = CURRENT_MSTIME;
	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		/* Global timer has expired; answer the v2 general query. */
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mldv2_sendrpt(ill, rp);
	} else {
		/* Global timer still pending; fold it into the next event. */
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	/*
	 * Walk every membership on the ill, handling the group-specific
	 * response timer (ilm_timer) and then the retransmit timer
	 * (ilm_rtx) for each.  Records for expired timers are accumulated
	 * in rp/rtxrp and sent as v2 reports after the loop.
	 */
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			/* Not yet due; remember how long until it fires. */
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			/* MLDv1 router present: send a v1 report directly. */
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * No pending sources (or no memory for the
				 * scratch list): report the full filter
				 * state for the group.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			/* Retransmit not yet due; track the next event. */
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* More retransmits remain; rearm with random delay. */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			/* Retransmit sequence done; discard change lists. */
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/* Send the accumulated v2 records (no-ops when the lists are NULL). */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	/* Defer ill_mcast_timer_start() until the caller is done */

	return (next);
}
1596 
1597 /*
1598  * mld_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
1600  * Returns number of ticks to next event (or 0 if none).
1601  * MT issues are same as igmp_timeout_handler
1602  */
1603 void
1604 mld_timeout_handler(void *arg)
1605 {
1606         ill_t   *ill;
1607         uint_t  global_next = INFINITY;
1608         uint_t  next;
1609         ill_walk_context_t ctx;
1610         ip_stack_t *ipst = arg;
1611 
1612         ASSERT(arg != NULL);
1613         mutex_enter(&ipst->ips_mld_timer_lock);
1614         ASSERT(ipst->ips_mld_timeout_id != 0);
1615         ipst->ips_mld_timeout_id = 0;
1616         ipst->ips_mld_timer_scheduled_last = 0;
1617         ipst->ips_mld_time_to_next = 0;
1618         mutex_exit(&ipst->ips_mld_timer_lock);
1619 
1620         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1621         ill = ILL_START_WALK_V6(&ctx, ipst);
1622         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
1623                 ASSERT(ill->ill_isv6);
1624                 /* Make sure the ill isn't going away. */
1625                 if (!ill_check_and_refhold(ill))
1626                         continue;
1627                 rw_exit(&ipst->ips_ill_g_lock);
1628                 next = mld_timeout_handler_per_ill(ill);
1629                 if (next < global_next)
1630                         global_next = next;
1631                 ill_refrele(ill);
1632                 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1633         }
1634         rw_exit(&ipst->ips_ill_g_lock);
1635         if (global_next != INFINITY)
1636                 mld_start_timers(global_next, ipst);
1637 }
1638 
1639 /*
1640  * Calculate the Older Version Querier Present timeout value, in number
1641  * of slowtimo intervals, for the given ill.
1642  */
/*
 * NOTE: function-like macro; expands "ill" more than once, so callers must
 * pass a side-effect-free expression (all current callers pass a pointer).
 * Combines the robustness-variable * query-interval product with the query
 * response interval and scales the result into slowtimo intervals.
 */
#define OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
1646 
1647 /*
1648  * igmp_slowtimo:
 * - Resets to a newer version router if we didn't hear from the router
1650  *   in IGMP_AGE_THRESHOLD seconds.
1651  * - Resets slowtimeout.
1652  * Check for ips_igmp_max_version ensures that we don't revert to a higher
1653  * IGMP version than configured.
1654  */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V4_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		/* Visit each ill instance hanging off this interface group. */
		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			/* Make sure the ill isn't going away. */
			if (!ill_check_and_refhold(ill))
				continue;
			/*
			 * Drop the global list lock while working on this
			 * ill (the refhold keeps it alive); per-ill mcast
			 * state is changed under ill_mcast_lock.
			 */
			rw_exit(&ipst->ips_ill_g_lock);
			rw_enter(&ill->ill_mcast_lock, RW_WRITER);
			/* Age the older-version-querier-present timers. */
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			/*
			 * V1 querier timed out: move up to V2 if a V2
			 * querier has been seen (or V2 is the configured
			 * max), otherwise go straight to V3.
			 */
			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				if ((ill->ill_mcast_v2_tset > 0) ||
				    (ipst->ips_igmp_max_version ==
				    IGMP_V2_ROUTER)) {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V2_ROUTER;
				} else {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V3_ROUTER;
				}
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_dec_16(&ifp->illif_mcast_v1);
			}
			/* V2 querier timed out: revert to V3 if allowed. */
			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
				ip1dbg(("V2 query timer expired on "
				    "%s; switching mode to IGMP_V3\n",
				    ill->ill_name));
				ill->ill_mcast_type = IGMP_V3_ROUTER;
				ill->ill_mcast_v2_time = 0;
				ill->ill_mcast_v2_tset = 0;
				atomic_dec_16(&ifp->illif_mcast_v2);
			}
			rw_exit(&ill->ill_mcast_lock);
			ill_refrele(ill);
			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* Restart any deferred multicast timers, then reschedule ourselves. */
	ill_mcast_timer_start(ipst);
	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}
1744 
1745 /*
1746  * mld_slowtimo:
1747  * - Resets to newer version if we didn't hear from the older version router
1748  *   in MLD_AGE_THRESHOLD seconds.
1749  * - Restarts slowtimeout.
1750  * Check for ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
1752  */
void
mld_slowtimo(void *arg)
{
	ill_t *ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/* Skip interface groups with no MLDv1 queriers outstanding. */
		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			/* Make sure the ill isn't going away. */
			if (!ill_check_and_refhold(ill))
				continue;
			/*
			 * Drop the global list lock while working on this
			 * ill (the refhold keeps it alive); per-ill mcast
			 * state is changed under ill_mcast_lock.
			 */
			rw_exit(&ipst->ips_ill_g_lock);
			rw_enter(&ill->ill_mcast_lock, RW_WRITER);
			/* Age the older-version-querier-present timer. */
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			/* V1 querier timed out: revert to MLDv2 if allowed. */
			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				ip1dbg(("MLD query timer expired on"
				    " %s; switching mode to MLD_V2\n",
				    ill->ill_name));
				ill->ill_mcast_type = MLD_V2_ROUTER;
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_dec_16(&ifp->illif_mcast_v1);
			}
			rw_exit(&ill->ill_mcast_lock);
			ill_refrele(ill);
			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* Restart any deferred multicast timers, then reschedule ourselves. */
	ill_mcast_timer_start(ipst);
	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}
1803 
1804 /*
1805  * igmp_sendpkt:
1806  * This will send to ip_output_simple just like icmp_inbound.
1807  */
1808 static void
1809 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
1810 {
1811         mblk_t  *mp;
1812         igmpa_t *igmpa;
1813         uint8_t *rtralert;
1814         ipha_t  *ipha;
1815         int     hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
1816         size_t  size  = hdrlen + sizeof (igmpa_t);
1817         ill_t   *ill  = ilm->ilm_ill;
1818         ip_stack_t *ipst = ill->ill_ipst;
1819 
1820         ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1821 
1822         mp = allocb(size, BPRI_HI);
1823         if (mp == NULL) {
1824                 return;
1825         }
1826         mp->b_wptr = mp->b_rptr + size;
1827 
1828         ipha = (ipha_t *)mp->b_rptr;
1829         rtralert = (uint8_t *)&(ipha[1]);
1830         igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
1831         igmpa->igmpa_type   = type;
1832         igmpa->igmpa_code   = 0;
1833         igmpa->igmpa_group  = ilm->ilm_addr;
1834         igmpa->igmpa_cksum  = 0;
1835         igmpa->igmpa_cksum  = IP_CSUM(mp, hdrlen, 0);
1836 
1837         rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1838         rtralert[1] = RTRALERT_LEN;
1839         rtralert[2] = 0;
1840         rtralert[3] = 0;
1841 
1842         ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
1843             | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
1844         ipha->ipha_type_of_service   = 0;
1845         ipha->ipha_length = htons(size);
1846         ipha->ipha_ident = 0;
1847         ipha->ipha_fragment_offset_and_flags = 0;
1848         ipha->ipha_ttl               = IGMP_TTL;
1849         ipha->ipha_protocol  = IPPROTO_IGMP;
1850         ipha->ipha_hdr_checksum      = 0;
1851         ipha->ipha_dst               = addr ? addr : igmpa->igmpa_group;
1852         ipha->ipha_src               = INADDR_ANY;
1853 
1854         ill_mcast_queue(ill, mp);
1855 
1856         ++ipst->ips_igmpstat.igps_snd_reports;
1857 }
1858 
1859 /*
1860  * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill.
1861  * The report will contain one group record
1862  * for each element of reclist.  If this causes packet length to
1863  * exceed ill->ill_mc_mtu, multiple reports are sent.
1864  * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
1865  * and those buffers are freed here.
1866  */
static void
igmpv3_sendrpt(ill_t *ill, mrec_t *reclist)
{
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t *mp;
	ipha_t *ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int i, j, numrec, more_src_cnt;
	size_t hdrsize, size, rsize;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;	/* head of records for next packet */
	boolean_t morepkts;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * First pass: walk the remaining records, accumulating "size"
	 * and "numrec" until either the list is exhausted or adding the
	 * next record would exceed the interface MTU.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_mc_mtu) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_mc_mtu - (size +
				    sizeof (grphdra_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 */
				if (srcspace <= sizeof (ipaddr_t))
					continue;

				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					/* EXCLUDE types: truncate the list. */
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					/*
					 * Non-EXCLUDE types: send the first
					 * srcsperpkt sources now and carry
					 * the remainder into the next packet.
					 */
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		/* Allocation failed; free the remaining records and bail. */
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);

	/* Lay out IP header, router-alert option, v3 report, records. */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	/* Second pass: emit the numrec group records chosen above. */
	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	/* Checksum covers the IGMP portion only (from hdrsize onward). */
	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	/* IP Router Alert option (RFC 2113). */
	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	/* v3 reports always go to the all-IGMPv3-routers group (224.0.0.22) */
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = INADDR_ANY;

	ill_mcast_queue(ill, mp);

	++ipst->ips_igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the unsent sources to the front of the
			 * slist so the next iteration starts with them.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* This function owns and frees the caller's record list. */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}
2031 
2032 /*
2033  * mld_input:
2034  * Return NULL for a bad packet that is discarded here.
2035  * Return mp if the message is OK and should be handed to "raw" receivers.
2036  * Callers of mld_input() may need to reinitialize variables that were copied
2037  * from the mblk as this calls pullupmsg().
2038  */
mblk_t *
mld_input(mblk_t *mp, ip_recv_attr_t *ira)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr;
	uint_t		next;
	int		mldlen;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	/* Valid MLD messages are always sent with an IPv6 hop limit of 1 */
	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return (NULL);
	}

	/* Get to the icmp header part */
	hdr_length = ira->ira_ip_hdr_length;
	exthdr_length = hdr_length - IPV6_HDR_LEN;

	/* MLD message length: IPv6 payload minus any extension headers */
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY:
		/*
		 * packet length differentiates between v1 and v2.  v1
		 * query should be exactly 24 octets long; v2 is >= 28.
		 * Also use the v1 handler when administratively capped
		 * at MLDv1 (ips_mld_max_version).
		 */
		if ((mldlen == MLD_MINLEN) ||
		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
			next = mld_query_in(mldh, ill);
		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
		} else {
			/* Length strictly between 24 and 28: malformed */
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return (NULL);
		}
		/*
		 * The query handlers return 0 on error; no timers to
		 * start, but still hand the packet to raw receivers.
		 */
		if (next == 0) {
			return (mp);
		}

		if (next != INFINITY)
			mld_start_timers(next, ipst);
		break;

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    &ip6h->ip6_src)) {
				if (ip_debug > 1) {
					char	buf1[INET6_ADDRSTRLEN];

					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "mld_input: we are only "
					    "member src %s\n",
					    inet_ntop(AF_INET6, &ip6h->ip6_src,
					    buf1, sizeof (buf1)));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

		/* The reported address must itself be multicast */
		v6group_ptr = &mldh->mld_addr;
		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembBadReports);
			freemsg(mp);
			return (NULL);
		}


		/*
		 * If we belong to the group being reported, and we are a
		 * 'Delaying member' per the RFC terminology, stop our timer
		 * for that group and 'clear flag' i.e. mark ilm_state as
		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
		 * membership entries for the same group address (one per zone)
		 * so we need to walk the ill_ilm list.
		 */
		rw_enter(&ill->ill_mcast_lock, RW_WRITER);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
				continue;
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembOurReports);

			/* Someone else reported; cancel our pending report */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
		rw_exit(&ill->ill_mcast_lock);
		/*
		 * No packets have been sent above - no
		 * ill_mcast_send_queued is needed.
		 */
		ill_mcast_timer_start(ill->ill_ipst);
		break;

	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
		break;
	}
	/* Hand the (still valid) message to raw ICMPv6 receivers */
	return (mp);
}
2178 
2179 /*
2180  * Handles an MLDv1 Listener Query.  Returns 0 on error, or the appropriate
2181  * (non-zero, unsigned) timer value to be set on success.
2182  */
2183 static uint_t
2184 mld_query_in(mld_hdr_t *mldh, ill_t *ill)
2185 {
2186         ilm_t   *ilm;
2187         int     timer;
2188         uint_t  next, current;
2189         in6_addr_t *v6group;
2190 
2191         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2192 
2193         /*
2194          * In the MLD specification, there are 3 states and a flag.
2195          *
2196          * In Non-Listener state, we simply don't have a membership record.
2197          * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
2198          * In Idle Member state, our timer is not running (ilm->ilm_timer ==
2199          * INFINITY)
2200          *
2201          * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
2202          * we have heard a report from another member, or IGMP_IREPORTEDLAST
2203          * if I sent the last report.
2204          */
2205         v6group = &mldh->mld_addr;
2206         if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
2207             ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
2208                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
2209                 return (0);
2210         }
2211 
2212         /* Need to do compatibility mode checking */
2213         rw_enter(&ill->ill_mcast_lock, RW_WRITER);
2214         ill->ill_mcast_v1_time = 0;
2215         ill->ill_mcast_v1_tset = 1;
2216         if (ill->ill_mcast_type == MLD_V2_ROUTER) {
2217                 ip1dbg(("Received MLDv1 Query on %s, switching mode to "
2218                     "MLD_V1_ROUTER\n", ill->ill_name));
2219                 atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
2220                 ill->ill_mcast_type = MLD_V1_ROUTER;
2221         }
2222 
2223         timer = (int)ntohs(mldh->mld_maxdelay);
2224         if (ip_debug > 1) {
2225                 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
2226                     "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
2227                     timer, (int)mldh->mld_type);
2228         }
2229 
2230         /*
2231          * -Start the timers in all of our membership records for
2232          * the physical interface on which the query arrived,
2233          * excl:
2234          *      1.  those that belong to the "all hosts" group,
2235          *      2.  those with 0 scope, or 1 node-local scope.
2236          *
2237          * -Restart any timer that is already running but has a value
2238          * longer that the requested timeout.
2239          * -Use the value specified in the query message as the
2240          * maximum timeout.
2241          */
2242         next = INFINITY;
2243 
2244         current = CURRENT_MSTIME;
2245         for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
2246                 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
2247 
2248                 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2249                     IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2250                     IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
2251                         continue;
2252                 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
2253                     &ipv6_all_hosts_mcast)) &&
2254                     (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
2255                     (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
2256                         if (timer == 0) {
2257                                 /* Respond immediately */
2258                                 ilm->ilm_timer = INFINITY;
2259                                 ilm->ilm_state = IGMP_IREPORTEDLAST;
2260                                 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
2261                                 break;
2262                         }
2263                         if (ilm->ilm_timer > timer) {
2264                                 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
2265                                 if (ilm->ilm_timer < next)
2266                                         next = ilm->ilm_timer;
2267                                 ilm->ilm_timer += current;
2268                         }
2269                         break;
2270                 }
2271         }
2272         rw_exit(&ill->ill_mcast_lock);
2273         /* Send any deferred/queued IP packets */
2274         ill_mcast_send_queued(ill);
2275         ill_mcast_timer_start(ill->ill_ipst);
2276 
2277         return (next);
2278 }
2279 
2280 /*
2281  * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
2282  * returns the appropriate (non-zero, unsigned) timer value (which may
2283  * be INFINITY) to be set.
2284  */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t	*ilm;
	in6_addr_t *v6group, *src_array;
	uint_t	next, numsrc, i, mrd, delay, qqi, current;
	uint8_t qrv;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	/* The source address array immediately follows the fixed header */
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/* extract Maximum Response Delay from code in header */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		/* Values at/above FPMIN use mantissa/exponent encoding */
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	/* A zero MRD gets the protocol default response interval */
	if (mrd == 0)
		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);

	/* Pick our random response delay in [0, mrd] */
	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	/* Adopt the querier's robustness variable (default if zero) */
	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/* QQIC uses the same mantissa/exponent encoding for large values */
	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	if (ill->ill_global_timer < (current + delay)) {
		rw_exit(&ill->ill_mcast_lock);
		return (next);
	}

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		ill->ill_global_timer = current + delay;
		next = delay;
	} else {
		/* group or group/source specific query */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				/*
				 * Too many sources, or no memory for a
				 * pending list: degrade to a plain
				 * group-specific response.
				 */
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				/* Merge query sources into the pending list */
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* Convert absolute running timer back to relative */
			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			/* ... and back to an absolute deadline */
			ilm->ilm_timer += current;
			break;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/*
	 * No packets have been sent above - no
	 * ill_mcast_send_queued is needed.
	 */
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}
2424 
2425 /*
2426  * Send MLDv1 response packet with hoplimit 1
2427  */
2428 static void
2429 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
2430 {
2431         mblk_t          *mp;
2432         mld_hdr_t       *mldh;
2433         ip6_t           *ip6h;
2434         ip6_hbh_t       *ip6hbh;
2435         struct ip6_opt_router   *ip6router;
2436         size_t          size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
2437         ill_t           *ill = ilm->ilm_ill;
2438 
2439         ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
2440 
2441         /*
2442          * We need to place a router alert option in this packet.  The length
2443          * of the options must be a multiple of 8.  The hbh option header is 2
2444          * bytes followed by the 4 byte router alert option.  That leaves
2445          * 2 bytes of pad for a total of 8 bytes.
2446          */
2447         const int       router_alert_length = 8;
2448 
2449         ASSERT(ill->ill_isv6);
2450 
2451         size += router_alert_length;
2452         mp = allocb(size, BPRI_HI);
2453         if (mp == NULL)
2454                 return;
2455         bzero(mp->b_rptr, size);
2456         mp->b_wptr = mp->b_rptr + size;
2457 
2458         ip6h = (ip6_t *)mp->b_rptr;
2459         ip6hbh = (struct ip6_hbh *)&ip6h[1];
2460         ip6router = (struct ip6_opt_router *)&ip6hbh[1];
2461         /*
2462          * A zero is a pad option of length 1.  The bzero of the whole packet
2463          * above will pad between ip6router and mld.
2464          */
2465         mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);
2466 
2467         mldh->mld_type = type;
2468         mldh->mld_addr = ilm->ilm_v6addr;
2469 
2470         ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2471         ip6router->ip6or_len = 2;
2472         ip6router->ip6or_value[0] = 0;
2473         ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2474 
2475         ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2476         ip6hbh->ip6h_len = 0;
2477 
2478         ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2479         ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
2480         ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2481         ip6h->ip6_hops = MLD_HOP_LIMIT;
2482         if (v6addr == NULL)
2483                 ip6h->ip6_dst =  ilm->ilm_v6addr;
2484         else
2485                 ip6h->ip6_dst = *v6addr;
2486 
2487         ip6h->ip6_src = ipv6_all_zeros;
2488         /*
2489          * Prepare for checksum by putting icmp length in the icmp
2490          * checksum field. The checksum is calculated in ip_output.
2491          */
2492         mldh->mld_cksum = htons(sizeof (*mldh));
2493 
2494         ill_mcast_queue(ill, mp);
2495 }
2496 
2497 /*
2498  * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
2499  * report will contain one multicast address record for each element of
2500  * reclist.  If this causes packet length to exceed ill->ill_mc_mtu,
2501  * multiple reports are sent.  reclist is assumed to be made up of
2502  * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
2503  */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
	mblk_t		*mp;
	mld2r_t		*mld2r;
	mld2mar_t	*mld2mar;
	in6_addr_t	*srcarray;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size, optlen, padlen, icmpsize, rsize;
	int		i, numrec, more_src_cnt;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;

	/* If there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	ASSERT(ill->ill_isv6);
	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/*
	 * Total option length (optlen + padlen) must be a multiple of
	 * 8 bytes.  We assume here that optlen <= 8, so the total option
	 * length will be 8.  Assert this in case anything ever changes.
	 */
	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
	ASSERT(optlen <= 8);
	padlen = 8 - optlen;
nextpkt:
	icmpsize = sizeof (mld2r_t);
	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
	morepkts = B_FALSE;
	more_src_cnt = 0;
	/*
	 * First pass: size up how many records (and, for an oversized
	 * first record, how many of its sources) fit in this packet,
	 * remembering in next_reclist where the next packet must resume.
	 */
	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
	    rp = rp->mrec_next, numrec++) {
		rsize = sizeof (mld2mar_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
		if (size + rsize > ill->ill_mc_mtu) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_mc_mtu -
				    (size + sizeof (mld2mar_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 *
				 * NOTE(review): srcspace (signed int) is
				 * compared against a size_t; if srcspace
				 * could ever go negative it would convert
				 * to a huge unsigned value and pass this
				 * check — confirm ill_mc_mtu always leaves
				 * this non-negative.
				 */
				if (srcspace <= sizeof (in6_addr_t))
					continue;

				srcsperpkt = srcspace / sizeof (in6_addr_t);
				/*
				 * Increment icmpsize and size, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				rsize = sizeof (mld2mar_t) +
				    (srcsperpkt * sizeof (in6_addr_t));
				icmpsize += rsize;
				size += rsize;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					/* EXCLUDE types: truncate the list */
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					/*
					 * Other types: resend this mrec in
					 * the next packet with the sources
					 * we couldn't fit here.
					 */
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		icmpsize += rsize;
		size += rsize;
	}

	/* On allocation failure just free the records and give up */
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		goto free_reclist;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6h = (ip6_t *)mp->b_rptr;
	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
	mld2mar = (mld2mar_t *)&(mld2r[1]);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
	ip6h->ip6_src = ipv6_all_zeros;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	/*
	 * ip6h_len is the number of 8-byte words, not including the first
	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
	 */
	ip6hbh->ip6h_len = 0;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
	mld2r->mld2r_nummar = htons(numrec);
	/*
	 * Prepare for the checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_output_simple.
	 */
	mld2r->mld2r_cksum = htons(icmpsize);

	/* Second pass: fill in one address record per mrec that fit */
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		mld2mar->mld2mar_type = rp->mrec_type;
		mld2mar->mld2mar_auxlen = 0;
		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		mld2mar->mld2mar_group = rp->mrec_group;
		srcarray = (in6_addr_t *)&(mld2mar[1]);

		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
			srcarray[i] = rp->mrec_srcs.sl_addr[i];

		mld2mar = (mld2mar_t *)&(srcarray[i]);
	}

	ill_mcast_queue(ill, mp);

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Slide the not-yet-sent sources down to the
			 * front of the list before building the next
			 * packet for this mrec.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* All mrec buffers came from mcast_bldmrec(); free them here */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}
2683 
2684 static mrec_t *
2685 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
2686     mrec_t *next)
2687 {
2688         mrec_t *rp;
2689         int i;
2690 
2691         if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
2692             SLIST_IS_EMPTY(srclist))
2693                 return (next);
2694 
2695         rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
2696         if (rp == NULL)
2697                 return (next);
2698 
2699         rp->mrec_next = next;
2700         rp->mrec_type = type;
2701         rp->mrec_auxlen = 0;
2702         rp->mrec_group = *grp;
2703         if (srclist == NULL) {
2704                 rp->mrec_srcs.sl_numsrc = 0;
2705         } else {
2706                 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
2707                 for (i = 0; i < srclist->sl_numsrc; i++)
2708                         rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
2709         }
2710 
2711         return (rp);
2712 }
2713 
2714 /*
2715  * Set up initial retransmit state.  If memory cannot be allocated for
2716  * the source lists, simply create as much state as is possible; memory
2717  * allocation failures are considered one type of transient error that
2718  * the retransmissions are designed to overcome (and if they aren't
2719  * transient, there are bigger problems than failing to notify the
2720  * router about multicast group membership state changes).
2721  */
2722 static void
2723 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
2724     slist_t *flist)
2725 {
2726         /*
2727          * There are only three possibilities for rtype:
2728          *      New join, transition from INCLUDE {} to INCLUDE {flist}
2729          *        => rtype is ALLOW_NEW_SOURCES
2730          *      New join, transition from INCLUDE {} to EXCLUDE {flist}
2731          *        => rtype is CHANGE_TO_EXCLUDE
2732          *      State change that involves a filter mode change
2733          *        => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
2734          */
2735         ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
2736             rtype == ALLOW_NEW_SOURCES);
2737 
2738         rtxp->rtx_cnt = ill->ill_mcast_rv;
2739 
2740         switch (rtype) {
2741         case CHANGE_TO_EXCLUDE:
2742                 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
2743                 CLEAR_SLIST(rtxp->rtx_allow);
2744                 COPY_SLIST(flist, rtxp->rtx_block);
2745                 break;
2746         case ALLOW_NEW_SOURCES:
2747         case CHANGE_TO_INCLUDE:
2748                 rtxp->rtx_fmode_cnt =
2749                     rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
2750                 CLEAR_SLIST(rtxp->rtx_block);
2751                 COPY_SLIST(flist, rtxp->rtx_allow);
2752                 break;
2753         }
2754 }
2755 
2756 /*
2757  * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
2758  * RFC 3376 section 5.1, covers three cases:
2759  *      * The current state change is a filter mode change
2760  *              Set filter mode retransmit counter; set retransmit allow or
2761  *              block list to new source list as appropriate, and clear the
2762  *              retransmit list that was not set; send TO_IN or TO_EX with
2763  *              new source list.
2764  *      * The current state change is a source list change, but the filter
2765  *        mode retransmit counter is > 0
2766  *              Decrement filter mode retransmit counter; set retransmit
2767  *              allow or block list to  new source list as appropriate,
2768  *              and clear the retransmit list that was not set; send TO_IN
2769  *              or TO_EX with new source list.
2770  *      * The current state change is a source list change, and the filter
2771  *        mode retransmit counter is 0.
2772  *              Merge existing rtx allow and block lists with new state:
2773  *                rtx_allow = (new allow + rtx_allow) - new block
2774  *                rtx_block = (new block + rtx_block) - new allow
2775  *              Send ALLOW and BLOCK records for new retransmit lists;
2776  *              decrement retransmit counter.
2777  *
2778  * As is the case for mcast_init_rtx(), memory allocation failures are
2779  * acceptable; we just create as much state as we can.
2780  */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	ill = ilm->ilm_ill;

	/* Nothing to merge if there is no pending state change. */
	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	/* Restart the overall retransmit count ([Robustness Variable]). */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		/*
		 * Re-send a TO_IN or TO_EX (depending on the current
		 * filter mode) carrying the new source list; the rtx
		 * allow/block list that matches the mode is replaced
		 * with flist and the other one is cleared.
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		/*
		 * Locate the ALLOW and BLOCK records in the list; only
		 * these two types can appear here (asserted below).  If a
		 * type occurs more than once, the last one wins.
		 */
		allow_mrec = block_mrec = NULL;
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			/* rtx_allow -= mrec_block (in place) */
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			/* rtx_block -= mrec_allow; mrec_allow += rtx_allow */
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			/* mrec_block += rtx_block, then save as new rtx_block */
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/*
			 * No BLOCK record in the incoming list; build one
			 * from the (already-trimmed) rtx_block and chain it
			 * ahead of the ALLOW record.
			 */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			/* save merged allow list as the new rtx_allow */
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/*
			 * Symmetric case: no ALLOW record came in; build one
			 * from rtx_allow and chain it ahead of the BLOCK
			 * record.  (mreclist is non-NULL here, so at least
			 * one of allow_mrec/block_mrec is set.)
			 */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}