Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ip/igmp.c
+++ new/usr/src/uts/common/inet/ip/igmp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24 /* Copyright (c) 1990 Mentat Inc. */
25 25
26 26 /*
27 27 * Internet Group Management Protocol (IGMP) routines.
28 28 * Multicast Listener Discovery Protocol (MLD) routines.
29 29 *
30 30 * Written by Steve Deering, Stanford, May 1988.
31 31 * Modified by Rosen Sharma, Stanford, Aug 1994.
32 32 * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
33 33 *
34 34 * MULTICAST 3.5.1.1
35 35 */
36 36
37 37 #include <sys/types.h>
38 38 #include <sys/stream.h>
39 39 #include <sys/stropts.h>
40 40 #include <sys/strlog.h>
41 41 #include <sys/strsun.h>
42 42 #include <sys/systm.h>
43 43 #include <sys/ddi.h>
44 44 #include <sys/sunddi.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/atomic.h>
47 47 #include <sys/zone.h>
48 48 #include <sys/callb.h>
49 49 #include <sys/param.h>
50 50 #include <sys/socket.h>
51 51 #include <inet/ipclassifier.h>
52 52 #include <net/if.h>
53 53 #include <net/route.h>
54 54 #include <netinet/in.h>
55 55 #include <netinet/igmp_var.h>
56 56 #include <netinet/ip6.h>
57 57 #include <netinet/icmp6.h>
58 58 #include <inet/ipsec_impl.h>
59 59
60 60 #include <inet/common.h>
61 61 #include <inet/mi.h>
62 62 #include <inet/nd.h>
63 63 #include <inet/tunables.h>
64 64 #include <inet/ip.h>
65 65 #include <inet/ip6.h>
66 66 #include <inet/ip_multi.h>
67 67 #include <inet/ip_listutils.h>
68 68
69 69 #include <netinet/igmp.h>
70 70 #include <inet/ip_ndp.h>
71 71 #include <inet/ip_if.h>
72 72
73 73 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
74 74 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
75 75 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill);
76 76 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
77 77 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
78 78 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
79 79 static void igmpv3_sendrpt(ill_t *ill, mrec_t *reclist);
80 80 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
81 81 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
82 82 slist_t *srclist, mrec_t *next);
83 83 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
84 84 mcast_record_t rtype, slist_t *flist);
85 85 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
86 86
87 87 /*
88 88 * Macros used to do timer len conversions. Timer values are always
89 89 * stored and passed to the timer functions as milliseconds; but the
90 90 * default values and values from the wire may not be.
91 91 *
92 92 * And yes, it's obscure, but decisecond is easier to abbreviate than
93 93 * "tenths of a second".
94 94 */
95 95 #define DSEC_TO_MSEC(dsec) ((dsec) * 100)
96 96 #define SEC_TO_MSEC(sec) ((sec) * 1000)
97 97
98 98 /*
99 99 * A running timer (scheduled thru timeout) can be cancelled if another
100 100 * timer with a shorter timeout value is scheduled before it has timed
101 101 * out. When the shorter timer expires, the original timer is updated
102 102 * to account for the time elapsed while the shorter timer ran; but this
103 103 * does not take into account the amount of time already spent in timeout
104 104 * state before being preempted by the shorter timer, that is the time
105 105 * interval between time scheduled to time cancelled. This can cause
106 106 * delays in sending out multicast membership reports. To resolve this
107 107 * problem, wallclock time (absolute time) is used instead of deltas
108 108 * (relative time) to track timers.
109 109 *
110 110 * The MACRO below gets the lbolt value, used for proper timer scheduling
111 111 * and firing. Therefore multicast membership reports are sent on time.
112 112 * The timer does not exactly fire at the time it was scheduled to fire,
113 113 * there is a difference of a few milliseconds observed. An offset is used
114 114 * to take care of the difference.
115 115 */
116 116
117 117 #define CURRENT_MSTIME ((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
118 118 #define CURRENT_OFFSET (999)
119 119
120 120 /*
121 121 * The first multicast join will trigger the igmp timers / mld timers
122 122 * The unit for next is milliseconds.
123 123 */
124 124 void
125 125 igmp_start_timers(unsigned next, ip_stack_t *ipst)
126 126 {
127 127 int time_left;
128 128 int ret;
129 129 timeout_id_t id;
130 130
131 131 ASSERT(next != 0 && next != INFINITY);
132 132
133 133 mutex_enter(&ipst->ips_igmp_timer_lock);
134 134
135 135 if (ipst->ips_igmp_timer_setter_active) {
136 136 /*
137 137 * Serialize timer setters, one at a time. If the
138 138 * timer is currently being set by someone,
139 139 * just record the next time when it has to be
140 140 * invoked and return. The current setter will
141 141 * take care.
142 142 */
143 143 ipst->ips_igmp_time_to_next =
144 144 MIN(ipst->ips_igmp_time_to_next, next);
145 145 mutex_exit(&ipst->ips_igmp_timer_lock);
146 146 return;
147 147 } else {
148 148 ipst->ips_igmp_timer_setter_active = B_TRUE;
149 149 }
150 150 if (ipst->ips_igmp_timeout_id == 0) {
151 151 /*
152 152 * The timer is inactive. We need to start a timer
153 153 */
154 154 ipst->ips_igmp_time_to_next = next;
155 155 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
156 156 (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
157 157 ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
158 158 ipst->ips_igmp_timer_setter_active = B_FALSE;
159 159 mutex_exit(&ipst->ips_igmp_timer_lock);
160 160 return;
161 161 }
162 162
163 163 /*
164 164 * The timer was scheduled sometime back for firing in
165 165 * 'igmp_time_to_next' ms and is active. We need to
166 166 * reschedule the timeout if the new 'next' will happen
167 167 * earlier than the currently scheduled timeout
168 168 */
169 169 time_left = ipst->ips_igmp_timer_scheduled_last +
170 170 MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
171 171 if (time_left < MSEC_TO_TICK(next)) {
172 172 ipst->ips_igmp_timer_setter_active = B_FALSE;
173 173 mutex_exit(&ipst->ips_igmp_timer_lock);
174 174 return;
175 175 }
176 176 id = ipst->ips_igmp_timeout_id;
177 177
178 178 mutex_exit(&ipst->ips_igmp_timer_lock);
179 179 ret = untimeout(id);
180 180 mutex_enter(&ipst->ips_igmp_timer_lock);
181 181 /*
182 182 * The timeout was cancelled, or the timeout handler
183 183 * completed, while we were blocked in the untimeout.
184 184 * No other thread could have set the timer meanwhile
185 185 * since we serialized all the timer setters. Thus
186 186 * no timer is currently active nor executing nor will
187 187 * any timer fire in the future. We start the timer now
188 188 * if needed.
189 189 */
190 190 if (ret == -1) {
191 191 ASSERT(ipst->ips_igmp_timeout_id == 0);
192 192 } else {
193 193 ASSERT(ipst->ips_igmp_timeout_id != 0);
194 194 ipst->ips_igmp_timeout_id = 0;
195 195 }
196 196 if (ipst->ips_igmp_time_to_next != 0) {
197 197 ipst->ips_igmp_time_to_next =
198 198 MIN(ipst->ips_igmp_time_to_next, next);
199 199 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
200 200 (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
201 201 ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
202 202 }
203 203 ipst->ips_igmp_timer_setter_active = B_FALSE;
204 204 mutex_exit(&ipst->ips_igmp_timer_lock);
205 205 }
206 206
207 207 /*
208 208 * mld_start_timers:
209 209 * The unit for next is milliseconds.
210 210 */
211 211 void
212 212 mld_start_timers(unsigned next, ip_stack_t *ipst)
213 213 {
214 214 int time_left;
215 215 int ret;
216 216 timeout_id_t id;
217 217
218 218 ASSERT(next != 0 && next != INFINITY);
219 219
220 220 mutex_enter(&ipst->ips_mld_timer_lock);
221 221 if (ipst->ips_mld_timer_setter_active) {
222 222 /*
223 223 * Serialize timer setters, one at a time. If the
224 224 * timer is currently being set by someone,
225 225 * just record the next time when it has to be
226 226 * invoked and return. The current setter will
227 227 * take care.
228 228 */
229 229 ipst->ips_mld_time_to_next =
230 230 MIN(ipst->ips_mld_time_to_next, next);
231 231 mutex_exit(&ipst->ips_mld_timer_lock);
232 232 return;
233 233 } else {
234 234 ipst->ips_mld_timer_setter_active = B_TRUE;
235 235 }
236 236 if (ipst->ips_mld_timeout_id == 0) {
237 237 /*
238 238 * The timer is inactive. We need to start a timer
239 239 */
240 240 ipst->ips_mld_time_to_next = next;
241 241 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
242 242 (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
243 243 ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
244 244 ipst->ips_mld_timer_setter_active = B_FALSE;
245 245 mutex_exit(&ipst->ips_mld_timer_lock);
246 246 return;
247 247 }
248 248
249 249 /*
250 250 * The timer was scheduled sometime back for firing in
251 251 * 'igmp_time_to_next' ms and is active. We need to
252 252 * reschedule the timeout if the new 'next' will happen
253 253 * earlier than the currently scheduled timeout
254 254 */
255 255 time_left = ipst->ips_mld_timer_scheduled_last +
256 256 MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
257 257 if (time_left < MSEC_TO_TICK(next)) {
258 258 ipst->ips_mld_timer_setter_active = B_FALSE;
259 259 mutex_exit(&ipst->ips_mld_timer_lock);
260 260 return;
261 261 }
262 262 id = ipst->ips_mld_timeout_id;
263 263
264 264 mutex_exit(&ipst->ips_mld_timer_lock);
265 265 ret = untimeout(id);
266 266 mutex_enter(&ipst->ips_mld_timer_lock);
267 267 /*
268 268 * The timeout was cancelled, or the timeout handler
269 269 * completed, while we were blocked in the untimeout.
270 270 * No other thread could have set the timer meanwhile
271 271 * since we serialized all the timer setters. Thus
272 272 * no timer is currently active nor executing nor will
273 273 * any timer fire in the future. We start the timer now
274 274 * if needed.
275 275 */
276 276 if (ret == -1) {
277 277 ASSERT(ipst->ips_mld_timeout_id == 0);
278 278 } else {
279 279 ASSERT(ipst->ips_mld_timeout_id != 0);
280 280 ipst->ips_mld_timeout_id = 0;
281 281 }
282 282 if (ipst->ips_mld_time_to_next != 0) {
283 283 ipst->ips_mld_time_to_next =
284 284 MIN(ipst->ips_mld_time_to_next, next);
285 285 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
286 286 (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
287 287 ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
288 288 }
289 289 ipst->ips_mld_timer_setter_active = B_FALSE;
290 290 mutex_exit(&ipst->ips_mld_timer_lock);
291 291 }
292 292
293 293 /*
294 294 * igmp_input:
295 295 * Return NULL for a bad packet that is discarded here.
296 296 * Return mp if the message is OK and should be handed to "raw" receivers.
297 297 * Callers of igmp_input() may need to reinitialize variables that were copied
298 298 * from the mblk as this calls pullupmsg().
299 299 */
300 300 mblk_t *
301 301 igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
302 302 {
303 303 igmpa_t *igmpa;
304 304 ipha_t *ipha = (ipha_t *)(mp->b_rptr);
305 305 int iphlen, igmplen, mblklen;
306 306 ilm_t *ilm;
307 307 uint32_t src, dst;
308 308 uint32_t group;
309 309 in6_addr_t v6group;
310 310 uint_t next;
311 311 ipif_t *ipif;
312 312 ill_t *ill = ira->ira_ill;
313 313 ip_stack_t *ipst = ill->ill_ipst;
314 314
315 315 ASSERT(!ill->ill_isv6);
316 316 ++ipst->ips_igmpstat.igps_rcv_total;
317 317
318 318 mblklen = MBLKL(mp);
319 319 iphlen = ira->ira_ip_hdr_length;
320 320 if (mblklen < 1 || mblklen < iphlen) {
321 321 ++ipst->ips_igmpstat.igps_rcv_tooshort;
322 322 goto bad_pkt;
323 323 }
324 324 igmplen = ira->ira_pktlen - iphlen;
325 325 /*
326 326 * Since msg sizes are more variable with v3, just pullup the
327 327 * whole thing now.
328 328 */
329 329 if (MBLKL(mp) < (igmplen + iphlen)) {
330 330 mblk_t *mp1;
331 331 if ((mp1 = msgpullup(mp, -1)) == NULL) {
332 332 ++ipst->ips_igmpstat.igps_rcv_tooshort;
333 333 goto bad_pkt;
334 334 }
335 335 freemsg(mp);
336 336 mp = mp1;
337 337 ipha = (ipha_t *)(mp->b_rptr);
338 338 }
339 339
340 340 /*
341 341 * Validate lengths
342 342 */
343 343 if (igmplen < IGMP_MINLEN) {
344 344 ++ipst->ips_igmpstat.igps_rcv_tooshort;
345 345 goto bad_pkt;
346 346 }
347 347
348 348 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
349 349 src = ipha->ipha_src;
350 350 dst = ipha->ipha_dst;
351 351 if (ip_debug > 1)
352 352 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
353 353 "igmp_input: src 0x%x, dst 0x%x on %s\n",
354 354 (int)ntohl(src), (int)ntohl(dst),
355 355 ill->ill_name);
356 356
357 357 switch (igmpa->igmpa_type) {
358 358 case IGMP_MEMBERSHIP_QUERY:
359 359 /*
360 360 * packet length differentiates between v1/v2 and v3
361 361 * v1/v2 should be exactly 8 octets long; v3 is >= 12
362 362 */
363 363 if ((igmplen == IGMP_MINLEN) ||
364 364 (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
365 365 next = igmp_query_in(ipha, igmpa, ill);
366 366 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
367 367 next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
368 368 igmplen);
369 369 } else {
370 370 ++ipst->ips_igmpstat.igps_rcv_tooshort;
371 371 goto bad_pkt;
372 372 }
373 373 if (next == 0)
374 374 goto bad_pkt;
375 375
376 376 if (next != INFINITY)
377 377 igmp_start_timers(next, ipst);
378 378
379 379 break;
380 380
381 381 case IGMP_V1_MEMBERSHIP_REPORT:
382 382 case IGMP_V2_MEMBERSHIP_REPORT:
383 383 /*
384 384 * For fast leave to work, we have to know that we are the
385 385 * last person to send a report for this group. Reports
386 386 * generated by us are looped back since we could potentially
387 387 * be a multicast router, so discard reports sourced by me.
388 388 */
389 389 mutex_enter(&ill->ill_lock);
390 390 for (ipif = ill->ill_ipif; ipif != NULL;
391 391 ipif = ipif->ipif_next) {
392 392 if (ipif->ipif_lcl_addr == src) {
393 393 if (ip_debug > 1) {
394 394 (void) mi_strlog(ill->ill_rq,
395 395 1,
396 396 SL_TRACE,
397 397 "igmp_input: we are only "
398 398 "member src 0x%x\n",
399 399 (int)ntohl(src));
400 400 }
401 401 mutex_exit(&ill->ill_lock);
402 402 return (mp);
403 403 }
404 404 }
405 405 mutex_exit(&ill->ill_lock);
406 406
407 407 ++ipst->ips_igmpstat.igps_rcv_reports;
408 408 group = igmpa->igmpa_group;
409 409 if (!CLASSD(group)) {
410 410 ++ipst->ips_igmpstat.igps_rcv_badreports;
411 411 goto bad_pkt;
412 412 }
413 413
414 414 /*
415 415 * KLUDGE: if the IP source address of the report has an
416 416 * unspecified (i.e., zero) subnet number, as is allowed for
417 417 * a booting host, replace it with the correct subnet number
418 418 * so that a process-level multicast routing demon can
419 419 * determine which subnet it arrived from. This is necessary
420 420 * to compensate for the lack of any way for a process to
421 421 * determine the arrival interface of an incoming packet.
422 422 *
423 423 * Requires that a copy of *this* message it passed up
424 424 * to the raw interface which is done by our caller.
425 425 */
426 426 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */
427 427 /* Pick the first ipif on this ill */
428 428 mutex_enter(&ill->ill_lock);
429 429 src = ill->ill_ipif->ipif_subnet;
430 430 mutex_exit(&ill->ill_lock);
431 431 ip1dbg(("igmp_input: changed src to 0x%x\n",
432 432 (int)ntohl(src)));
433 433 ipha->ipha_src = src;
434 434 }
435 435
436 436 /*
437 437 * If our ill has ILMs that belong to the group being
438 438 * reported, and we are a 'Delaying Member' in the RFC
439 439 * terminology, stop our timer for that group and 'clear
440 440 * flag' i.e. mark as IGMP_OTHERMEMBER.
441 441 */
442 442 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
443 443 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
444 444 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
445 445 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
446 446 continue;
447 447
448 448 ++ipst->ips_igmpstat.igps_rcv_ourreports;
449 449 ilm->ilm_timer = INFINITY;
450 450 ilm->ilm_state = IGMP_OTHERMEMBER;
451 451 } /* for */
452 452 rw_exit(&ill->ill_mcast_lock);
453 453 ill_mcast_timer_start(ill->ill_ipst);
454 454 break;
455 455
456 456 case IGMP_V3_MEMBERSHIP_REPORT:
457 457 /*
458 458 * Currently nothing to do here; IGMP router is not
459 459 * implemented in ip, and v3 hosts don't pay attention
460 460 * to membership reports.
461 461 */
462 462 break;
463 463 }
464 464 /*
465 465 * Pass all valid IGMP packets up to any process(es) listening
466 466 * on a raw IGMP socket. Do not free the packet.
467 467 */
468 468 return (mp);
469 469
470 470 bad_pkt:
471 471 freemsg(mp);
472 472 return (NULL);
473 473 }
474 474
475 475 static uint_t
476 476 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
477 477 {
478 478 ilm_t *ilm;
479 479 int timer;
480 480 uint_t next, current;
481 481 ip_stack_t *ipst;
482 482
483 483 ipst = ill->ill_ipst;
484 484 ++ipst->ips_igmpstat.igps_rcv_queries;
485 485
486 486 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
487 487 /*
488 488 * In the IGMPv2 specification, there are 3 states and a flag.
489 489 *
490 490 * In Non-Member state, we simply don't have a membership record.
491 491 * In Delaying Member state, our timer is running (ilm->ilm_timer
492 492 * < INFINITY). In Idle Member state, our timer is not running
493 493 * (ilm->ilm_timer == INFINITY).
494 494 *
495 495 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
496 496 * we have heard a report from another member, or IGMP_IREPORTEDLAST
497 497 * if I sent the last report.
498 498 */
499 499 if ((igmpa->igmpa_code == 0) ||
500 500 (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
↓ open down ↓ |
500 lines elided |
↑ open up ↑ |
501 501 /*
502 502 * Query from an old router.
503 503 * Remember that the querier on this interface is old,
504 504 * and set the timer to the value in RFC 1112.
505 505 */
506 506 ill->ill_mcast_v1_time = 0;
507 507 ill->ill_mcast_v1_tset = 1;
508 508 if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
509 509 ip1dbg(("Received IGMPv1 Query on %s, switching mode "
510 510 "to IGMP_V1_ROUTER\n", ill->ill_name));
511 - atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
511 + atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
512 512 ill->ill_mcast_type = IGMP_V1_ROUTER;
513 513 }
514 514
515 515 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);
516 516
517 517 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
518 518 igmpa->igmpa_group != 0) {
519 519 ++ipst->ips_igmpstat.igps_rcv_badqueries;
520 520 rw_exit(&ill->ill_mcast_lock);
521 521 ill_mcast_timer_start(ill->ill_ipst);
522 522 return (0);
523 523 }
524 524
525 525 } else {
526 526 in_addr_t group;
527 527
528 528 /*
529 529 * Query from a new router
530 530 * Simply do a validity check
531 531 */
532 532 group = igmpa->igmpa_group;
533 533 if (group != 0 && (!CLASSD(group))) {
534 534 ++ipst->ips_igmpstat.igps_rcv_badqueries;
535 535 rw_exit(&ill->ill_mcast_lock);
536 536 ill_mcast_timer_start(ill->ill_ipst);
537 537 return (0);
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
538 538 }
539 539
540 540 /*
541 541 * Switch interface state to v2 on receipt of a v2 query
542 542 * ONLY IF current state is v3. Let things be if current
543 543 * state if v1 but do reset the v2-querier-present timer.
544 544 */
545 545 if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
546 546 ip1dbg(("Received IGMPv2 Query on %s, switching mode "
547 547 "to IGMP_V2_ROUTER", ill->ill_name));
548 - atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
548 + atomic_inc_16(&ill->ill_ifptr->illif_mcast_v2);
549 549 ill->ill_mcast_type = IGMP_V2_ROUTER;
550 550 }
551 551 ill->ill_mcast_v2_time = 0;
552 552 ill->ill_mcast_v2_tset = 1;
553 553
554 554 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
555 555 }
556 556
557 557 if (ip_debug > 1) {
558 558 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
559 559 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
560 560 (int)ntohs(igmpa->igmpa_code),
561 561 (int)ntohs(igmpa->igmpa_type));
562 562 }
563 563
564 564 /*
565 565 * -Start the timers in all of our membership records
566 566 * for the physical interface on which the query
567 567 * arrived, excluding those that belong to the "all
568 568 * hosts" group (224.0.0.1).
569 569 *
570 570 * -Restart any timer that is already running but has
571 571 * a value longer than the requested timeout.
572 572 *
573 573 * -Use the value specified in the query message as
574 574 * the maximum timeout.
575 575 */
576 576 next = (unsigned)INFINITY;
577 577
578 578 current = CURRENT_MSTIME;
579 579 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
580 580
581 581 /*
582 582 * A multicast router joins INADDR_ANY address
583 583 * to enable promiscuous reception of all
584 584 * mcasts from the interface. This INADDR_ANY
585 585 * is stored in the ilm_v6addr as V6 unspec addr
586 586 */
587 587 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
588 588 continue;
589 589 if (ilm->ilm_addr == htonl(INADDR_ANY))
590 590 continue;
591 591 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
592 592 (igmpa->igmpa_group == 0) ||
593 593 (igmpa->igmpa_group == ilm->ilm_addr)) {
594 594 if (ilm->ilm_timer > timer) {
595 595 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
596 596 if (ilm->ilm_timer < next)
597 597 next = ilm->ilm_timer;
598 598 ilm->ilm_timer += current;
599 599 }
600 600 }
601 601 }
602 602 rw_exit(&ill->ill_mcast_lock);
603 603 /*
604 604 * No packets have been sent above - no
605 605 * ill_mcast_send_queued is needed.
606 606 */
607 607 ill_mcast_timer_start(ill->ill_ipst);
608 608
609 609 return (next);
610 610 }
611 611
612 612 static uint_t
613 613 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
614 614 {
615 615 uint_t i, next, mrd, qqi, timer, delay, numsrc;
616 616 uint_t current;
617 617 ilm_t *ilm;
618 618 ipaddr_t *src_array;
619 619 uint8_t qrv;
620 620 ip_stack_t *ipst;
621 621
622 622 ipst = ill->ill_ipst;
623 623 /* make sure numsrc matches packet size */
624 624 numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
625 625 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
626 626 ++ipst->ips_igmpstat.igps_rcv_tooshort;
627 627 return (0);
628 628 }
629 629 src_array = (ipaddr_t *)&igmp3qa[1];
630 630
631 631 ++ipst->ips_igmpstat.igps_rcv_queries;
632 632
633 633 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
634 634
635 635 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
636 636 uint_t hdrval, mant, exp;
637 637 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
638 638 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
639 639 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
640 640 mrd = (mant | 0x10) << (exp + 3);
641 641 }
642 642 if (mrd == 0)
643 643 mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
644 644 timer = DSEC_TO_MSEC(mrd);
645 645 MCAST_RANDOM_DELAY(delay, timer);
646 646 next = (unsigned)INFINITY;
647 647 current = CURRENT_MSTIME;
648 648
649 649 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
650 650 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
651 651 else
652 652 ill->ill_mcast_rv = qrv;
653 653
654 654 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
655 655 uint_t hdrval, mant, exp;
656 656 hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
657 657 mant = hdrval & IGMP_V3_QQI_MANT_MASK;
658 658 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
659 659 qqi = (mant | 0x10) << (exp + 3);
660 660 }
661 661 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
662 662
663 663 /*
664 664 * If we have a pending general query response that's scheduled
665 665 * sooner than the delay we calculated for this response, then
666 666 * no action is required (RFC3376 section 5.2 rule 1)
667 667 */
668 668 if (ill->ill_global_timer < (current + delay)) {
669 669 rw_exit(&ill->ill_mcast_lock);
670 670 ill_mcast_timer_start(ill->ill_ipst);
671 671 return (next);
672 672 }
673 673
674 674 /*
675 675 * Now take action depending upon query type:
676 676 * general, group specific, or group/source specific.
677 677 */
678 678 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
679 679 /*
680 680 * general query
681 681 * We know global timer is either not running or is
682 682 * greater than our calculated delay, so reset it to
683 683 * our delay (random value in range [0, response time]).
684 684 */
685 685 ill->ill_global_timer = current + delay;
686 686 next = delay;
687 687 } else {
688 688 /* group or group/source specific query */
689 689 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
690 690 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
691 691 (ilm->ilm_addr == htonl(INADDR_ANY)) ||
692 692 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
693 693 (igmp3qa->igmp3qa_group != ilm->ilm_addr))
694 694 continue;
695 695 /*
696 696 * If the query is group specific or we have a
697 697 * pending group specific query, the response is
698 698 * group specific (pending sources list should be
699 699 * empty). Otherwise, need to update the pending
700 700 * sources list for the group and source specific
701 701 * response.
702 702 */
703 703 if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
704 704 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
705 705 group_query:
706 706 FREE_SLIST(ilm->ilm_pendsrcs);
707 707 ilm->ilm_pendsrcs = NULL;
708 708 } else {
709 709 boolean_t overflow;
710 710 slist_t *pktl;
711 711 if (numsrc > MAX_FILTER_SIZE ||
712 712 (ilm->ilm_pendsrcs == NULL &&
713 713 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
714 714 /*
715 715 * We've been sent more sources than
716 716 * we can deal with; or we can't deal
717 717 * with a source list at all. Revert
718 718 * to a group specific query.
719 719 */
720 720 goto group_query;
721 721 }
722 722 if ((pktl = l_alloc()) == NULL)
723 723 goto group_query;
724 724 pktl->sl_numsrc = numsrc;
725 725 for (i = 0; i < numsrc; i++)
726 726 IN6_IPADDR_TO_V4MAPPED(src_array[i],
727 727 &(pktl->sl_addr[i]));
728 728 l_union_in_a(ilm->ilm_pendsrcs, pktl,
729 729 &overflow);
730 730 l_free(pktl);
731 731 if (overflow)
732 732 goto group_query;
733 733 }
734 734
735 735 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
736 736 INFINITY : (ilm->ilm_timer - current);
737 737 /* choose soonest timer */
738 738 ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
739 739 if (ilm->ilm_timer < next)
740 740 next = ilm->ilm_timer;
741 741 ilm->ilm_timer += current;
742 742 }
743 743 }
744 744 rw_exit(&ill->ill_mcast_lock);
745 745 /*
746 746 * No packets have been sent above - no
747 747 * ill_mcast_send_queued is needed.
748 748 */
749 749 ill_mcast_timer_start(ill->ill_ipst);
750 750
751 751 return (next);
752 752 }
753 753
754 754 /*
755 755 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
756 756 * and it gets sent after the lock is dropped.
757 757 */
758 758 void
759 759 igmp_joingroup(ilm_t *ilm)
760 760 {
761 761 uint_t timer;
762 762 ill_t *ill;
763 763 ip_stack_t *ipst = ilm->ilm_ipst;
764 764
765 765 ill = ilm->ilm_ill;
766 766
767 767 ASSERT(!ill->ill_isv6);
768 768 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
769 769
770 770 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
771 771 ilm->ilm_rtx.rtx_timer = INFINITY;
772 772 ilm->ilm_state = IGMP_OTHERMEMBER;
773 773 } else {
774 774 ip1dbg(("Querier mode %d, sending report, group %x\n",
775 775 ill->ill_mcast_type, htonl(ilm->ilm_addr)));
776 776 if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
777 777 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
778 778 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
779 779 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
780 780 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
781 781 mrec_t *rp;
782 782 mcast_record_t rtype;
783 783 /*
784 784 * The possible state changes we need to handle here:
785 785 * Old State New State Report
786 786 *
787 787 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0)
788 788 * INCLUDE(0) EXCLUDE(X) TO_EX(X)
789 789 *
790 790 * No need to send the BLOCK(0) report; ALLOW(X)
791 791 * is enough.
792 792 */
793 793 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
794 794 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
795 795 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
796 796 ilm->ilm_filter, NULL);
797 797 igmpv3_sendrpt(ill, rp);
798 798 /*
799 799 * Set up retransmission state. Timer is set below,
800 800 * for both v3 and older versions.
801 801 */
802 802 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
803 803 ilm->ilm_filter);
804 804 }
805 805
806 806 /* Set the ilm timer value */
807 807 ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
808 808 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
809 809 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
810 810 timer = ilm->ilm_rtx.rtx_timer;
811 811 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
812 812 ilm->ilm_state = IGMP_IREPORTEDLAST;
813 813
814 814 /*
815 815 * We are holding ill_mcast_lock here and the timeout
816 816 * handler (igmp_timeout_handler_per_ill) acquires that
817 817 * lock. Hence we can't call igmp_start_timers since it could
818 818 * deadlock in untimeout().
819 819 * Instead the thread which drops ill_mcast_lock will have
820 820 * to call ill_mcast_timer_start().
821 821 */
822 822 mutex_enter(&ipst->ips_igmp_timer_lock);
823 823 ipst->ips_igmp_deferred_next = MIN(timer,
824 824 ipst->ips_igmp_deferred_next);
825 825 mutex_exit(&ipst->ips_igmp_timer_lock);
826 826 }
827 827
828 828 if (ip_debug > 1) {
829 829 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
830 830 "igmp_joingroup: multicast_type %d timer %d",
831 831 (ilm->ilm_ill->ill_mcast_type),
832 832 (int)ntohl(timer));
833 833 }
834 834 }
835 835
836 836 /*
837 837 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
838 838 * and it gets sent after the lock is dropped.
839 839 */
840 840 void
841 841 mld_joingroup(ilm_t *ilm)
842 842 {
843 843 uint_t timer;
844 844 ill_t *ill;
845 845 ip_stack_t *ipst = ilm->ilm_ipst;
846 846
847 847 ill = ilm->ilm_ill;
848 848
849 849 ASSERT(ill->ill_isv6);
850 850
851 851 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
852 852
853 853 if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
854 854 ilm->ilm_rtx.rtx_timer = INFINITY;
855 855 ilm->ilm_state = IGMP_OTHERMEMBER;
856 856 } else {
857 857 if (ill->ill_mcast_type == MLD_V1_ROUTER) {
858 858 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
859 859 } else {
860 860 mrec_t *rp;
861 861 mcast_record_t rtype;
862 862 /*
863 863 * The possible state changes we need to handle here:
864 864 * Old State New State Report
865 865 *
866 866 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0)
867 867 * INCLUDE(0) EXCLUDE(X) TO_EX(X)
868 868 *
869 869 * No need to send the BLOCK(0) report; ALLOW(X)
870 870 * is enough
871 871 */
872 872 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
873 873 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
874 874 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
875 875 ilm->ilm_filter, NULL);
876 876 mldv2_sendrpt(ill, rp);
877 877 /*
878 878 * Set up retransmission state. Timer is set below,
879 879 * for both v2 and v1.
880 880 */
881 881 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
882 882 ilm->ilm_filter);
883 883 }
884 884
885 885 /* Set the ilm timer value */
886 886 ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
887 887 ilm->ilm_rtx.rtx_cnt > 0);
888 888
889 889 ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
890 890 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
891 891 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
892 892 timer = ilm->ilm_rtx.rtx_timer;
893 893 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
894 894 ilm->ilm_state = IGMP_IREPORTEDLAST;
895 895
896 896 /*
897 897 * We are holding ill_mcast_lock here and the timeout
898 898 * handler (mld_timeout_handler_per_ill) acquires that
899 899 * lock. Hence we can't call mld_start_timers since it could
900 900 * deadlock in untimeout().
901 901 * Instead the thread which drops ill_mcast_lock will have
902 902 * to call ill_mcast_timer_start().
903 903 */
904 904 mutex_enter(&ipst->ips_mld_timer_lock);
905 905 ipst->ips_mld_deferred_next = MIN(timer,
906 906 ipst->ips_mld_deferred_next);
907 907 mutex_exit(&ipst->ips_mld_timer_lock);
908 908 }
909 909
910 910 if (ip_debug > 1) {
911 911 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
912 912 "mld_joingroup: multicast_type %d timer %d",
913 913 (ilm->ilm_ill->ill_mcast_type),
914 914 (int)ntohl(timer));
915 915 }
916 916 }
917 917
918 918 /*
919 919 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
920 920 * and it gets sent after the lock is dropped.
921 921 */
922 922 void
923 923 igmp_leavegroup(ilm_t *ilm)
924 924 {
925 925 ill_t *ill = ilm->ilm_ill;
926 926
927 927 ASSERT(!ill->ill_isv6);
928 928
929 929 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
930 930 if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
931 931 ill->ill_mcast_type == IGMP_V2_ROUTER &&
932 932 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
933 933 igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
934 934 (htonl(INADDR_ALLRTRS_GROUP)));
935 935 return;
936 936 }
937 937 if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
938 938 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
939 939 mrec_t *rp;
940 940 /*
941 941 * The possible state changes we need to handle here:
942 942 * Old State New State Report
943 943 *
944 944 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X)
945 945 * EXCLUDE(X) INCLUDE(0) TO_IN(0)
946 946 *
947 947 * No need to send the ALLOW(0) report; BLOCK(X) is enough
948 948 */
949 949 if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
950 950 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
951 951 ilm->ilm_filter, NULL);
952 952 } else {
953 953 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
954 954 NULL, NULL);
955 955 }
956 956 igmpv3_sendrpt(ill, rp);
957 957 return;
958 958 }
959 959 }
960 960
961 961 /*
962 962 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
963 963 * and it gets sent after the lock is dropped.
964 964 */
965 965 void
966 966 mld_leavegroup(ilm_t *ilm)
967 967 {
968 968 ill_t *ill = ilm->ilm_ill;
969 969
970 970 ASSERT(ill->ill_isv6);
971 971
972 972 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
973 973 if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
974 974 ill->ill_mcast_type == MLD_V1_ROUTER &&
975 975 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
976 976 mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
977 977 return;
978 978 }
979 979 if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
980 980 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
981 981 mrec_t *rp;
982 982 /*
983 983 * The possible state changes we need to handle here:
984 984 * Old State New State Report
985 985 *
986 986 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X)
987 987 * EXCLUDE(X) INCLUDE(0) TO_IN(0)
988 988 *
989 989 * No need to send the ALLOW(0) report; BLOCK(X) is enough
990 990 */
991 991 if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
992 992 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
993 993 ilm->ilm_filter, NULL);
994 994 } else {
995 995 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
996 996 NULL, NULL);
997 997 }
998 998 mldv2_sendrpt(ill, rp);
999 999 return;
1000 1000 }
1001 1001 }
1002 1002
/*
 * Send an IGMPv3 State Change Report reflecting a change of ilm's filter
 * mode (fmode) and/or source list (flist), and merge the change into the
 * ilm's retransmit state.  No-op unless the router is IGMPv3.
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;
	ip_stack_t *ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	ill = ilm->ilm_ill;
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t *a_minus_b = NULL, *b_minus_a = NULL;
		slist_t *allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Out of memory for the difference lists; fall back
			 * to a filter-mode-change record instead.  b_minus_a
			 * is always NULL here (either it failed to allocate
			 * or was never attempted), so freeing a_minus_b
			 * alone is sufficient.  Note these gotos jump into
			 * the sibling else-if/else arms below.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		/* Which difference is ALLOW vs BLOCK depends on the mode. */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
	send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
	send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, the caller will start it when dropping ill_mcast_lock.
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		/* Convert the relative delay to an absolute deadline. */
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	igmpv3_sendrpt(ill, rp);
}
1093 1093
/*
 * Send an MLDv2 State Change Report reflecting a change of ilm's filter
 * mode (fmode) and/or source list (flist), and merge the change into the
 * ilm's retransmit state.  No-op unless the router is MLDv2.
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;
	ip_stack_t *ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* only need to send if we have an mldv2-capable router */
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t *a_minus_b = NULL, *b_minus_a = NULL;
		slist_t *allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Out of memory for the difference lists; fall back
			 * to a filter-mode-change record.  b_minus_a is
			 * always NULL here, so freeing a_minus_b alone is
			 * sufficient.  These gotos jump into the sibling
			 * else-if/else arms below.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		/* Which difference is ALLOW vs BLOCK depends on the mode. */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
	send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
	send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, the caller will start it when dropping ill_mcast_lock.
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	/*
	 * NOTE(review): the IGMP counterpart (igmp_statechange) has no
	 * equivalent of this assertion; presumably intentional, but worth
	 * confirming they shouldn't match.
	 */
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next =
		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		/* Convert the relative delay to an absolute deadline. */
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	mldv2_sendrpt(ill, rp);
}
1184 1184
/*
 * igmp_timeout_handler_per_ill:
 * Process all pending IGMP timers (the interface's global timer, each
 * group's report timer, and each group's retransmit timer) for one ill,
 * sending whatever reports are due.  Returns the number of milliseconds
 * until the next pending event on this ill (INFINITY if none).
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
	uint_t next = INFINITY, current;
	ilm_t *ilm;
	mrec_t *rp = NULL;
	mrec_t *rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t rtype;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);

	current = CURRENT_MSTIME;
	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			/* Chain this group's record onto the report. */
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		igmpv3_sendrpt(ill, rp);
		rp = NULL;
	} else {
		/* Not yet due; just track the time remaining. */
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			igmpv3_sendrpt(ill, rp);
			rp = NULL;
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			continue;
		}
		if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* More retransmissions remain; schedule the next. */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			/* Retransmissions exhausted; clear rtx state. */
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		igmpv3_sendrpt(ill, rtxrp);
		rtxrp = NULL;
	}

	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	/* Defer ill_mcast_timer_start() until the caller is done */

	return (next);
}
1363 1363
/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_mcast_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
 * performs the action exclusively after acquiring ill_mcast_lock.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */
void
igmp_timeout_handler(void *arg)
{
	ill_t *ill;
	uint_t global_next = INFINITY;	/* earliest event across all ills */
	uint_t next;
	ill_walk_context_t ctx;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	/* Clear the pending-timeout bookkeeping before doing any work. */
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timeout_id = 0;
	ipst->ips_igmp_timer_scheduled_last = 0;
	ipst->ips_igmp_time_to_next = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/* Make sure the ill isn't going away. */
		if (!ill_check_and_refhold(ill))
			continue;
		/*
		 * Drop ips_ill_g_lock while processing this ill; the
		 * refhold keeps it alive, and the per-ill handler takes
		 * ill_mcast_lock and sends packets.
		 */
		rw_exit(&ipst->ips_ill_g_lock);
		next = igmp_timeout_handler_per_ill(ill);
		if (next < global_next)
			global_next = next;
		ill_refrele(ill);
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* Re-arm the timer for the earliest pending event, if any. */
	if (global_next != INFINITY)
		igmp_start_timers(global_next, ipst);
}
1419 1419
/*
 * mld_timeout_handler_per_ill:
 * Process all pending MLD timers (the interface's global timer, each
 * group's report timer, and each group's retransmit timer) for one ill.
 * Unlike the IGMP counterpart, MLDv2 records are accumulated across the
 * per-ilm loop and sent once at the end.  Returns the number of
 * milliseconds until the next pending event on this ill (INFINITY if
 * none).
 */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
	ilm_t *ilm;
	uint_t next = INFINITY, current;
	mrec_t *rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t rtype;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);

	current = CURRENT_MSTIME;
	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mldv2_sendrpt(ill, rp);
	} else {
		/* Not yet due; just track the time remaining. */
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	/* Accumulators for MLDv2 records; sent after the loop. */
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* More retransmissions remain; schedule the next. */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			/* Retransmissions exhausted; clear rtx state. */
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/* Send the accumulated records only if the router is MLDv2. */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	/* Defer ill_mcast_timer_start() until the caller is done */

	return (next);
}
1596 1596
/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
	ill_t *ill;
	uint_t global_next = INFINITY;	/* earliest event across all ills */
	uint_t next;
	ill_walk_context_t ctx;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	/* Clear the pending-timeout bookkeeping before doing any work. */
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timeout_id = 0;
	ipst->ips_mld_timer_scheduled_last = 0;
	ipst->ips_mld_time_to_next = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/* Make sure the ill isn't going away. */
		if (!ill_check_and_refhold(ill))
			continue;
		/*
		 * Drop ips_ill_g_lock while processing this ill; the
		 * refhold keeps it alive, and the per-ill handler takes
		 * ill_mcast_lock and sends packets.
		 */
		rw_exit(&ipst->ips_ill_g_lock);
		next = mld_timeout_handler_per_ill(ill);
		if (next < global_next)
			global_next = next;
		ill_refrele(ill);
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* Re-arm the timer for the earliest pending event, if any. */
	if (global_next != INFINITY)
		mld_start_timers(global_next, ipst);
}
1638 1638
/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 *
 * Per RFC 3376 section 8.12 (and RFC 3810 section 9.12) this timeout is
 * (Robustness Variable * Query Interval) + (Query Response Interval).
 * The (rv * qi + MCAST_QUERY_RESP_INTERVAL) sum is scaled by 1000 --
 * presumably converting seconds to milliseconds; confirm against the
 * units of the ill_mcast_rv/ill_mcast_qi fields -- and then divided by
 * MCAST_SLOWTIMO_INTERVAL to yield a count of slowtimo ticks.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
1646 1646
1647 1647 /*
1648 1648 * igmp_slowtimo:
1649 1649 * - Resets to new router if we didnt we hear from the router
1650 1650 * in IGMP_AGE_THRESHOLD seconds.
1651 1651 * - Resets slowtimeout.
1652 1652 * Check for ips_igmp_max_version ensures that we don't revert to a higher
1653 1653 * IGMP version than configured.
1654 1654 */
1655 1655 void
1656 1656 igmp_slowtimo(void *arg)
1657 1657 {
1658 1658 ill_t *ill;
1659 1659 ill_if_t *ifp;
1660 1660 avl_tree_t *avl_tree;
1661 1661 ip_stack_t *ipst = (ip_stack_t *)arg;
1662 1662
1663 1663 ASSERT(arg != NULL);
1664 1664
1665 1665 /*
1666 1666 * The ill_if_t list is circular, hence the odd loop parameters.
1667 1667 *
1668 1668 * We can't use the ILL_START_WALK and ill_next() wrappers for this
1669 1669 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
1670 1670 * structure (allowing us to skip if none of the instances have timers
1671 1671 * running).
1672 1672 */
1673 1673 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1674 1674 for (ifp = IP_V4_ILL_G_LIST(ipst);
1675 1675 ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
1676 1676 ifp = ifp->illif_next) {
1677 1677 /*
1678 1678 * illif_mcast_v[12] are set using atomics. If an ill hears
1679 1679 * a V1 or V2 query now and we miss seeing the count now,
1680 1680 * we will see it the next time igmp_slowtimo is called.
1681 1681 */
1682 1682 if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
1683 1683 continue;
1684 1684
1685 1685 avl_tree = &ifp->illif_avl_by_ppa;
1686 1686 for (ill = avl_first(avl_tree); ill != NULL;
1687 1687 ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
1688 1688 /* Make sure the ill isn't going away. */
1689 1689 if (!ill_check_and_refhold(ill))
1690 1690 continue;
1691 1691 rw_exit(&ipst->ips_ill_g_lock);
1692 1692 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1693 1693 if (ill->ill_mcast_v1_tset == 1)
1694 1694 ill->ill_mcast_v1_time++;
1695 1695 if (ill->ill_mcast_v2_tset == 1)
1696 1696 ill->ill_mcast_v2_time++;
1697 1697 if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
1698 1698 (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
1699 1699 (ill->ill_mcast_v1_time >= OVQP(ill))) {
1700 1700 if ((ill->ill_mcast_v2_tset > 0) ||
1701 1701 (ipst->ips_igmp_max_version ==
1702 1702 IGMP_V2_ROUTER)) {
1703 1703 ip1dbg(("V1 query timer "
1704 1704 "expired on %s; switching "
1705 1705 "mode to IGMP_V2\n",
1706 1706 ill->ill_name));
1707 1707 ill->ill_mcast_type =
1708 1708 IGMP_V2_ROUTER;
↓ open down ↓ |
1150 lines elided |
↑ open up ↑ |
1709 1709 } else {
1710 1710 ip1dbg(("V1 query timer "
1711 1711 "expired on %s; switching "
1712 1712 "mode to IGMP_V3\n",
1713 1713 ill->ill_name));
1714 1714 ill->ill_mcast_type =
1715 1715 IGMP_V3_ROUTER;
1716 1716 }
1717 1717 ill->ill_mcast_v1_time = 0;
1718 1718 ill->ill_mcast_v1_tset = 0;
1719 - atomic_add_16(&ifp->illif_mcast_v1, -1);
1719 + atomic_dec_16(&ifp->illif_mcast_v1);
1720 1720 }
1721 1721 if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
1722 1722 (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
1723 1723 (ill->ill_mcast_v2_time >= OVQP(ill))) {
1724 1724 ip1dbg(("V2 query timer expired on "
1725 1725 "%s; switching mode to IGMP_V3\n",
1726 1726 ill->ill_name));
1727 1727 ill->ill_mcast_type = IGMP_V3_ROUTER;
1728 1728 ill->ill_mcast_v2_time = 0;
1729 1729 ill->ill_mcast_v2_tset = 0;
1730 - atomic_add_16(&ifp->illif_mcast_v2, -1);
1730 + atomic_dec_16(&ifp->illif_mcast_v2);
1731 1731 }
1732 1732 rw_exit(&ill->ill_mcast_lock);
1733 1733 ill_refrele(ill);
1734 1734 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1735 1735 }
1736 1736 }
1737 1737 rw_exit(&ipst->ips_ill_g_lock);
1738 1738 ill_mcast_timer_start(ipst);
1739 1739 mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
1740 1740 ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
1741 1741 MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
1742 1742 mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
1743 1743 }
1744 1744
/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_mld_max_version ensures that we don't revert to a higher
 * IGMP version than configured.
 */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v1 counts the ills in this interface group
		 * still in MLDv1 compatibility mode; if none, there is
		 * nothing to age out here.
		 */
		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			/* Make sure the ill isn't going away. */
			if (!ill_check_and_refhold(ill))
				continue;
			/*
			 * Drop ips_ill_g_lock while working on this ill so
			 * we never hold it across ill_mcast_lock; the
			 * refhold keeps the ill (and hence our AVL walk
			 * position) valid until we reacquire it below.
			 */
			rw_exit(&ipst->ips_ill_g_lock);
			rw_enter(&ill->ill_mcast_lock, RW_WRITER);
			/* Only age the v1 timer once a v1 query was seen. */
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				ip1dbg(("MLD query timer expired on"
				    " %s; switching mode to MLD_V2\n",
				    ill->ill_name));
				ill->ill_mcast_type = MLD_V2_ROUTER;
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_dec_16(&ifp->illif_mcast_v1);
			}
			rw_exit(&ill->ill_mcast_lock);
			ill_refrele(ill);
			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* Rearm the multicast timer and reschedule ourselves. */
	ill_mcast_timer_start(ipst);
	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}
1803 1803
1804 1804 /*
1805 1805 * igmp_sendpkt:
1806 1806 * This will send to ip_output_simple just like icmp_inbound.
1807 1807 */
1808 1808 static void
1809 1809 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
1810 1810 {
1811 1811 mblk_t *mp;
1812 1812 igmpa_t *igmpa;
1813 1813 uint8_t *rtralert;
1814 1814 ipha_t *ipha;
1815 1815 int hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
1816 1816 size_t size = hdrlen + sizeof (igmpa_t);
1817 1817 ill_t *ill = ilm->ilm_ill;
1818 1818 ip_stack_t *ipst = ill->ill_ipst;
1819 1819
1820 1820 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1821 1821
1822 1822 mp = allocb(size, BPRI_HI);
1823 1823 if (mp == NULL) {
1824 1824 return;
1825 1825 }
1826 1826 mp->b_wptr = mp->b_rptr + size;
1827 1827
1828 1828 ipha = (ipha_t *)mp->b_rptr;
1829 1829 rtralert = (uint8_t *)&(ipha[1]);
1830 1830 igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
1831 1831 igmpa->igmpa_type = type;
1832 1832 igmpa->igmpa_code = 0;
1833 1833 igmpa->igmpa_group = ilm->ilm_addr;
1834 1834 igmpa->igmpa_cksum = 0;
1835 1835 igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);
1836 1836
1837 1837 rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1838 1838 rtralert[1] = RTRALERT_LEN;
1839 1839 rtralert[2] = 0;
1840 1840 rtralert[3] = 0;
1841 1841
1842 1842 ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
1843 1843 | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
1844 1844 ipha->ipha_type_of_service = 0;
1845 1845 ipha->ipha_length = htons(size);
1846 1846 ipha->ipha_ident = 0;
1847 1847 ipha->ipha_fragment_offset_and_flags = 0;
1848 1848 ipha->ipha_ttl = IGMP_TTL;
1849 1849 ipha->ipha_protocol = IPPROTO_IGMP;
1850 1850 ipha->ipha_hdr_checksum = 0;
1851 1851 ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
1852 1852 ipha->ipha_src = INADDR_ANY;
1853 1853
1854 1854 ill_mcast_queue(ill, mp);
1855 1855
1856 1856 ++ipst->ips_igmpstat.igps_snd_reports;
1857 1857 }
1858 1858
1859 1859 /*
1860 1860 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill.
1861 1861 * The report will contain one group record
1862 1862 * for each element of reclist. If this causes packet length to
1863 1863 * exceed ill->ill_mc_mtu, multiple reports are sent.
1864 1864 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
1865 1865 * and those buffers are freed here.
1866 1866 */
1867 1867 static void
1868 1868 igmpv3_sendrpt(ill_t *ill, mrec_t *reclist)
1869 1869 {
1870 1870 igmp3ra_t *igmp3ra;
1871 1871 grphdra_t *grphdr;
1872 1872 mblk_t *mp;
1873 1873 ipha_t *ipha;
1874 1874 uint8_t *rtralert;
1875 1875 ipaddr_t *src_array;
1876 1876 int i, j, numrec, more_src_cnt;
1877 1877 size_t hdrsize, size, rsize;
1878 1878 mrec_t *rp, *cur_reclist;
1879 1879 mrec_t *next_reclist = reclist;
1880 1880 boolean_t morepkts;
1881 1881 ip_stack_t *ipst = ill->ill_ipst;
1882 1882
1883 1883 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1884 1884
1885 1885 /* if there aren't any records, there's nothing to send */
1886 1886 if (reclist == NULL)
1887 1887 return;
1888 1888
1889 1889 hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
1890 1890 nextpkt:
1891 1891 size = hdrsize + sizeof (igmp3ra_t);
1892 1892 morepkts = B_FALSE;
1893 1893 more_src_cnt = 0;
1894 1894 cur_reclist = next_reclist;
1895 1895 numrec = 0;
1896 1896 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
1897 1897 rsize = sizeof (grphdra_t) +
1898 1898 (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
1899 1899 if (size + rsize > ill->ill_mc_mtu) {
1900 1900 if (rp == cur_reclist) {
1901 1901 /*
1902 1902 * If the first mrec we looked at is too big
1903 1903 * to fit in a single packet (i.e the source
1904 1904 * list is too big), we must either truncate
1905 1905 * the list (if TO_EX or IS_EX), or send
1906 1906 * multiple reports for the same group (all
1907 1907 * other types).
1908 1908 */
1909 1909 int srcspace, srcsperpkt;
1910 1910 srcspace = ill->ill_mc_mtu - (size +
1911 1911 sizeof (grphdra_t));
1912 1912
1913 1913 /*
1914 1914 * Skip if there's not even enough room in
1915 1915 * a single packet to send something useful.
1916 1916 */
1917 1917 if (srcspace <= sizeof (ipaddr_t))
1918 1918 continue;
1919 1919
1920 1920 srcsperpkt = srcspace / sizeof (ipaddr_t);
1921 1921 /*
1922 1922 * Increment size and numrec, because we will
1923 1923 * be sending a record for the mrec we're
1924 1924 * looking at now.
1925 1925 */
1926 1926 size += sizeof (grphdra_t) +
1927 1927 (srcsperpkt * sizeof (ipaddr_t));
1928 1928 numrec++;
1929 1929 if (rp->mrec_type == MODE_IS_EXCLUDE ||
1930 1930 rp->mrec_type == CHANGE_TO_EXCLUDE) {
1931 1931 rp->mrec_srcs.sl_numsrc = srcsperpkt;
1932 1932 if (rp->mrec_next == NULL) {
1933 1933 /* no more packets to send */
1934 1934 break;
1935 1935 } else {
1936 1936 /*
1937 1937 * more packets, but we're
1938 1938 * done with this mrec.
1939 1939 */
1940 1940 next_reclist = rp->mrec_next;
1941 1941 }
1942 1942 } else {
1943 1943 more_src_cnt = rp->mrec_srcs.sl_numsrc
1944 1944 - srcsperpkt;
1945 1945 rp->mrec_srcs.sl_numsrc = srcsperpkt;
1946 1946 /*
1947 1947 * We'll fix up this mrec (remove the
1948 1948 * srcs we've already sent) before
1949 1949 * returning to nextpkt above.
1950 1950 */
1951 1951 next_reclist = rp;
1952 1952 }
1953 1953 } else {
1954 1954 next_reclist = rp;
1955 1955 }
1956 1956 morepkts = B_TRUE;
1957 1957 break;
1958 1958 }
1959 1959 size += rsize;
1960 1960 numrec++;
1961 1961 }
1962 1962
1963 1963 mp = allocb(size, BPRI_HI);
1964 1964 if (mp == NULL) {
1965 1965 goto free_reclist;
1966 1966 }
1967 1967 bzero((char *)mp->b_rptr, size);
1968 1968 mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
1969 1969
1970 1970 ipha = (ipha_t *)mp->b_rptr;
1971 1971 rtralert = (uint8_t *)&(ipha[1]);
1972 1972 igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
1973 1973 grphdr = (grphdra_t *)&(igmp3ra[1]);
1974 1974
1975 1975 rp = cur_reclist;
1976 1976 for (i = 0; i < numrec; i++) {
1977 1977 grphdr->grphdra_type = rp->mrec_type;
1978 1978 grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
1979 1979 grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
1980 1980 src_array = (ipaddr_t *)&(grphdr[1]);
1981 1981
1982 1982 for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
1983 1983 src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);
1984 1984
1985 1985 grphdr = (grphdra_t *)&(src_array[j]);
1986 1986 rp = rp->mrec_next;
1987 1987 }
1988 1988
1989 1989 igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
1990 1990 igmp3ra->igmp3ra_numrec = htons(numrec);
1991 1991 igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);
1992 1992
1993 1993 rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1994 1994 rtralert[1] = RTRALERT_LEN;
1995 1995 rtralert[2] = 0;
1996 1996 rtralert[3] = 0;
1997 1997
1998 1998 ipha->ipha_version_and_hdr_length = IP_VERSION << 4
1999 1999 | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
2000 2000 ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
2001 2001 ipha->ipha_length = htons(size);
2002 2002 ipha->ipha_ttl = IGMP_TTL;
2003 2003 ipha->ipha_protocol = IPPROTO_IGMP;
2004 2004 ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
2005 2005 ipha->ipha_src = INADDR_ANY;
2006 2006
2007 2007 ill_mcast_queue(ill, mp);
2008 2008
2009 2009 ++ipst->ips_igmpstat.igps_snd_reports;
2010 2010
2011 2011 if (morepkts) {
2012 2012 if (more_src_cnt > 0) {
2013 2013 int index, mvsize;
2014 2014 slist_t *sl = &next_reclist->mrec_srcs;
2015 2015 index = sl->sl_numsrc;
2016 2016 mvsize = more_src_cnt * sizeof (in6_addr_t);
2017 2017 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2018 2018 mvsize);
2019 2019 sl->sl_numsrc = more_src_cnt;
2020 2020 }
2021 2021 goto nextpkt;
2022 2022 }
2023 2023
2024 2024 free_reclist:
2025 2025 while (reclist != NULL) {
2026 2026 rp = reclist->mrec_next;
2027 2027 mi_free(reclist);
2028 2028 reclist = rp;
2029 2029 }
2030 2030 }
2031 2031
/*
 * mld_input:
 * Validate and dispatch a received MLD message that arrived on ira->ira_ill.
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of mld_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
mblk_t *
mld_input(mblk_t *mp, ip_recv_attr_t *ira)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr;
	uint_t		next;
	int		mldlen;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	/* Valid MLD messages must arrive with an IPv6 hop limit of 1. */
	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return (NULL);
	}

	/* Get to the icmp header part */
	hdr_length = ira->ira_ip_hdr_length;
	exthdr_length = hdr_length - IPV6_HDR_LEN;

	/* MLD length = IPv6 payload length minus any extension headers. */
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY:
		/*
		 * packet length differentiates between v1 and v2. v1
		 * query should be exactly 24 octets long; v2 is >= 28.
		 */
		if ((mldlen == MLD_MINLEN) ||
		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
			next = mld_query_in(mldh, ill);
		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
		} else {
			/* 25-27 octets: neither a valid v1 nor v2 query. */
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return (NULL);
		}
		/*
		 * The query handlers return 0 on error; the packet is
		 * still handed up to raw listeners in that case.
		 */
		if (next == 0) {
			return (mp);
		}

		if (next != INFINITY)
			mld_start_timers(next, ipst);
		break;

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    &ip6h->ip6_src)) {
				if (ip_debug > 1) {
					char buf1[INET6_ADDRSTRLEN];

					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "mld_input: we are only "
					    "member src %s\n",
					    inet_ntop(AF_INET6, &ip6h->ip6_src,
					    buf1, sizeof (buf1)));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

		/* The reported address must itself be multicast. */
		v6group_ptr = &mldh->mld_addr;
		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembBadReports);
			freemsg(mp);
			return (NULL);
		}


		/*
		 * If we belong to the group being reported, and we are a
		 * 'Delaying member' per the RFC terminology, stop our timer
		 * for that group and 'clear flag' i.e. mark ilm_state as
		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
		 * membership entries for the same group address (one per zone)
		 * so we need to walk the ill_ilm list.
		 */
		rw_enter(&ill->ill_mcast_lock, RW_WRITER);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
				continue;
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembOurReports);

			/* Another member reported, so we need not. */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
		rw_exit(&ill->ill_mcast_lock);
		/*
		 * No packets have been sent above - no
		 * ill_mcast_send_queued is needed.
		 */
		ill_mcast_timer_start(ill->ill_ipst);
		break;

	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
		break;
	}
	/* Unrecognized types fall through and are passed up unchanged. */
	return (mp);
}
2178 2178
2179 2179 /*
2180 2180 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate
2181 2181 * (non-zero, unsigned) timer value to be set on success.
2182 2182 */
2183 2183 static uint_t
2184 2184 mld_query_in(mld_hdr_t *mldh, ill_t *ill)
2185 2185 {
2186 2186 ilm_t *ilm;
2187 2187 int timer;
2188 2188 uint_t next, current;
2189 2189 in6_addr_t *v6group;
2190 2190
2191 2191 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2192 2192
2193 2193 /*
2194 2194 * In the MLD specification, there are 3 states and a flag.
2195 2195 *
2196 2196 * In Non-Listener state, we simply don't have a membership record.
2197 2197 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
2198 2198 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
2199 2199 * INFINITY)
2200 2200 *
2201 2201 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
2202 2202 * we have heard a report from another member, or IGMP_IREPORTEDLAST
2203 2203 * if I sent the last report.
2204 2204 */
2205 2205 v6group = &mldh->mld_addr;
2206 2206 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
2207 2207 ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
2208 2208 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
↓ open down ↓ |
409 lines elided |
↑ open up ↑ |
2209 2209 return (0);
2210 2210 }
2211 2211
2212 2212 /* Need to do compatibility mode checking */
2213 2213 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
2214 2214 ill->ill_mcast_v1_time = 0;
2215 2215 ill->ill_mcast_v1_tset = 1;
2216 2216 if (ill->ill_mcast_type == MLD_V2_ROUTER) {
2217 2217 ip1dbg(("Received MLDv1 Query on %s, switching mode to "
2218 2218 "MLD_V1_ROUTER\n", ill->ill_name));
2219 - atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
2219 + atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
2220 2220 ill->ill_mcast_type = MLD_V1_ROUTER;
2221 2221 }
2222 2222
2223 2223 timer = (int)ntohs(mldh->mld_maxdelay);
2224 2224 if (ip_debug > 1) {
2225 2225 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
2226 2226 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
2227 2227 timer, (int)mldh->mld_type);
2228 2228 }
2229 2229
2230 2230 /*
2231 2231 * -Start the timers in all of our membership records for
2232 2232 * the physical interface on which the query arrived,
2233 2233 * excl:
2234 2234 * 1. those that belong to the "all hosts" group,
2235 2235 * 2. those with 0 scope, or 1 node-local scope.
2236 2236 *
2237 2237 * -Restart any timer that is already running but has a value
2238 2238 * longer that the requested timeout.
2239 2239 * -Use the value specified in the query message as the
2240 2240 * maximum timeout.
2241 2241 */
2242 2242 next = INFINITY;
2243 2243
2244 2244 current = CURRENT_MSTIME;
2245 2245 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
2246 2246 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
2247 2247
2248 2248 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2249 2249 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2250 2250 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
2251 2251 continue;
2252 2252 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
2253 2253 &ipv6_all_hosts_mcast)) &&
2254 2254 (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
2255 2255 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
2256 2256 if (timer == 0) {
2257 2257 /* Respond immediately */
2258 2258 ilm->ilm_timer = INFINITY;
2259 2259 ilm->ilm_state = IGMP_IREPORTEDLAST;
2260 2260 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
2261 2261 break;
2262 2262 }
2263 2263 if (ilm->ilm_timer > timer) {
2264 2264 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
2265 2265 if (ilm->ilm_timer < next)
2266 2266 next = ilm->ilm_timer;
2267 2267 ilm->ilm_timer += current;
2268 2268 }
2269 2269 break;
2270 2270 }
2271 2271 }
2272 2272 rw_exit(&ill->ill_mcast_lock);
2273 2273 /* Send any deferred/queued IP packets */
2274 2274 ill_mcast_send_queued(ill);
2275 2275 ill_mcast_timer_start(ill->ill_ipst);
2276 2276
2277 2277 return (next);
2278 2278 }
2279 2279
/*
 * Handles an MLDv2 Listener Query. On error, returns 0; on success,
 * returns the appropriate (non-zero, unsigned) timer value (which may
 * be INFINITY) to be set.
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t	*ilm;
	in6_addr_t *v6group, *src_array;
	uint_t	next, numsrc, i, mrd, delay, qqi, current;
	uint8_t	qrv;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/* extract Maximum Response Delay from code in header */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		/*
		 * Values at or above the fixed-point threshold use a
		 * mantissa/exponent encoding; decode to milliseconds.
		 */
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	if (mrd == 0)
		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);

	/* Choose our random response delay in [0, mrd]. */
	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	/*
	 * Adopt the Querier's Robustness Variable, falling back to the
	 * default when the advertised value is zero.
	 */
	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/* The Querier's Query Interval Code uses the same FP encoding. */
	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	if (ill->ill_global_timer < (current + delay)) {
		/* next is still INFINITY: no timers need (re)starting. */
		rw_exit(&ill->ill_mcast_lock);
		return (next);
	}

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		ill->ill_global_timer = current + delay;
		next = delay;
	} else {
		/* group or group/source specific query */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty). Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/*
			 * Convert the stored absolute deadline back to a
			 * relative time before comparing with our delay,
			 * then re-store the soonest value as absolute.
			 */
			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
			break;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/*
	 * No packets have been sent above - no
	 * ill_mcast_send_queued is needed.
	 */
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}
2424 2424
2425 2425 /*
2426 2426 * Send MLDv1 response packet with hoplimit 1
2427 2427 */
2428 2428 static void
2429 2429 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
2430 2430 {
2431 2431 mblk_t *mp;
2432 2432 mld_hdr_t *mldh;
2433 2433 ip6_t *ip6h;
2434 2434 ip6_hbh_t *ip6hbh;
2435 2435 struct ip6_opt_router *ip6router;
2436 2436 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
2437 2437 ill_t *ill = ilm->ilm_ill;
2438 2438
2439 2439 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
2440 2440
2441 2441 /*
2442 2442 * We need to place a router alert option in this packet. The length
2443 2443 * of the options must be a multiple of 8. The hbh option header is 2
2444 2444 * bytes followed by the 4 byte router alert option. That leaves
2445 2445 * 2 bytes of pad for a total of 8 bytes.
2446 2446 */
2447 2447 const int router_alert_length = 8;
2448 2448
2449 2449 ASSERT(ill->ill_isv6);
2450 2450
2451 2451 size += router_alert_length;
2452 2452 mp = allocb(size, BPRI_HI);
2453 2453 if (mp == NULL)
2454 2454 return;
2455 2455 bzero(mp->b_rptr, size);
2456 2456 mp->b_wptr = mp->b_rptr + size;
2457 2457
2458 2458 ip6h = (ip6_t *)mp->b_rptr;
2459 2459 ip6hbh = (struct ip6_hbh *)&ip6h[1];
2460 2460 ip6router = (struct ip6_opt_router *)&ip6hbh[1];
2461 2461 /*
2462 2462 * A zero is a pad option of length 1. The bzero of the whole packet
2463 2463 * above will pad between ip6router and mld.
2464 2464 */
2465 2465 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);
2466 2466
2467 2467 mldh->mld_type = type;
2468 2468 mldh->mld_addr = ilm->ilm_v6addr;
2469 2469
2470 2470 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2471 2471 ip6router->ip6or_len = 2;
2472 2472 ip6router->ip6or_value[0] = 0;
2473 2473 ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2474 2474
2475 2475 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2476 2476 ip6hbh->ip6h_len = 0;
2477 2477
2478 2478 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2479 2479 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
2480 2480 ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2481 2481 ip6h->ip6_hops = MLD_HOP_LIMIT;
2482 2482 if (v6addr == NULL)
2483 2483 ip6h->ip6_dst = ilm->ilm_v6addr;
2484 2484 else
2485 2485 ip6h->ip6_dst = *v6addr;
2486 2486
2487 2487 ip6h->ip6_src = ipv6_all_zeros;
2488 2488 /*
2489 2489 * Prepare for checksum by putting icmp length in the icmp
2490 2490 * checksum field. The checksum is calculated in ip_output.
2491 2491 */
2492 2492 mldh->mld_cksum = htons(sizeof (*mldh));
2493 2493
2494 2494 ill_mcast_queue(ill, mp);
2495 2495 }
2496 2496
2497 2497 /*
2498 2498 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The
2499 2499 * report will contain one multicast address record for each element of
2500 2500 * reclist. If this causes packet length to exceed ill->ill_mc_mtu,
2501 2501 * multiple reports are sent. reclist is assumed to be made up of
2502 2502 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
2503 2503 */
2504 2504 static void
2505 2505 mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
2506 2506 {
2507 2507 mblk_t *mp;
2508 2508 mld2r_t *mld2r;
2509 2509 mld2mar_t *mld2mar;
2510 2510 in6_addr_t *srcarray;
2511 2511 ip6_t *ip6h;
2512 2512 ip6_hbh_t *ip6hbh;
2513 2513 struct ip6_opt_router *ip6router;
2514 2514 size_t size, optlen, padlen, icmpsize, rsize;
2515 2515 int i, numrec, more_src_cnt;
2516 2516 mrec_t *rp, *cur_reclist;
2517 2517 mrec_t *next_reclist = reclist;
2518 2518 boolean_t morepkts;
2519 2519
2520 2520 /* If there aren't any records, there's nothing to send */
2521 2521 if (reclist == NULL)
2522 2522 return;
2523 2523
2524 2524 ASSERT(ill->ill_isv6);
2525 2525 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
2526 2526
2527 2527 /*
2528 2528 * Total option length (optlen + padlen) must be a multiple of
2529 2529 * 8 bytes. We assume here that optlen <= 8, so the total option
2530 2530 * length will be 8. Assert this in case anything ever changes.
2531 2531 */
2532 2532 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
2533 2533 ASSERT(optlen <= 8);
2534 2534 padlen = 8 - optlen;
2535 2535 nextpkt:
2536 2536 icmpsize = sizeof (mld2r_t);
2537 2537 size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
2538 2538 morepkts = B_FALSE;
2539 2539 more_src_cnt = 0;
2540 2540 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
2541 2541 rp = rp->mrec_next, numrec++) {
2542 2542 rsize = sizeof (mld2mar_t) +
2543 2543 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
2544 2544 if (size + rsize > ill->ill_mc_mtu) {
2545 2545 if (rp == cur_reclist) {
2546 2546 /*
2547 2547 * If the first mrec we looked at is too big
2548 2548 * to fit in a single packet (i.e the source
2549 2549 * list is too big), we must either truncate
2550 2550 * the list (if TO_EX or IS_EX), or send
2551 2551 * multiple reports for the same group (all
2552 2552 * other types).
2553 2553 */
2554 2554 int srcspace, srcsperpkt;
2555 2555 srcspace = ill->ill_mc_mtu -
2556 2556 (size + sizeof (mld2mar_t));
2557 2557
2558 2558 /*
2559 2559 * Skip if there's not even enough room in
2560 2560 * a single packet to send something useful.
2561 2561 */
2562 2562 if (srcspace <= sizeof (in6_addr_t))
2563 2563 continue;
2564 2564
2565 2565 srcsperpkt = srcspace / sizeof (in6_addr_t);
2566 2566 /*
2567 2567 * Increment icmpsize and size, because we will
2568 2568 * be sending a record for the mrec we're
2569 2569 * looking at now.
2570 2570 */
2571 2571 rsize = sizeof (mld2mar_t) +
2572 2572 (srcsperpkt * sizeof (in6_addr_t));
2573 2573 icmpsize += rsize;
2574 2574 size += rsize;
2575 2575 if (rp->mrec_type == MODE_IS_EXCLUDE ||
2576 2576 rp->mrec_type == CHANGE_TO_EXCLUDE) {
2577 2577 rp->mrec_srcs.sl_numsrc = srcsperpkt;
2578 2578 if (rp->mrec_next == NULL) {
2579 2579 /* no more packets to send */
2580 2580 break;
2581 2581 } else {
2582 2582 /*
2583 2583 * more packets, but we're
2584 2584 * done with this mrec.
2585 2585 */
2586 2586 next_reclist = rp->mrec_next;
2587 2587 }
2588 2588 } else {
2589 2589 more_src_cnt = rp->mrec_srcs.sl_numsrc
2590 2590 - srcsperpkt;
2591 2591 rp->mrec_srcs.sl_numsrc = srcsperpkt;
2592 2592 /*
2593 2593 * We'll fix up this mrec (remove the
2594 2594 * srcs we've already sent) before
2595 2595 * returning to nextpkt above.
2596 2596 */
2597 2597 next_reclist = rp;
2598 2598 }
2599 2599 } else {
2600 2600 next_reclist = rp;
2601 2601 }
2602 2602 morepkts = B_TRUE;
2603 2603 break;
2604 2604 }
2605 2605 icmpsize += rsize;
2606 2606 size += rsize;
2607 2607 }
2608 2608
2609 2609 mp = allocb(size, BPRI_HI);
2610 2610 if (mp == NULL)
2611 2611 goto free_reclist;
2612 2612 bzero(mp->b_rptr, size);
2613 2613 mp->b_wptr = mp->b_rptr + size;
2614 2614
2615 2615 ip6h = (ip6_t *)mp->b_rptr;
2616 2616 ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
2617 2617 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
2618 2618 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
2619 2619 mld2mar = (mld2mar_t *)&(mld2r[1]);
2620 2620
2621 2621 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2622 2622 ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
2623 2623 ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2624 2624 ip6h->ip6_hops = MLD_HOP_LIMIT;
2625 2625 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
2626 2626 ip6h->ip6_src = ipv6_all_zeros;
2627 2627
2628 2628 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2629 2629 /*
2630 2630 * ip6h_len is the number of 8-byte words, not including the first
2631 2631 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
2632 2632 */
2633 2633 ip6hbh->ip6h_len = 0;
2634 2634
2635 2635 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2636 2636 ip6router->ip6or_len = 2;
2637 2637 ip6router->ip6or_value[0] = 0;
2638 2638 ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2639 2639
2640 2640 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
2641 2641 mld2r->mld2r_nummar = htons(numrec);
2642 2642 /*
2643 2643 * Prepare for the checksum by putting icmp length in the icmp
2644 2644 * checksum field. The checksum is calculated in ip_output_simple.
2645 2645 */
2646 2646 mld2r->mld2r_cksum = htons(icmpsize);
2647 2647
2648 2648 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
2649 2649 mld2mar->mld2mar_type = rp->mrec_type;
2650 2650 mld2mar->mld2mar_auxlen = 0;
2651 2651 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
2652 2652 mld2mar->mld2mar_group = rp->mrec_group;
2653 2653 srcarray = (in6_addr_t *)&(mld2mar[1]);
2654 2654
2655 2655 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
2656 2656 srcarray[i] = rp->mrec_srcs.sl_addr[i];
2657 2657
2658 2658 mld2mar = (mld2mar_t *)&(srcarray[i]);
2659 2659 }
2660 2660
2661 2661 ill_mcast_queue(ill, mp);
2662 2662
2663 2663 if (morepkts) {
2664 2664 if (more_src_cnt > 0) {
2665 2665 int index, mvsize;
2666 2666 slist_t *sl = &next_reclist->mrec_srcs;
2667 2667 index = sl->sl_numsrc;
2668 2668 mvsize = more_src_cnt * sizeof (in6_addr_t);
2669 2669 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2670 2670 mvsize);
2671 2671 sl->sl_numsrc = more_src_cnt;
2672 2672 }
2673 2673 goto nextpkt;
2674 2674 }
2675 2675
2676 2676 free_reclist:
2677 2677 while (reclist != NULL) {
2678 2678 rp = reclist->mrec_next;
2679 2679 mi_free(reclist);
2680 2680 reclist = rp;
2681 2681 }
2682 2682 }
2683 2683
2684 2684 static mrec_t *
2685 2685 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
2686 2686 mrec_t *next)
2687 2687 {
2688 2688 mrec_t *rp;
2689 2689 int i;
2690 2690
2691 2691 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
2692 2692 SLIST_IS_EMPTY(srclist))
2693 2693 return (next);
2694 2694
2695 2695 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
2696 2696 if (rp == NULL)
2697 2697 return (next);
2698 2698
2699 2699 rp->mrec_next = next;
2700 2700 rp->mrec_type = type;
2701 2701 rp->mrec_auxlen = 0;
2702 2702 rp->mrec_group = *grp;
2703 2703 if (srclist == NULL) {
2704 2704 rp->mrec_srcs.sl_numsrc = 0;
2705 2705 } else {
2706 2706 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
2707 2707 for (i = 0; i < srclist->sl_numsrc; i++)
2708 2708 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
2709 2709 }
2710 2710
2711 2711 return (rp);
2712 2712 }
2713 2713
2714 2714 /*
2715 2715 * Set up initial retransmit state. If memory cannot be allocated for
2716 2716 * the source lists, simply create as much state as is possible; memory
2717 2717 * allocation failures are considered one type of transient error that
2718 2718 * the retransmissions are designed to overcome (and if they aren't
2719 2719 * transient, there are bigger problems than failing to notify the
2720 2720 * router about multicast group membership state changes).
2721 2721 */
2722 2722 static void
2723 2723 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
2724 2724 slist_t *flist)
2725 2725 {
2726 2726 /*
2727 2727 * There are only three possibilities for rtype:
2728 2728 * New join, transition from INCLUDE {} to INCLUDE {flist}
2729 2729 * => rtype is ALLOW_NEW_SOURCES
2730 2730 * New join, transition from INCLUDE {} to EXCLUDE {flist}
2731 2731 * => rtype is CHANGE_TO_EXCLUDE
2732 2732 * State change that involves a filter mode change
2733 2733 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
2734 2734 */
2735 2735 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
2736 2736 rtype == ALLOW_NEW_SOURCES);
2737 2737
2738 2738 rtxp->rtx_cnt = ill->ill_mcast_rv;
2739 2739
2740 2740 switch (rtype) {
2741 2741 case CHANGE_TO_EXCLUDE:
2742 2742 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
2743 2743 CLEAR_SLIST(rtxp->rtx_allow);
2744 2744 COPY_SLIST(flist, rtxp->rtx_block);
2745 2745 break;
2746 2746 case ALLOW_NEW_SOURCES:
2747 2747 case CHANGE_TO_INCLUDE:
2748 2748 rtxp->rtx_fmode_cnt =
2749 2749 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
2750 2750 CLEAR_SLIST(rtxp->rtx_block);
2751 2751 COPY_SLIST(flist, rtxp->rtx_allow);
2752 2752 break;
2753 2753 }
2754 2754 }
2755 2755
/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0.
 *	Merge existing rtx allow and block lists with new state:
 *	rtx_allow = (new allow + rtx_allow) - new block
 *	rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 *
 * Returns the mrec list to transmit: either the (possibly modified)
 * mreclist passed in, or a list extended with a freshly built ALLOW or
 * BLOCK record.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	ill = ilm->ilm_ill;

	/* nothing to send means nothing to merge */
	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		/*
		 * Replace the rtx list corresponding to the current
		 * filter mode with the new full source list, and clear
		 * the other; the report re-states the whole filter.
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		/*
		 * mreclist here holds at most one ALLOW and one BLOCK
		 * record; find whichever of the two are present.
		 */
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			/* rtx_allow -= mrec_block */
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			/* rtx_block -= mrec_allow; mrec_allow += rtx_allow */
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			/* mrec_block += rtx_block, then save as new rtx_block */
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/*
			 * No BLOCK mrec in the input; build one from the
			 * merged rtx block list so it still gets reported.
			 */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/* likewise build a missing ALLOW record */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}
↓ open down ↓ |
663 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX