1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
27 */
28
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/callb.h>
32 #include <sys/conf.h>
33 #include <sys/cmn_err.h>
34 #include <sys/disp.h>
35 #include <sys/list.h>
36 #include <sys/ksynch.h>
37 #include <sys/kmem.h>
38 #include <sys/stream.h>
39 #include <sys/modctl.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/atomic.h>
43 #include <sys/stat.h>
44 #include <sys/byteorder.h>
45 #include <sys/strsun.h>
46 #include <sys/isa_defs.h>
47 #include <sys/sdt.h>
48
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51
/*
 * All-zero Ethernet address.
 * NOTE(review): no reference to this variable is visible in this part of
 * the file; presumably it is used as an "unset address" value -- confirm.
 */
static struct ether_addr etherzeroaddr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
 *
 * Used as the destination address of the frames built by fill_lacp_ether()
 * and as the multicast address that aggr_lacp_mcast_on()/_off() enable or
 * disable on an attached port.
 */
static struct ether_addr slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
62
#ifdef DEBUG
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf() argument list, e.g.
 * AGGR_LACP_DBG(("port %d\n", id)); and prints it only when the
 * aggr_lacp_debug tunable is non-zero.  On non-DEBUG builds it expands to
 * nothing.
 *
 * Both variants are wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it requires the trailing semicolon and is safe
 * inside an unbraced if/else (no dangling-else capture, no stray empty
 * statement).
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)					\
	do {							\
		if (aggr_lacp_debug) {				\
			(void) printf x;			\
		}						\
	} while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif	/* DEBUG */
70
/*
 * Nanoseconds per second; compared against gethrtime() deltas when rate
 * limiting LACPDU transmission in lacp_xmit_sm().
 */
#define	NSECS_PER_SEC	1000000000ll

/* used by lacp_misconfig_walker() */
typedef struct lacp_misconfig_check_state_s {
	aggr_port_t	*cs_portp;
	boolean_t	cs_found;
} lacp_misconfig_check_state_t;

/*
 * State name tables used when printing state transitions in debug
 * messages; lacp_periodic_str and lacp_mux_str are indexed by the
 * corresponding state value.
 * NOTE(review): no use of lacp_receive_str is visible in this part of the
 * file; presumably it is indexed by the receive state -- confirm.
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;

/*
 * Default priorities: lacp_port_priority is copied into each port's
 * ActorPortPriority by aggr_lacp_init_port(), lacp_system_priority into
 * each group's ActorSystemPriority by aggr_lacp_init_grp().
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;
85
/*
 * Maintains a list of all ports in ATTACHED state. This information
 * is used to detect misconfiguration.
 *
 * The list is singly linked through sp_next, headed by sel_ports, and is
 * walked with lacp_sel_lock held (see lacp_misconfig_check()).
 */
typedef struct lacp_sel_ports {
	/* link id of the owning group; compared against lg_linkid */
	datalink_id_t sp_grp_linkid;
	/* NOTE(review): presumably the link id of the port itself -- confirm */
	datalink_id_t sp_linkid;
	/* Note: sp_partner_system must be 2-byte aligned */
	struct ether_addr sp_partner_system;
	/* partner key; compared against the group's PartnerOperAggrKey */
	uint32_t sp_partner_key;
	/* next entry in the list, NULL at the tail */
	struct lacp_sel_ports *sp_next;
} lacp_sel_ports_t;

/* Head of the selected-ports list and the mutex held while walking it. */
static lacp_sel_ports_t *sel_ports = NULL;
static kmutex_t lacp_sel_lock;
101
/*
 * Forward declarations of file-local functions.
 *
 * The *_timer_pop() routines take a void * because they are passed to
 * timeout() as callbacks; the matching *_timer_pop_handler() routines are
 * called from aggr_port_timer_thread() with the aggr's mac perimeter held.
 */
static void periodic_timer_pop(void *);
static void periodic_timer_pop_handler(aggr_port_t *);
static void lacp_xmit_sm(aggr_port_t *);
static void lacp_periodic_sm(aggr_port_t *);
static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
static void lacp_on(aggr_port_t *);
static void lacp_off(aggr_port_t *);
static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
static void lacp_receive_sm(aggr_port_t *, lacp_t *);
static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
static void start_wait_while_timer(aggr_port_t *);
static void stop_wait_while_timer(aggr_port_t *);
static void lacp_reset_port(aggr_port_t *);
static void stop_current_while_timer(aggr_port_t *);
static void current_while_timer_pop(void *);
static void current_while_timer_pop_handler(aggr_port_t *);
static void update_default_selected(aggr_port_t *);
static boolean_t update_selected(aggr_port_t *, lacp_t *);
static boolean_t lacp_sel_ports_add(aggr_port_t *);
static void lacp_sel_ports_del(aggr_port_t *);
static void wait_while_timer_pop(void *);
static void wait_while_timer_pop_handler(aggr_port_t *);
125
/*
 * One-time LACP setup: initializes lacp_sel_lock, the mutex held while
 * walking the sel_ports list.  Undone by aggr_lacp_fini().
 */
void
aggr_lacp_init(void)
{
	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
}
131
/*
 * LACP teardown: destroys lacp_sel_lock, which was initialized by
 * aggr_lacp_init().
 */
void
aggr_lacp_fini(void)
{
	mutex_destroy(&lacp_sel_lock);
}
137
/*
 * The following functions are used for handling LACP timers.
 *
 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
 * handler routine, otherwise it may cause deadlock with the untimeout() call
 * which is usually called with the mac perimeter held. Instead, a
 * lacp_timer_lock mutex is introduced, which protects a bitwise flag
 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
 * routines and is checked by a dedicated thread, that executes the real
 * timeout operation.
 *
 * aggr_port_timer_thread() is that dedicated thread; one is created per
 * port by aggr_lacp_init_port().  'arg' is the aggr_port_t it serves.  The
 * thread loops until LACP_THREAD_EXIT is set in lacp_timer_bits or the
 * port is found to be closing, then clears lacp_timer_thread, wakes any
 * waiter on lacp_timer_cv, drops its port reference and exits.
 */
static void
aggr_port_timer_thread(void *arg)
{
	aggr_port_t		*port = arg;
	aggr_lacp_port_t	*pl = &port->lp_lacp;
	aggr_grp_t		*grp = port->lp_grp;
	uint32_t		lacp_timer_bits;
	mac_perim_handle_t	mph;
	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
	    "aggr_port_timer_thread");

	mutex_enter(&pl->lacp_timer_lock);

	for (;;) {

		/* Snapshot the pending events; sleep if there are none. */
		if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
			continue;
		}
		/* The snapshot now owns these events. */
		pl->lacp_timer_bits = 0;

		if (lacp_timer_bits & LACP_THREAD_EXIT)
			break;

		/*
		 * A timer that has fired no longer has a pending timeout;
		 * clear its id so that the start_*_timer() routines (which
		 * only arm a timer whose id is 0) can rearm it.
		 */
		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			pl->periodic_timer.id = 0;
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			pl->wait_while_timer.id = 0;
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			pl->current_while_timer.id = 0;

		/* Never hold lacp_timer_lock while entering the perimeter. */
		mutex_exit(&pl->lacp_timer_lock);

		mac_perim_enter_by_mh(grp->lg_mh, &mph);
		if (port->lp_closing) {
			/* Port is going away: skip the handlers and exit. */
			mac_perim_exit(mph);
			mutex_enter(&pl->lacp_timer_lock);
			break;
		}

		/* Run the real timeout work with the perimeter held. */
		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			periodic_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			wait_while_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			current_while_timer_pop_handler(port);
		mac_perim_exit(mph);

		/* An exit request may have arrived while the lock was dropped. */
		mutex_enter(&pl->lacp_timer_lock);
		if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
			break;
	}

	/*
	 * Every path out of the loop holds lacp_timer_lock.  Mark the
	 * thread as gone and wake anyone sleeping on lacp_timer_cv.
	 */
	pl->lacp_timer_bits = 0;
	pl->lacp_timer_thread = NULL;
	cv_broadcast(&pl->lacp_timer_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);

	/*
	 * Release the reference of the grp so aggr_grp_delete() can call
	 * mac_unregister() safely.
	 */
	aggr_grp_port_rele(port);
	thread_exit();
}
220
221 /*
222 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
223 * could not be performed due to a memory allocation error, B_TRUE otherwise.
224 */
225 static boolean_t
226 lacp_port_select(aggr_port_t *portp)
227 {
228 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
229
230 if (!lacp_sel_ports_add(portp))
231 return (B_FALSE);
232 portp->lp_lacp.sm.selected = AGGR_SELECTED;
233 return (B_TRUE);
234 }
235
236 /*
237 * Set the port LACP state to UNSELECTED.
238 */
239 static void
240 lacp_port_unselect(aggr_port_t *portp)
241 {
242 aggr_grp_t *grp = portp->lp_grp;
243
244 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
245
246 lacp_sel_ports_del(portp);
247 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
248 }
249
250 /*
251 * Initialize group specific LACP state and parameters.
252 */
253 void
254 aggr_lacp_init_grp(aggr_grp_t *aggrp)
255 {
256 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
257 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
258 aggrp->aggr.CollectorMaxDelay = 10;
259 aggrp->lg_lacp_mode = AGGR_LACP_OFF;
260 aggrp->aggr.ready = B_FALSE;
261 }
262
263 /*
264 * Complete LACP info initialization at port creation time.
265 */
266 void
267 aggr_lacp_init_port(aggr_port_t *portp)
268 {
269 aggr_grp_t *aggrp = portp->lp_grp;
270 aggr_lacp_port_t *pl = &portp->lp_lacp;
271
272 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
273 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
274
275 /* actor port # */
276 pl->ActorPortNumber = portp->lp_portid;
277 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
278 "ActorPortNumber = 0x%x\n", portp->lp_linkid,
279 pl->ActorPortNumber));
280
281 pl->ActorPortPriority = (uint16_t)lacp_port_priority;
282 pl->ActorPortAggrId = 0; /* aggregator id - not used */
283 pl->NTT = B_FALSE; /* need to transmit */
284
285 pl->ActorAdminPortKey = aggrp->lg_key;
286 pl->ActorOperPortKey = pl->ActorAdminPortKey;
287 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
288 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
289 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
290
291 /* Actor admin. port state */
292 pl->ActorAdminPortState.bit.activity = B_FALSE;
293 pl->ActorAdminPortState.bit.timeout = B_TRUE;
294 pl->ActorAdminPortState.bit.aggregation = B_TRUE;
295 pl->ActorAdminPortState.bit.sync = B_FALSE;
296 pl->ActorAdminPortState.bit.collecting = B_FALSE;
297 pl->ActorAdminPortState.bit.distributing = B_FALSE;
298 pl->ActorAdminPortState.bit.defaulted = B_FALSE;
299 pl->ActorAdminPortState.bit.expired = B_FALSE;
300 pl->ActorOperPortState = pl->ActorAdminPortState;
301
302 /*
303 * Partner Administrative Information
304 * (All initialized to zero except for the following)
305 * Fast Timeouts.
306 */
307 pl->PartnerAdminPortState.bit.timeout =
308 pl->PartnerOperPortState.bit.timeout = B_TRUE;
309
310 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
311
312 /*
313 * State machine information.
314 */
315 pl->sm.lacp_on = B_FALSE; /* LACP Off default */
316 pl->sm.begin = B_TRUE; /* Prevents transmissions */
317 pl->sm.lacp_enabled = B_FALSE;
318 pl->sm.port_enabled = B_FALSE; /* Link Down */
319 pl->sm.actor_churn = B_FALSE;
320 pl->sm.partner_churn = B_FALSE;
321 pl->sm.ready_n = B_FALSE;
322 pl->sm.port_moved = B_FALSE;
323
324 lacp_port_unselect(portp);
325
326 pl->sm.periodic_state = LACP_NO_PERIODIC;
327 pl->sm.receive_state = LACP_INITIALIZE;
328 pl->sm.mux_state = LACP_DETACHED;
329 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
330
331 /*
332 * Timer information.
333 */
334 pl->current_while_timer.id = 0;
335 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
336
337 pl->periodic_timer.id = 0;
338 pl->periodic_timer.val = FAST_PERIODIC_TIME;
339
340 pl->wait_while_timer.id = 0;
341 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
342
343 pl->lacp_timer_bits = 0;
344
345 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
346 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
347
348 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
349 portp, 0, &p0, TS_RUN, minclsyspri);
350
351 /*
352 * Hold a reference of the grp and the port and this reference will
353 * be release when the thread exits.
354 *
355 * The reference on the port is used for aggr_port_delete() to
356 * continue without waiting for the thread to exit; the reference
357 * on the grp is used for aggr_grp_delete() to wait for the thread
358 * to exit before calling mac_unregister().
359 */
360 aggr_grp_port_hold(portp);
361 }
362
363 /*
364 * Port initialization when we need to
365 * turn LACP on/off, etc. Not everything is
366 * reset like in the above routine.
367 * Do NOT modify things like link status.
368 */
369 static void
370 lacp_reset_port(aggr_port_t *portp)
371 {
372 aggr_lacp_port_t *pl = &portp->lp_lacp;
373
374 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
375
376 pl->NTT = B_FALSE; /* need to transmit */
377
378 /* reset operational port state */
379 pl->ActorOperPortState.bit.timeout =
380 pl->ActorAdminPortState.bit.timeout;
381
382 pl->ActorOperPortState.bit.sync = B_FALSE;
383 pl->ActorOperPortState.bit.collecting = B_FALSE;
384 pl->ActorOperPortState.bit.distributing = B_FALSE;
385 pl->ActorOperPortState.bit.defaulted = B_TRUE;
386 pl->ActorOperPortState.bit.expired = B_FALSE;
387
388 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */
389 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
390
391 /*
392 * State machine information.
393 */
394 pl->sm.begin = B_TRUE; /* Prevents transmissions */
395 pl->sm.actor_churn = B_FALSE;
396 pl->sm.partner_churn = B_FALSE;
397 pl->sm.ready_n = B_FALSE;
398
399 lacp_port_unselect(portp);
400
401 pl->sm.periodic_state = LACP_NO_PERIODIC;
402 pl->sm.receive_state = LACP_INITIALIZE;
403 pl->sm.mux_state = LACP_DETACHED;
404 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
405
406 /*
407 * Timer information.
408 */
409 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
410 pl->periodic_timer.val = FAST_PERIODIC_TIME;
411 }
412
413 static void
414 aggr_lacp_mcast_on(aggr_port_t *port)
415 {
416 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
417 ASSERT(MAC_PERIM_HELD(port->lp_mh));
418
419 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
420 return;
421
422 (void) aggr_port_multicst(port, B_TRUE,
423 (uchar_t *)&slow_multicast_addr);
424 }
425
426 static void
427 aggr_lacp_mcast_off(aggr_port_t *port)
428 {
429 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
430 ASSERT(MAC_PERIM_HELD(port->lp_mh));
431
432 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
433 return;
434
435 (void) aggr_port_multicst(port, B_FALSE,
436 (uchar_t *)&slow_multicast_addr);
437 }
438
439 static void
440 start_periodic_timer(aggr_port_t *portp)
441 {
442 aggr_lacp_port_t *pl = &portp->lp_lacp;
443
444 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
445
446 mutex_enter(&pl->lacp_timer_lock);
447 if (pl->periodic_timer.id == 0) {
448 pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
449 drv_sectohz(portp->lp_lacp.periodic_timer.val));
450 }
451 mutex_exit(&pl->lacp_timer_lock);
452 }
453
454 static void
455 stop_periodic_timer(aggr_port_t *portp)
456 {
457 aggr_lacp_port_t *pl = &portp->lp_lacp;
458 timeout_id_t id;
459
460 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
461
462 mutex_enter(&pl->lacp_timer_lock);
463 if ((id = pl->periodic_timer.id) != 0) {
464 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
465 pl->periodic_timer.id = 0;
466 }
467 mutex_exit(&pl->lacp_timer_lock);
468
469 if (id != 0)
470 (void) untimeout(id);
471 }
472
473 /*
474 * When the timer pops, we arrive here to
475 * clear out LACPDU count as well as transmit an
476 * LACPDU. We then set the periodic state and let
477 * the periodic state machine restart the timer.
478 */
479 static void
480 periodic_timer_pop(void *data)
481 {
482 aggr_port_t *portp = data;
483 aggr_lacp_port_t *pl = &portp->lp_lacp;
484
485 mutex_enter(&pl->lacp_timer_lock);
486 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
487 cv_broadcast(&pl->lacp_timer_cv);
488 mutex_exit(&pl->lacp_timer_lock);
489 }
490
491 /*
492 * When the timer pops, we arrive here to
493 * clear out LACPDU count as well as transmit an
494 * LACPDU. We then set the periodic state and let
495 * the periodic state machine restart the timer.
496 */
497 static void
498 periodic_timer_pop_handler(aggr_port_t *portp)
499 {
500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
501
502 portp->lp_lacp_stats.LACPDUsTx = 0;
503
504 /* current timestamp */
505 portp->lp_lacp.time = gethrtime();
506 portp->lp_lacp.NTT = B_TRUE;
507 lacp_xmit_sm(portp);
508
509 /*
510 * Set Periodic State machine state based on the
511 * value of the Partner Operation Port State timeout
512 * bit.
513 */
514 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
515 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
516 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
517 } else {
518 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
519 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
520 }
521
522 lacp_periodic_sm(portp);
523 }
524
525 /*
526 * Invoked from:
527 * - startup upon aggregation
528 * - when the periodic timer pops
529 * - when the periodic timer value is changed
530 * - when the port is attached or detached
531 * - when LACP mode is changed.
532 */
533 static void
534 lacp_periodic_sm(aggr_port_t *portp)
535 {
536 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
537 aggr_lacp_port_t *pl = &portp->lp_lacp;
538
539 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
540
541 /* LACP_OFF state not in specification so check here. */
542 if (!pl->sm.lacp_on) {
543 /* Stop timer whether it is running or not */
544 stop_periodic_timer(portp);
545 pl->sm.periodic_state = LACP_NO_PERIODIC;
546 pl->NTT = B_FALSE;
547 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
548 "%s--->%s\n", portp->lp_linkid,
549 lacp_periodic_str[oldstate],
550 lacp_periodic_str[pl->sm.periodic_state]));
551 return;
552 }
553
554 if (pl->sm.begin || !pl->sm.lacp_enabled ||
555 !pl->sm.port_enabled ||
556 !pl->ActorOperPortState.bit.activity &&
557 !pl->PartnerOperPortState.bit.activity) {
558
559 /* Stop timer whether it is running or not */
560 stop_periodic_timer(portp);
561 pl->sm.periodic_state = LACP_NO_PERIODIC;
562 pl->NTT = B_FALSE;
563 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
564 portp->lp_linkid, lacp_periodic_str[oldstate],
565 lacp_periodic_str[pl->sm.periodic_state]));
566 return;
567 }
568
569 /*
570 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
571 * has been received. Then after we timeout, then it is
572 * possible to go to SLOW_PERIODIC_TIME.
573 */
574 if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
575 pl->periodic_timer.val = FAST_PERIODIC_TIME;
576 pl->sm.periodic_state = LACP_FAST_PERIODIC;
577 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
578 pl->PartnerOperPortState.bit.timeout) {
579 /*
580 * If we receive a bit indicating we are going to
581 * fast periodic from slow periodic, stop the timer
582 * and let the periodic_timer_pop routine deal
583 * with reseting the periodic state and transmitting
584 * a LACPDU.
585 */
586 stop_periodic_timer(portp);
587 periodic_timer_pop_handler(portp);
588 }
589
590 /* Rearm timer with value provided by partner */
591 start_periodic_timer(portp);
592 }
593
594 /*
595 * This routine transmits an LACPDU if lacp_enabled
596 * is TRUE and if NTT is set.
597 */
598 static void
599 lacp_xmit_sm(aggr_port_t *portp)
600 {
601 aggr_lacp_port_t *pl = &portp->lp_lacp;
602 size_t len;
603 mblk_t *mp;
604 hrtime_t now, elapsed;
605
606 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
607
608 /* LACP_OFF state not in specification so check here. */
609 if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
610 return;
611
612 /*
613 * Do nothing if LACP has been turned off or if the
614 * periodic state machine is not enabled.
615 */
616 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
617 !pl->sm.lacp_enabled || pl->sm.begin) {
618 pl->NTT = B_FALSE;
619 return;
620 }
621
622 /*
623 * If we have sent 5 Slow packets in the last second, avoid
624 * sending any more here. No more than three LACPDUs may be transmitted
625 * in any Fast_Periodic_Time interval.
626 */
627 if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
628 /*
629 * Grab the current time value and see if
630 * more than 1 second has passed. If so,
631 * reset the timestamp and clear the count.
632 */
633 now = gethrtime();
634 elapsed = now - pl->time;
635 if (elapsed > NSECS_PER_SEC) {
636 portp->lp_lacp_stats.LACPDUsTx = 0;
637 pl->time = now;
638 } else {
639 return;
640 }
641 }
642
643 len = sizeof (lacp_t) + sizeof (struct ether_header);
644 mp = allocb(len, BPRI_MED);
645 if (mp == NULL)
646 return;
647
648 mp->b_wptr = mp->b_rptr + len;
649 bzero(mp->b_rptr, len);
650
651 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
652 fill_lacp_pdu(portp,
653 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
654
655 /* Send the packet over the first TX ring */
656 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
657 if (mp != NULL)
658 freemsg(mp);
659
660 pl->NTT = B_FALSE;
661 portp->lp_lacp_stats.LACPDUsTx++;
662 }
663
664 /*
665 * Initialize the ethernet header of a LACP packet sent from the specified
666 * port.
667 */
668 static void
669 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
670 {
671 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
672 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
673 ETHERADDRL);
674 ether->ether_type = htons(ETHERTYPE_SLOW);
675 }
676
677 static void
678 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
679 {
680 aggr_lacp_port_t *pl = &portp->lp_lacp;
681 aggr_grp_t *aggrp = portp->lp_grp;
682 mac_perim_handle_t pmph;
683
684 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
685 mac_perim_enter_by_mh(portp->lp_mh, &pmph);
686
687 lacp->subtype = LACP_SUBTYPE;
688 lacp->version = LACP_VERSION;
689
690 /*
691 * Actor Information
692 */
693 lacp->actor_info.tlv_type = ACTOR_TLV;
694 lacp->actor_info.information_len = sizeof (link_info_t);
695 lacp->actor_info.system_priority =
696 htons(aggrp->aggr.ActorSystemPriority);
697 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
698 ETHERADDRL);
699 lacp->actor_info.key = htons(pl->ActorOperPortKey);
700 lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
701 lacp->actor_info.port = htons(pl->ActorPortNumber);
702 lacp->actor_info.state.state = pl->ActorOperPortState.state;
703
704 /*
705 * Partner Information
706 */
707 lacp->partner_info.tlv_type = PARTNER_TLV;
708 lacp->partner_info.information_len = sizeof (link_info_t);
709 lacp->partner_info.system_priority =
710 htons(pl->PartnerOperSysPriority);
711 lacp->partner_info.system_id = pl->PartnerOperSystem;
712 lacp->partner_info.key = htons(pl->PartnerOperKey);
713 lacp->partner_info.port_priority =
714 htons(pl->PartnerOperPortPriority);
715 lacp->partner_info.port = htons(pl->PartnerOperPortNum);
716 lacp->partner_info.state.state = pl->PartnerOperPortState.state;
717
718 /* Collector Information */
719 lacp->tlv_collector = COLLECTOR_TLV;
720 lacp->collector_len = 0x10;
721 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
722
723 /* Termination Information */
724 lacp->tlv_terminator = TERMINATOR_TLV;
725 lacp->terminator_len = 0x0;
726
727 mac_perim_exit(pmph);
728 }
729
/*
 * lacp_mux_sm - LACP mux state machine
 * This state machine is invoked from:
 *	- startup upon aggregation
 *	- from the Selection logic
 *	- when the wait_while_timer pops
 *	- when the aggregation MAC address is changed
 *	- when receiving DL_NOTE_LINK_UP/DOWN
 *	- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
 *	- when LACP mode is changed.
 *	- when a DL_NOTE_SPEED is received
 *
 * The routine runs in two phases.  The first switch decides whether the
 * current mux state must change, and returns if it does not.  The second
 * switch performs the entry actions of the new state.  Entering ATTACHED
 * while the partner is already in sync loops back (goto again) to attempt
 * the transition to COLLECTING_DISTRIBUTING in the same call.  Every
 * entry action except WAITING requests an LACPDU transmission.
 *
 * Must be called with the group's mac perimeter held.
 */
static void
lacp_mux_sm(aggr_port_t *portp)
{
	aggr_grp_t *aggrp = portp->lp_grp;
	boolean_t NTT_updated = B_FALSE;
	aggr_lacp_port_t *pl = &portp->lp_lacp;
	lacp_mux_state_t oldstate = pl->sm.mux_state;

	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));

	/* LACP_OFF state not in specification so check here. */
	if (!pl->sm.lacp_on) {
		/*
		 * With LACP off only the state variables are reset here;
		 * aggr_set_coll_dist() is not called on this path.
		 */
		pl->sm.mux_state = LACP_DETACHED;
		pl->ActorOperPortState.bit.sync = B_FALSE;

		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.collecting =
		    pl->ActorOperPortState.bit.distributing = B_FALSE;
		return;
	}

	/* 'begin' or LACP disabled forces the machine back to DETACHED. */
	if (pl->sm.begin || !pl->sm.lacp_enabled)
		pl->sm.mux_state = LACP_DETACHED;

again:
	/* determine next state, or return if state unchanged */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		/* With 'begin' set, run the DETACHED entry actions below. */
		if (pl->sm.begin) {
			break;
		}

		if ((pl->sm.selected == AGGR_SELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_WAITING;
			break;
		}
		return;

	case LACP_WAITING:
		if (pl->sm.selected == AGGR_UNSELECTED) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		/* Attach only once the aggregator reports ready. */
		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;

	case LACP_ATTACHED:
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		/* Partner in sync: start collecting and distributing. */
		if ((pl->sm.selected == AGGR_SELECTED) &&
		    pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
			break;
		}
		return;

	case LACP_COLLECTING_DISTRIBUTING:
		/* Fall back to ATTACHED on deselection or loss of sync. */
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY) ||
		    !pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;
	}

	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
	    portp->lp_linkid, lacp_mux_str[oldstate],
	    lacp_mux_str[pl->sm.mux_state]));

	/* perform actions on entering a new state */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync =
		    pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		break;

	case LACP_WAITING:
		/* No state bits change here, so no LACPDU is requested. */
		start_wait_while_timer(portp);
		break;

	case LACP_ATTACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync = B_TRUE;
		pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		if (pl->PartnerOperPortState.bit.sync) {
			/*
			 * We had already received an updated sync from
			 * the partner. Attempt to transition to
			 * collecting/distributing now.
			 */
			goto again;
		}
		break;

	case LACP_COLLECTING_DISTRIBUTING:
		if (!pl->ActorOperPortState.bit.collecting &&
		    !pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Enabled.\n",
			    portp->lp_linkid));
		}
		pl->ActorOperPortState.bit.distributing = B_TRUE;

		/* Turn Collector_Distributor back ON */
		aggr_set_coll_dist(portp, B_TRUE);

		pl->ActorOperPortState.bit.collecting = B_TRUE;
		NTT_updated = B_TRUE;
		break;
	}

	/*
	 * If we updated the state of the NTT variable, then
	 * initiate a LACPDU transmission.
	 */
	if (NTT_updated) {
		pl->NTT = B_TRUE;
		lacp_xmit_sm(portp);
	}
} /* lacp_mux_sm */
903
904
905 static int
906 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
907 {
908 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr;
909
910 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
911
912 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
913 portp->lp_linkid));
914
915 /* LACP_OFF state not in specification so check here. */
916 if (!portp->lp_lacp.sm.lacp_on)
917 return (-1);
918
919 if (MBLKL(mp) < sizeof (marker_pdu_t))
920 return (-1);
921
922 if (markerp->version != MARKER_VERSION) {
923 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
924 "version = %d does not match s/w version %d\n",
925 portp->lp_linkid, markerp->version, MARKER_VERSION));
926 return (-1);
927 }
928
929 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
930 /* We do not yet send out MARKER info PDUs */
931 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
932 " MARKER TLV = %d - We don't send out info type!\n",
933 portp->lp_linkid, markerp->tlv_marker));
934 return (-1);
935 }
936
937 if (markerp->tlv_marker != MARKER_INFO_TLV) {
938 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
939 " MARKER TLV = %d \n", portp->lp_linkid,
940 markerp->tlv_marker));
941 return (-1);
942 }
943
944 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
945 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
946 " MARKER length = %d \n", portp->lp_linkid,
947 markerp->marker_len));
948 return (-1);
949 }
950
951 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
952 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
953 " MARKER Port %d not equal to Partner port %d\n",
954 portp->lp_linkid, markerp->requestor_port,
955 portp->lp_lacp.PartnerOperPortNum));
956 return (-1);
957 }
958
959 if (ether_cmp(&markerp->system_id,
960 &portp->lp_lacp.PartnerOperSystem) != 0) {
961 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
962 " MARKER MAC not equal to Partner MAC\n",
963 portp->lp_linkid));
964 return (-1);
965 }
966
967 /*
968 * Turn into Marker Response PDU
969 * and return mblk to sending system
970 */
971 markerp->tlv_marker = MARKER_RESPONSE_TLV;
972
973 /* reuse the space that was used by received ethernet header */
974 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
975 mp->b_rptr -= sizeof (struct ether_header);
976 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
977 return (0);
978 }
979
980 /*
981 * Update the LACP mode (off, active, or passive) of the specified group.
982 */
983 void
984 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
985 {
986 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
987 aggr_port_t *port;
988
989 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
990 ASSERT(!grp->lg_closing);
991
992 if (mode == old_mode)
993 return;
994
995 grp->lg_lacp_mode = mode;
996
997 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
998 port->lp_lacp.ActorAdminPortState.bit.activity =
999 port->lp_lacp.ActorOperPortState.bit.activity =
1000 (mode == AGGR_LACP_ACTIVE);
1001
1002 if (old_mode == AGGR_LACP_OFF) {
1003 /* OFF -> {PASSIVE,ACTIVE} */
1004 /* turn OFF Collector_Distributor */
1005 aggr_set_coll_dist(port, B_FALSE);
1006 lacp_on(port);
1007 } else if (mode == AGGR_LACP_OFF) {
1008 /* {PASSIVE,ACTIVE} -> OFF */
1009 lacp_off(port);
1010 /* Turn ON Collector_Distributor */
1011 aggr_set_coll_dist(port, B_TRUE);
1012 } else {
1013 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1014 port->lp_lacp.sm.begin = B_TRUE;
1015 lacp_mux_sm(port);
1016 lacp_periodic_sm(port);
1017
1018 /* kick off state machines */
1019 lacp_receive_sm(port, NULL);
1020 lacp_mux_sm(port);
1021 }
1022 }
1023 }
1024
1025
1026 /*
1027 * Update the LACP timer (short or long) of the specified group.
1028 */
1029 void
1030 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1031 {
1032 aggr_port_t *port;
1033
1034 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1035
1036 if (timer == grp->aggr.PeriodicTimer)
1037 return;
1038
1039 grp->aggr.PeriodicTimer = timer;
1040
1041 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1042 port->lp_lacp.ActorAdminPortState.bit.timeout =
1043 port->lp_lacp.ActorOperPortState.bit.timeout =
1044 (timer == AGGR_LACP_TIMER_SHORT);
1045 }
1046 }
1047
1048 void
1049 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1050 {
1051 aggr_lacp_mode_t mode;
1052 aggr_lacp_timer_t timer;
1053
1054 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1055
1056 mode = grp->lg_lacp_mode;
1057 timer = grp->aggr.PeriodicTimer;
1058
1059 port->lp_lacp.ActorAdminPortState.bit.activity =
1060 port->lp_lacp.ActorOperPortState.bit.activity =
1061 (mode == AGGR_LACP_ACTIVE);
1062
1063 port->lp_lacp.ActorAdminPortState.bit.timeout =
1064 port->lp_lacp.ActorOperPortState.bit.timeout =
1065 (timer == AGGR_LACP_TIMER_SHORT);
1066
1067 if (mode == AGGR_LACP_OFF) {
1068 /* Turn ON Collector_Distributor */
1069 aggr_set_coll_dist(port, B_TRUE);
1070 } else { /* LACP_ACTIVE/PASSIVE */
1071 lacp_on(port);
1072 }
1073 }
1074
1075 /*
1076 * Sets the initial LACP mode (off, active, passive) and LACP timer
1077 * (short, long) of the specified group.
1078 */
1079 void
1080 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1081 aggr_lacp_timer_t timer)
1082 {
1083 aggr_port_t *port;
1084
1085 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1086
1087 grp->lg_lacp_mode = mode;
1088 grp->aggr.PeriodicTimer = timer;
1089
1090 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1091 aggr_port_lacp_set_mode(grp, port);
1092 }
1093
1094 /*
1095 * Verify that the Partner MAC and Key recorded by the specified
1096 * port are not found in other ports that are not part of our
1097 * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1098 * otherwise.
1099 */
1100 static boolean_t
1101 lacp_misconfig_check(aggr_port_t *portp)
1102 {
1103 aggr_grp_t *grp = portp->lp_grp;
1104 lacp_sel_ports_t *cport;
1105
1106 mutex_enter(&lacp_sel_lock);
1107
1108 for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1109
1110 /* skip entries of the group of the port being checked */
1111 if (cport->sp_grp_linkid == grp->lg_linkid)
1112 continue;
1113
1114 if ((ether_cmp(&cport->sp_partner_system,
1115 &grp->aggr.PartnerSystem) == 0) &&
1116 (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1117 char mac_str[ETHERADDRL*3];
1118 struct ether_addr *mac = &cport->sp_partner_system;
1119
1120 /*
1121 * The Partner port information is already in use
1122 * by ports in another aggregation so disable this
1123 * port.
1124 */
1125
1126 (void) snprintf(mac_str, sizeof (mac_str),
1127 "%x:%x:%x:%x:%x:%x",
1128 mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1129 mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1130 mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1131
1132 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1133
1134 cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1135 "MAC %s and key %d in use on aggregation %d "
1136 "port %d\n", grp->lg_linkid, portp->lp_linkid,
1137 mac_str, portp->lp_lacp.PartnerOperKey,
1138 cport->sp_grp_linkid, cport->sp_linkid);
1139 break;
1140 }
1141 }
1142
1143 mutex_exit(&lacp_sel_lock);
1144 return (cport != NULL);
1145 }
1146
1147 /*
1148 * Remove the specified port from the list of selected ports.
1149 */
1150 static void
1151 lacp_sel_ports_del(aggr_port_t *portp)
1152 {
1153 lacp_sel_ports_t *cport, **prev = NULL;
1154
1155 mutex_enter(&lacp_sel_lock);
1156
1157 prev = &sel_ports;
1158 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1159 cport = cport->sp_next) {
1160 if (portp->lp_linkid == cport->sp_linkid)
1161 break;
1162 }
1163
1164 if (cport == NULL) {
1165 mutex_exit(&lacp_sel_lock);
1166 return;
1167 }
1168
1169 *prev = cport->sp_next;
1170 kmem_free(cport, sizeof (*cport));
1171
1172 mutex_exit(&lacp_sel_lock);
1173 }
1174
1175 /*
1176 * Add the specified port to the list of selected ports. Returns B_FALSE
1177 * if the operation could not be performed due to an memory allocation
1178 * error.
1179 */
1180 static boolean_t
1181 lacp_sel_ports_add(aggr_port_t *portp)
1182 {
1183 lacp_sel_ports_t *new_port;
1184 lacp_sel_ports_t *cport, **last;
1185
1186 mutex_enter(&lacp_sel_lock);
1187
1188 /* check if port is already in the list */
1189 last = &sel_ports;
1190 for (cport = sel_ports; cport != NULL;
1191 last = &cport->sp_next, cport = cport->sp_next) {
1192 if (portp->lp_linkid == cport->sp_linkid) {
1193 ASSERT(cport->sp_partner_key ==
1194 portp->lp_lacp.PartnerOperKey);
1195 ASSERT(ether_cmp(&cport->sp_partner_system,
1196 &portp->lp_lacp.PartnerOperSystem) == 0);
1197
1198 mutex_exit(&lacp_sel_lock);
1199 return (B_TRUE);
1200 }
1201 }
1202
1203 /* create and initialize new entry */
1204 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1205 if (new_port == NULL) {
1206 mutex_exit(&lacp_sel_lock);
1207 return (B_FALSE);
1208 }
1209
1210 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1211 bcopy(&portp->lp_lacp.PartnerOperSystem,
1212 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1213 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1214 new_port->sp_linkid = portp->lp_linkid;
1215
1216 *last = new_port;
1217
1218 mutex_exit(&lacp_sel_lock);
1219 return (B_TRUE);
1220 }
1221
1222 /*
1223 * lacp_selection_logic - LACP selection logic
1224 * Sets the selected variable on a per port basis
1225 * and sets Ready when all waiting ports are ready
1226 * to go online.
1227 *
1228 * parameters:
1229 * - portp - instance this applies to.
1230 *
1231 * invoked:
1232 * - when initialization is needed
1233 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1234 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1235 * - every time the wait_while_timer pops
1236 * - everytime we turn LACP on/off
1237 */
1238 static void
1239 lacp_selection_logic(aggr_port_t *portp)
1240 {
1241 aggr_port_t *tpp;
1242 aggr_grp_t *aggrp = portp->lp_grp;
1243 int ports_waiting;
1244 boolean_t reset_mac = B_FALSE;
1245 aggr_lacp_port_t *pl = &portp->lp_lacp;
1246
1247 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1248
1249 /* LACP_OFF state not in specification so check here. */
1250 if (!pl->sm.lacp_on) {
1251 lacp_port_unselect(portp);
1252 aggrp->aggr.ready = B_FALSE;
1253 lacp_mux_sm(portp);
1254 return;
1255 }
1256
1257 if (pl->sm.begin || !pl->sm.lacp_enabled ||
1258 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1259
1260 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1261 "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1262 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1263 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1264 portp->lp_state));
1265
1266 lacp_port_unselect(portp);
1267 aggrp->aggr.ready = B_FALSE;
1268 lacp_mux_sm(portp);
1269 return;
1270 }
1271
1272 /*
1273 * If LACP is not enabled then selected is never set.
1274 */
1275 if (!pl->sm.lacp_enabled) {
1276 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1277 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1278
1279 lacp_port_unselect(portp);
1280 lacp_mux_sm(portp);
1281 return;
1282 }
1283
1284 /*
1285 * Check if the Partner MAC or Key are zero. If so, we have
1286 * not received any LACP info or it has expired and the
1287 * receive machine is in the LACP_DEFAULTED state.
1288 */
1289 if (ether_cmp(&pl->PartnerOperSystem, ðerzeroaddr) == 0 ||
1290 (pl->PartnerOperKey == 0)) {
1291
1292 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1293 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1294 ðerzeroaddr) != 0 &&
1295 (tpp->lp_lacp.PartnerOperKey != 0))
1296 break;
1297 }
1298
1299 /*
1300 * If all ports have no key or aggregation address,
1301 * then clear the negotiated Partner MAC and key.
1302 */
1303 if (tpp == NULL) {
1304 /* Clear the aggregation Partner MAC and key */
1305 aggrp->aggr.PartnerSystem = etherzeroaddr;
1306 aggrp->aggr.PartnerOperAggrKey = 0;
1307 }
1308
1309 return;
1310 }
1311
1312 /*
1313 * Insure that at least one port in the aggregation
1314 * matches the Partner aggregation MAC and key. If not,
1315 * then clear the aggregation MAC and key. Later we will
1316 * set the Partner aggregation MAC and key to that of the
1317 * current port's Partner MAC and key.
1318 */
1319 if (ether_cmp(&pl->PartnerOperSystem,
1320 &aggrp->aggr.PartnerSystem) != 0 ||
1321 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1322
1323 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1324 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1325 &aggrp->aggr.PartnerSystem) == 0 &&
1326 (tpp->lp_lacp.PartnerOperKey ==
1327 aggrp->aggr.PartnerOperAggrKey)) {
1328 /* Set aggregation Partner MAC and key */
1329 aggrp->aggr.PartnerSystem =
1330 pl->PartnerOperSystem;
1331 aggrp->aggr.PartnerOperAggrKey =
1332 pl->PartnerOperKey;
1333 break;
1334 }
1335 }
1336
1337 if (tpp == NULL) {
1338 /* Clear the aggregation Partner MAC and key */
1339 aggrp->aggr.PartnerSystem = etherzeroaddr;
1340 aggrp->aggr.PartnerOperAggrKey = 0;
1341 reset_mac = B_TRUE;
1342 }
1343 }
1344
1345 /*
1346 * If our Actor MAC is found in the Partner MAC
1347 * on this port then we have a loopback misconfiguration.
1348 */
1349 if (ether_cmp(&pl->PartnerOperSystem,
1350 (struct ether_addr *)&aggrp->lg_addr) == 0) {
1351 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1352 portp->lp_linkid);
1353
1354 lacp_port_unselect(portp);
1355 lacp_mux_sm(portp);
1356 return;
1357 }
1358
1359 /*
1360 * If our Partner MAC and Key are found on any other
1361 * ports that are not in our aggregation, we have
1362 * a misconfiguration.
1363 */
1364 if (lacp_misconfig_check(portp)) {
1365 lacp_mux_sm(portp);
1366 return;
1367 }
1368
1369 /*
1370 * If the Aggregation Partner MAC and Key have not been
1371 * set, then this is either the first port or the aggregation
1372 * MAC and key have been reset. In either case we must set
1373 * the values of the Partner MAC and key.
1374 */
1375 if (ether_cmp(&aggrp->aggr.PartnerSystem, ðerzeroaddr) == 0 &&
1376 (aggrp->aggr.PartnerOperAggrKey == 0)) {
1377 /* Set aggregation Partner MAC and key */
1378 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1379 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1380
1381 /*
1382 * If we reset Partner aggregation MAC, then restart
1383 * selection_logic on ports that match new MAC address.
1384 */
1385 if (reset_mac) {
1386 for (tpp = aggrp->lg_ports; tpp; tpp =
1387 tpp->lp_next) {
1388 if (tpp == portp)
1389 continue;
1390 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1391 &aggrp->aggr.PartnerSystem) == 0 &&
1392 (tpp->lp_lacp.PartnerOperKey ==
1393 aggrp->aggr.PartnerOperAggrKey))
1394 lacp_selection_logic(tpp);
1395 }
1396 }
1397 } else if (ether_cmp(&pl->PartnerOperSystem,
1398 &aggrp->aggr.PartnerSystem) != 0 ||
1399 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1400 /*
1401 * The Partner port information does not match
1402 * that of the other ports in the aggregation
1403 * so disable this port.
1404 */
1405 lacp_port_unselect(portp);
1406
1407 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1408 "or key (%d) incompatible with Aggregation Partner "
1409 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1410 aggrp->aggr.PartnerOperAggrKey);
1411
1412 lacp_mux_sm(portp);
1413 return;
1414 }
1415
1416 /* If we get to here, automatically set selected */
1417 if (pl->sm.selected != AGGR_SELECTED) {
1418 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1419 "selected %d-->%d\n", portp->lp_linkid,
1420 pl->sm.selected, AGGR_SELECTED));
1421 if (!lacp_port_select(portp))
1422 return;
1423 lacp_mux_sm(portp);
1424 }
1425
1426 /*
1427 * From this point onward we have selected the port
1428 * and are simply checking if the Ready flag should
1429 * be set.
1430 */
1431
1432 /*
1433 * If at least two ports are waiting to aggregate
1434 * and ready_n is set on all ports waiting to aggregate
1435 * then set READY for the aggregation.
1436 */
1437
1438 ports_waiting = 0;
1439
1440 if (!aggrp->aggr.ready) {
1441 /*
1442 * If all ports in the aggregation have received compatible
1443 * partner information and they match up correctly with the
1444 * switch, there is no need to wait for all the
1445 * wait_while_timers to pop.
1446 */
1447 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1448 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1449 tpp->lp_lacp.sm.begin) &&
1450 !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1451 /* Add up ports uninitialized or waiting */
1452 ports_waiting++;
1453 if (!tpp->lp_lacp.sm.ready_n) {
1454 DTRACE_PROBE1(port___not__ready,
1455 aggr_port_t *, tpp);
1456 return;
1457 }
1458 }
1459 }
1460 }
1461
1462 if (aggrp->aggr.ready) {
1463 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1464 "aggr.ready already set\n", portp->lp_linkid));
1465 lacp_mux_sm(portp);
1466 } else {
1467 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1468 portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1469 aggrp->aggr.ready = B_TRUE;
1470
1471 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1472 lacp_mux_sm(tpp);
1473 }
1474
1475 }
1476
1477 /*
1478 * wait_while_timer_pop - When the timer pops, we arrive here to
1479 * set ready_n and trigger the selection logic.
1480 */
1481 static void
1482 wait_while_timer_pop(void *data)
1483 {
1484 aggr_port_t *portp = data;
1485 aggr_lacp_port_t *pl = &portp->lp_lacp;
1486
1487 mutex_enter(&pl->lacp_timer_lock);
1488 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1489 cv_broadcast(&pl->lacp_timer_cv);
1490 mutex_exit(&pl->lacp_timer_lock);
1491 }
1492
1493 /*
1494 * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1495 * set ready_n and trigger the selection logic.
1496 */
1497 static void
1498 wait_while_timer_pop_handler(aggr_port_t *portp)
1499 {
1500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1501
1502 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1503 portp->lp_linkid));
1504 portp->lp_lacp.sm.ready_n = B_TRUE;
1505
1506 lacp_selection_logic(portp);
1507 }
1508
1509 static void
1510 start_wait_while_timer(aggr_port_t *portp)
1511 {
1512 aggr_lacp_port_t *pl = &portp->lp_lacp;
1513
1514 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1515
1516 mutex_enter(&pl->lacp_timer_lock);
1517 if (pl->wait_while_timer.id == 0) {
1518 pl->wait_while_timer.id =
1519 timeout(wait_while_timer_pop, portp,
1520 drv_sectohz(portp->lp_lacp.wait_while_timer.val));
1521 }
1522 mutex_exit(&pl->lacp_timer_lock);
1523 }
1524
1525
1526 static void
1527 stop_wait_while_timer(aggr_port_t *portp)
1528 {
1529 aggr_lacp_port_t *pl = &portp->lp_lacp;
1530 timeout_id_t id;
1531
1532 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1533
1534 mutex_enter(&pl->lacp_timer_lock);
1535 if ((id = pl->wait_while_timer.id) != 0) {
1536 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1537 pl->wait_while_timer.id = 0;
1538 }
1539 mutex_exit(&pl->lacp_timer_lock);
1540
1541 if (id != 0)
1542 (void) untimeout(id);
1543 }
1544
1545 /*
1546 * Invoked when a port has been attached to a group.
1547 * Complete the processing that couldn't be finished from lacp_on()
1548 * because the port was not started. We know that the link is full
1549 * duplex and ON, otherwise it wouldn't be attached.
1550 */
1551 void
1552 aggr_lacp_port_attached(aggr_port_t *portp)
1553 {
1554 aggr_grp_t *grp = portp->lp_grp;
1555 aggr_lacp_port_t *pl = &portp->lp_lacp;
1556
1557 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1558 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1559 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1560
1561 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1562 portp->lp_linkid));
1563
1564 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */
1565
1566 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1567 return;
1568
1569 pl->sm.lacp_enabled = B_TRUE;
1570 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1571 pl->sm.begin = B_TRUE;
1572
1573 lacp_receive_sm(portp, NULL);
1574 lacp_mux_sm(portp);
1575
1576 /* Enable Multicast Slow Protocol address */
1577 aggr_lacp_mcast_on(portp);
1578
1579 /* periodic_sm is started up from the receive machine */
1580 lacp_selection_logic(portp);
1581 }
1582
1583 /*
1584 * Invoked when a port has been detached from a group. Turn off
1585 * LACP processing if it was enabled.
1586 */
1587 void
1588 aggr_lacp_port_detached(aggr_port_t *portp)
1589 {
1590 aggr_grp_t *grp = portp->lp_grp;
1591
1592 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1593 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1594
1595 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1596 portp->lp_linkid));
1597
1598 portp->lp_lacp.sm.port_enabled = B_FALSE;
1599
1600 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1601 return;
1602
1603 portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1604 lacp_selection_logic(portp);
1605 lacp_mux_sm(portp);
1606 lacp_periodic_sm(portp);
1607
1608 /*
1609 * Disable Slow Protocol Timers.
1610 */
1611 stop_periodic_timer(portp);
1612 stop_current_while_timer(portp);
1613 stop_wait_while_timer(portp);
1614
1615 /* Disable Multicast Slow Protocol address */
1616 aggr_lacp_mcast_off(portp);
1617 aggr_set_coll_dist(portp, B_FALSE);
1618 }
1619
1620 /*
1621 * Enable Slow Protocol LACP and Marker PDUs.
1622 */
1623 static void
1624 lacp_on(aggr_port_t *portp)
1625 {
1626 aggr_lacp_port_t *pl = &portp->lp_lacp;
1627 mac_perim_handle_t mph;
1628
1629 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1630
1631 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1632
1633 /*
1634 * Reset the state machines and Partner operational
1635 * information. Careful to not reset things like
1636 * our link state.
1637 */
1638 lacp_reset_port(portp);
1639 pl->sm.lacp_on = B_TRUE;
1640
1641 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1642
1643 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1644 pl->sm.port_enabled = B_TRUE;
1645 pl->sm.lacp_enabled = B_TRUE;
1646 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1647 }
1648
1649 lacp_receive_sm(portp, NULL);
1650 lacp_mux_sm(portp);
1651
1652 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1653 /* Enable Multicast Slow Protocol address */
1654 aggr_lacp_mcast_on(portp);
1655
1656 /* periodic_sm is started up from the receive machine */
1657 lacp_selection_logic(portp);
1658 }
1659 done:
1660 mac_perim_exit(mph);
1661 } /* lacp_on */
1662
1663 /* Disable Slow Protocol LACP and Marker PDUs */
1664 static void
1665 lacp_off(aggr_port_t *portp)
1666 {
1667 aggr_lacp_port_t *pl = &portp->lp_lacp;
1668 mac_perim_handle_t mph;
1669
1670 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1671 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1672
1673 pl->sm.lacp_on = B_FALSE;
1674
1675 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1676
1677 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1678 /*
1679 * Disable Slow Protocol Timers.
1680 */
1681 stop_periodic_timer(portp);
1682 stop_current_while_timer(portp);
1683 stop_wait_while_timer(portp);
1684
1685 /* Disable Multicast Slow Protocol address */
1686 aggr_lacp_mcast_off(portp);
1687
1688 pl->sm.port_enabled = B_FALSE;
1689 pl->sm.lacp_enabled = B_FALSE;
1690 pl->ActorOperPortState.bit.aggregation = B_FALSE;
1691 }
1692
1693 lacp_mux_sm(portp);
1694 lacp_periodic_sm(portp);
1695 lacp_selection_logic(portp);
1696
1697 /* Turn OFF Collector_Distributor */
1698 aggr_set_coll_dist(portp, B_FALSE);
1699
1700 lacp_reset_port(portp);
1701 mac_perim_exit(mph);
1702 }
1703
1704
1705 static boolean_t
1706 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1707 {
1708 /*
1709 * 43.4.12 - "a Receive machine shall not validate
1710 * the Version Number, TLV_type, or Reserved fields in received
1711 * LACPDUs."
1712 * ... "a Receive machine may validate the Actor_Information_Length,
1713 * Partner_Information_Length, Collector_Information_Length,
1714 * or Terminator_Length fields."
1715 */
1716 if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1717 (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1718 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1719 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1720 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1721 " Terminator Length = %d \n", portp->lp_linkid,
1722 lacp->terminator_len));
1723 return (B_FALSE);
1724 }
1725
1726 return (B_TRUE);
1727 }
1728
1729
1730 static void
1731 start_current_while_timer(aggr_port_t *portp, uint_t time)
1732 {
1733 aggr_lacp_port_t *pl = &portp->lp_lacp;
1734
1735 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1736
1737 mutex_enter(&pl->lacp_timer_lock);
1738 if (pl->current_while_timer.id == 0) {
1739 if (time > 0)
1740 pl->current_while_timer.val = time;
1741 else if (pl->ActorOperPortState.bit.timeout)
1742 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1743 else
1744 pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1745
1746 pl->current_while_timer.id =
1747 timeout(current_while_timer_pop, portp,
1748 drv_usectohz((clock_t)1000000 *
1749 (clock_t)portp->lp_lacp.current_while_timer.val));
1750 }
1751 mutex_exit(&pl->lacp_timer_lock);
1752 }
1753
1754
1755 static void
1756 stop_current_while_timer(aggr_port_t *portp)
1757 {
1758 aggr_lacp_port_t *pl = &portp->lp_lacp;
1759 timeout_id_t id;
1760
1761 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1762
1763 mutex_enter(&pl->lacp_timer_lock);
1764 if ((id = pl->current_while_timer.id) != 0) {
1765 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1766 pl->current_while_timer.id = 0;
1767 }
1768 mutex_exit(&pl->lacp_timer_lock);
1769
1770 if (id != 0)
1771 (void) untimeout(id);
1772 }
1773
1774 static void
1775 current_while_timer_pop(void *data)
1776 {
1777 aggr_port_t *portp = (aggr_port_t *)data;
1778 aggr_lacp_port_t *pl = &portp->lp_lacp;
1779
1780 mutex_enter(&pl->lacp_timer_lock);
1781 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1782 cv_broadcast(&pl->lacp_timer_cv);
1783 mutex_exit(&pl->lacp_timer_lock);
1784 }
1785
1786 static void
1787 current_while_timer_pop_handler(aggr_port_t *portp)
1788 {
1789 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1790
1791 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1792 "pop id=%p\n", portp->lp_linkid,
1793 portp->lp_lacp.current_while_timer.id));
1794
1795 lacp_receive_sm(portp, NULL);
1796 }
1797
1798 /*
1799 * record_Default - Simply copies over administrative values
1800 * to the partner operational values, and sets our state to indicate we
1801 * are using defaulted values.
1802 */
1803 static void
1804 record_Default(aggr_port_t *portp)
1805 {
1806 aggr_lacp_port_t *pl = &portp->lp_lacp;
1807
1808 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1809
1810 pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1811 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1812 pl->PartnerOperSystem = pl->PartnerAdminSystem;
1813 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1814 pl->PartnerOperKey = pl->PartnerAdminKey;
1815 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1816
1817 pl->ActorOperPortState.bit.defaulted = B_TRUE;
1818 }
1819
1820
1821 /* Returns B_TRUE on sync value changing */
1822 static boolean_t
1823 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1824 {
1825 aggr_grp_t *aggrp = portp->lp_grp;
1826 aggr_lacp_port_t *pl = &portp->lp_lacp;
1827 uint8_t save_sync;
1828
1829 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1830
1831 /*
1832 * Partner Information
1833 */
1834 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1835 pl->PartnerOperPortPriority =
1836 ntohs(lacp->actor_info.port_priority);
1837 pl->PartnerOperSystem = lacp->actor_info.system_id;
1838 pl->PartnerOperSysPriority =
1839 htons(lacp->actor_info.system_priority);
1840 pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1841
1842 /* All state info except for Synchronization */
1843 save_sync = pl->PartnerOperPortState.bit.sync;
1844 pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1845
1846 /* Defaulted set to FALSE */
1847 pl->ActorOperPortState.bit.defaulted = B_FALSE;
1848
1849 /*
1850 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1851 * Partner_System_Priority, Partner_Key, and
1852 * Partner_State.Aggregation) are compared to the
1853 * corresponding operations paramters values for
1854 * the Actor. If these are equal, or if this is
1855 * an individual link, we are synchronized.
1856 */
1857 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1858 (ntohs(lacp->partner_info.port_priority) ==
1859 pl->ActorPortPriority) &&
1860 (ether_cmp(&lacp->partner_info.system_id,
1861 (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1862 (ntohs(lacp->partner_info.system_priority) ==
1863 aggrp->aggr.ActorSystemPriority) &&
1864 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1865 (lacp->partner_info.state.bit.aggregation ==
1866 pl->ActorOperPortState.bit.aggregation)) ||
1867 (!lacp->actor_info.state.bit.aggregation)) {
1868
1869 pl->PartnerOperPortState.bit.sync =
1870 lacp->actor_info.state.bit.sync;
1871 } else {
1872 pl->PartnerOperPortState.bit.sync = B_FALSE;
1873 }
1874
1875 if (save_sync != pl->PartnerOperPortState.bit.sync) {
1876 AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1877 "%d -->%d\n", portp->lp_linkid, save_sync,
1878 pl->PartnerOperPortState.bit.sync));
1879 return (B_TRUE);
1880 } else {
1881 return (B_FALSE);
1882 }
1883 }
1884
1885
1886 /*
1887 * update_selected - If any of the Partner parameters has
1888 * changed from a previous value, then
1889 * unselect the link from the aggregator.
1890 */
1891 static boolean_t
1892 update_selected(aggr_port_t *portp, lacp_t *lacp)
1893 {
1894 aggr_lacp_port_t *pl = &portp->lp_lacp;
1895
1896 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1897
1898 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1899 (pl->PartnerOperPortPriority !=
1900 ntohs(lacp->actor_info.port_priority)) ||
1901 (ether_cmp(&pl->PartnerOperSystem,
1902 &lacp->actor_info.system_id) != 0) ||
1903 (pl->PartnerOperSysPriority !=
1904 ntohs(lacp->actor_info.system_priority)) ||
1905 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1906 (pl->PartnerOperPortState.bit.aggregation !=
1907 lacp->actor_info.state.bit.aggregation)) {
1908 AGGR_LACP_DBG(("update_selected:(%d): "
1909 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1910 AGGR_UNSELECTED));
1911
1912 lacp_port_unselect(portp);
1913 return (B_TRUE);
1914 } else {
1915 return (B_FALSE);
1916 }
1917 }
1918
1919
1920 /*
1921 * update_default_selected - If any of the operational Partner parameters
1922 * is different than that of the administrative values
1923 * then unselect the link from the aggregator.
1924 */
1925 static void
1926 update_default_selected(aggr_port_t *portp)
1927 {
1928 aggr_lacp_port_t *pl = &portp->lp_lacp;
1929
1930 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1931
1932 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1933 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1934 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1935 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1936 (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1937 (pl->PartnerOperPortState.bit.aggregation !=
1938 pl->PartnerAdminPortState.bit.aggregation)) {
1939
1940 AGGR_LACP_DBG(("update_default_selected:(%d): "
1941 "selected %d-->%d\n", portp->lp_linkid,
1942 pl->sm.selected, AGGR_UNSELECTED));
1943
1944 lacp_port_unselect(portp);
1945 }
1946 }
1947
1948
1949 /*
1950 * update_NTT - If any of the Partner values in the received LACPDU
1951 * are different than that of the Actor operational
1952 * values then set NTT to true.
1953 */
1954 static void
1955 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1956 {
1957 aggr_grp_t *aggrp = portp->lp_grp;
1958 aggr_lacp_port_t *pl = &portp->lp_lacp;
1959
1960 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1961
1962 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1963 (pl->ActorPortPriority !=
1964 ntohs(lacp->partner_info.port_priority)) ||
1965 (ether_cmp(&aggrp->lg_addr,
1966 &lacp->partner_info.system_id) != 0) ||
1967 (aggrp->aggr.ActorSystemPriority !=
1968 ntohs(lacp->partner_info.system_priority)) ||
1969 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1970 (pl->ActorOperPortState.bit.activity !=
1971 lacp->partner_info.state.bit.activity) ||
1972 (pl->ActorOperPortState.bit.timeout !=
1973 lacp->partner_info.state.bit.timeout) ||
1974 (pl->ActorOperPortState.bit.sync !=
1975 lacp->partner_info.state.bit.sync) ||
1976 (pl->ActorOperPortState.bit.aggregation !=
1977 lacp->partner_info.state.bit.aggregation)) {
1978
1979 AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n",
1980 portp->lp_linkid, pl->NTT, B_TRUE));
1981
1982 pl->NTT = B_TRUE;
1983 }
1984 }
1985
/*
 * lacp_receive_sm - LACP receive state machine
 *
 * parameters:
 *	- portp - instance this applies to.
 *	- lacp - pointer in the case of a received LACPDU.
 *	  This value is NULL if there is no LACPDU.
 *
 * invoked:
 *	- when initialization is needed
 *	- upon reception of an LACPDU. This is the common case.
 *	- every time the current_while_timer pops
 *
 * The function first works out the next receive state from the port's
 * state machine variables, then performs the actions of that state.
 * Some states call lacp_receive_sm() again to move on immediately.
 * The caller must hold the MAC perimeter of the port's group.
 */
static void
lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
{
	boolean_t sync_updated, selected_updated, save_activity;
	aggr_lacp_port_t *pl = &portp->lp_lacp;
	/* remembered only for the debug trace of the state transition */
	lacp_receive_state_t oldstate = pl->sm.receive_state;

	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));

	/* LACP_OFF state not in specification so check here. */
	if (!pl->sm.lacp_on)
		return;

	/* figure next state */
	if (pl->sm.begin || pl->sm.port_moved) {
		pl->sm.receive_state = LACP_INITIALIZE;
	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
		pl->sm.receive_state = LACP_PORT_DISABLED;
	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
		pl->sm.receive_state =
		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
		    LACP_DISABLED : LACP_PORT_DISABLED;
	} else if (lacp != NULL) {
		/* a received LACPDU brings EXPIRED/DEFAULTED to CURRENT */
		if ((pl->sm.receive_state == LACP_EXPIRED) ||
		    (pl->sm.receive_state == LACP_DEFAULTED)) {
			pl->sm.receive_state = LACP_CURRENT;
		}
	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
	    (pl->current_while_timer.id == 0)) {
		/* no LACPDU and no timer pending: CURRENT -> EXPIRED */
		pl->sm.receive_state = LACP_EXPIRED;
	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
	    (pl->current_while_timer.id == 0)) {
		/* no LACPDU and no timer pending: EXPIRED -> DEFAULTED */
		pl->sm.receive_state = LACP_DEFAULTED;
	}

	/* Trace every transition except the common CURRENT -> CURRENT. */
	if (!((lacp && (oldstate == LACP_CURRENT) &&
	    (pl->sm.receive_state == LACP_CURRENT)))) {
		AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
		    portp->lp_linkid, lacp_receive_str[oldstate],
		    lacp_receive_str[pl->sm.receive_state]));
	}

	switch (pl->sm.receive_state) {
	case LACP_INITIALIZE:
		/*
		 * Unselect the port, fall back to the default Partner
		 * values, clear begin/port_moved and continue at once
		 * from LACP_PORT_DISABLED.
		 */
		lacp_port_unselect(portp);
		record_Default(portp);
		pl->ActorOperPortState.bit.expired = B_FALSE;
		pl->sm.port_moved = B_FALSE;
		pl->sm.receive_state = LACP_PORT_DISABLED;
		pl->sm.begin = B_FALSE;
		lacp_receive_sm(portp, NULL);
		break;

	case LACP_PORT_DISABLED:
		pl->PartnerOperPortState.bit.sync = B_FALSE;
		/*
		 * Stop current_while_timer in case
		 * we got here from link down
		 */
		stop_current_while_timer(portp);

		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
			pl->sm.receive_state = LACP_DISABLED;
			lacp_receive_sm(portp, lacp);
			/* The recursive call handled LACP_DISABLED */
			break;
		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
			pl->sm.receive_state = LACP_EXPIRED;
			/*
			 * FALL THROUGH TO LACP_EXPIRED CASE:
			 * We have no way of knowing if we get into
			 * lacp_receive_sm() from a current_while_timer
			 * expiring as it has never been kicked off yet!
			 */
		} else {
			/* We stay in LACP_PORT_DISABLED state */
			break;
		}
		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
		/* FALLTHROUGH */

	case LACP_EXPIRED:
		/*
		 * Arrives here from LACP_PORT_DISABLED state as well as
		 * current_while_timer expiring.
		 */
		pl->PartnerOperPortState.bit.sync = B_FALSE;
		pl->PartnerOperPortState.bit.timeout = B_TRUE;

		pl->ActorOperPortState.bit.expired = B_TRUE;
		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
		lacp_periodic_sm(portp);
		break;

	case LACP_DISABLED:
		/*
		 * This is the normal state for recv_sm when LACP_OFF
		 * is set or the NIC is in half duplex mode.
		 */
		lacp_port_unselect(portp);
		record_Default(portp);
		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
		pl->ActorOperPortState.bit.expired = B_FALSE;
		break;

	case LACP_DEFAULTED:
		/*
		 * Current_while_timer expired a second time.
		 */
		update_default_selected(portp);
		record_Default(portp);	/* overwrite Partner Oper val */
		pl->ActorOperPortState.bit.expired = B_FALSE;
		pl->PartnerOperPortState.bit.sync = B_TRUE;

		lacp_selection_logic(portp);
		lacp_mux_sm(portp);
		break;

	case LACP_CURRENT:
		/*
		 * Reception of LACPDU
		 */

		if (!lacp) /* no LACPDU so current_while_timer popped */
			break;

		AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
		    portp->lp_linkid));

		/*
		 * Validate Actor_Information_Length,
		 * Partner_Information_Length, Collector_Information_Length,
		 * and Terminator_Length fields.
		 */
		if (!valid_lacp_pdu(portp, lacp)) {
			AGGR_LACP_DBG(("lacp_receive_sm (%d): "
			    "Invalid LACPDU received\n",
			    portp->lp_linkid));
			break;
		}

		/*
		 * update_selected() and update_NTT() compare the LACPDU
		 * against the values recorded so far, so they must run
		 * before record_PDU() overwrites those values.
		 */
		save_activity = pl->PartnerOperPortState.bit.activity;
		selected_updated = update_selected(portp, lacp);
		update_NTT(portp, lacp);
		sync_updated = record_PDU(portp, lacp);

		pl->ActorOperPortState.bit.expired = B_FALSE;

		if (selected_updated) {
			lacp_selection_logic(portp);
			lacp_mux_sm(portp);
		} else if (sync_updated) {
			lacp_mux_sm(portp);
		}

		/*
		 * If the periodic timer value bit has been modified
		 * or the partner activity bit has been changed then
		 * we need to respectively:
		 *  - restart the timer with the proper timeout value.
		 *  - possibly enable/disable transmission of LACPDUs.
		 */
		if ((pl->PartnerOperPortState.bit.timeout &&
		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
		    (!pl->PartnerOperPortState.bit.timeout &&
		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
		    (pl->PartnerOperPortState.bit.activity !=
		    save_activity)) {
			lacp_periodic_sm(portp);
		}

		/*
		 * Restart the current_while_timer for the LACPDU just
		 * received; a time of 0 lets start_current_while_timer()
		 * pick the value from the Actor's timeout bit.
		 */
		stop_current_while_timer(portp);
		/* Check if we need to transmit an LACPDU */
		if (pl->NTT)
			lacp_xmit_sm(portp);
		start_current_while_timer(portp, 0);

		break;
	}
}
2179
2180 static void
2181 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2182 {
2183 mac_perim_handle_t mph;
2184
2185 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2186 portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2187
2188 mac_perim_enter_by_mh(portp->lp_mh, &mph);
2189 if (!enable) {
2190 /*
2191 * Turn OFF Collector_Distributor.
2192 */
2193 portp->lp_collector_enabled = B_FALSE;
2194 aggr_send_port_disable(portp);
2195 goto done;
2196 }
2197
2198 /*
2199 * Turn ON Collector_Distributor.
2200 */
2201
2202 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2203 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2204 /* Port is compatible and can be aggregated */
2205 portp->lp_collector_enabled = B_TRUE;
2206 aggr_send_port_enable(portp);
2207 }
2208
2209 done:
2210 mac_perim_exit(mph);
2211 }
2212
2213 /*
2214 * Because the LACP packet processing needs to enter the aggr's mac perimeter
2215 * and that would potentially cause a deadlock with the thread in which the
2216 * grp/port is deleted, we defer the packet process to a worker thread. Here
2217 * we only enqueue the received Marker or LACPDU for later processing.
2218 */
2219 void
2220 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2221 {
2222 aggr_grp_t *grp = portp->lp_grp;
2223 lacp_t *lacp;
2224
2225 dmp->b_rptr += sizeof (struct ether_header);
2226
2227 if (MBLKL(dmp) < sizeof (lacp_t)) {
2228 freemsg(dmp);
2229 return;
2230 }
2231
2232 lacp = (lacp_t *)dmp->b_rptr;
2233 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2234 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2235 "Unknown Slow Protocol type %d\n",
2236 portp->lp_linkid, lacp->subtype));
2237 freemsg(dmp);
2238 return;
2239 }
2240
2241 mutex_enter(&grp->lg_lacp_lock);
2242
2243 /*
2244 * If the lg_lacp_done is set, this aggregation is in the process of
2245 * being deleted, return directly.
2246 */
2247 if (grp->lg_lacp_done) {
2248 mutex_exit(&grp->lg_lacp_lock);
2249 freemsg(dmp);
2250 return;
2251 }
2252
2253 if (grp->lg_lacp_tail == NULL) {
2254 grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2255 } else {
2256 grp->lg_lacp_tail->b_next = dmp;
2257 grp->lg_lacp_tail = dmp;
2258 }
2259
2260 /*
2261 * Hold a reference of the port so that the port won't be freed when it
2262 * is removed from the aggr. The b_prev field is borrowed to save the
2263 * port information.
2264 */
2265 AGGR_PORT_REFHOLD(portp);
2266 dmp->b_prev = (mblk_t *)portp;
2267 cv_broadcast(&grp->lg_lacp_cv);
2268 mutex_exit(&grp->lg_lacp_lock);
2269 }
2270
2271 static void
2272 aggr_lacp_rx(mblk_t *dmp)
2273 {
2274 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2275 mac_perim_handle_t mph;
2276 lacp_t *lacp;
2277
2278 dmp->b_prev = NULL;
2279
2280 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2281 if (portp->lp_closing)
2282 goto done;
2283
2284 lacp = (lacp_t *)dmp->b_rptr;
2285 switch (lacp->subtype) {
2286 case LACP_SUBTYPE:
2287 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2288 portp->lp_linkid));
2289
2290 if (!portp->lp_lacp.sm.lacp_on) {
2291 break;
2292 }
2293 lacp_receive_sm(portp, lacp);
2294 break;
2295
2296 case MARKER_SUBTYPE:
2297 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2298 portp->lp_linkid));
2299
2300 if (receive_marker_pdu(portp, dmp) != 0)
2301 break;
2302
2303 /* Send the packet over the first TX ring */
2304 dmp = mac_hwring_send_priv(portp->lp_mch,
2305 portp->lp_tx_rings[0], dmp);
2306 if (dmp != NULL)
2307 freemsg(dmp);
2308 mac_perim_exit(mph);
2309 AGGR_PORT_REFRELE(portp);
2310 return;
2311 }
2312
2313 done:
2314 mac_perim_exit(mph);
2315 AGGR_PORT_REFRELE(portp);
2316 freemsg(dmp);
2317 }
2318
2319 void
2320 aggr_lacp_rx_thread(void *arg)
2321 {
2322 callb_cpr_t cprinfo;
2323 aggr_grp_t *grp = (aggr_grp_t *)arg;
2324 aggr_port_t *port;
2325 mblk_t *mp, *nextmp;
2326
2327 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2328 "aggr_lacp_rx_thread");
2329
2330 mutex_enter(&grp->lg_lacp_lock);
2331
2332 /*
2333 * Quit the thread if the grp is deleted.
2334 */
2335 while (!grp->lg_lacp_done) {
2336 if ((mp = grp->lg_lacp_head) == NULL) {
2337 CALLB_CPR_SAFE_BEGIN(&cprinfo);
2338 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2339 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2340 continue;
2341 }
2342
2343 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2344 mutex_exit(&grp->lg_lacp_lock);
2345
2346 while (mp != NULL) {
2347 nextmp = mp->b_next;
2348 mp->b_next = NULL;
2349 aggr_lacp_rx(mp);
2350 mp = nextmp;
2351 }
2352 mutex_enter(&grp->lg_lacp_lock);
2353 }
2354
2355 /*
2356 * The grp is being destroyed, simply free all of the LACP messages
2357 * left in the queue which did not have the chance to be processed.
2358 * We cannot use freemsgchain() here since we need to clear the
2359 * b_prev field.
2360 */
2361 for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2362 port = (aggr_port_t *)mp->b_prev;
2363 AGGR_PORT_REFRELE(port);
2364 nextmp = mp->b_next;
2365 mp->b_next = NULL;
2366 mp->b_prev = NULL;
2367 freemsg(mp);
2368 }
2369
2370 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2371 grp->lg_lacp_rx_thread = NULL;
2372 cv_broadcast(&grp->lg_lacp_cv);
2373 CALLB_CPR_EXIT(&cprinfo);
2374 thread_exit();
2375 }