Print this page
5255 uts shouldn't open-code ISP2
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/myri10ge/drv/myri10ge.c
+++ new/usr/src/uts/common/io/myri10ge/drv/myri10ge.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
29 29 * Use is subject to license terms.
30 30 */
31 31
32 32 #ifndef lint
33 33 static const char __idstring[] =
34 34 "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
35 35 #endif
36 36
37 37 #define MXGEFW_NDIS
38 38 #include "myri10ge_var.h"
39 39 #include "rss_eth_z8e.h"
40 40 #include "rss_ethp_z8e.h"
41 41 #include "mcp_gen_header.h"
42 42
#define MYRI10GE_MAX_ETHER_MTU 9014

/* values of the driver's run-state machine */
#define MYRI10GE_ETH_STOPPED 0
#define MYRI10GE_ETH_STOPPING 1
#define MYRI10GE_ETH_STARTING 2
#define MYRI10GE_ETH_RUNNING 3
#define MYRI10GE_ETH_OPEN_FAILED 4
#define MYRI10GE_ETH_SUSPENDED_RUNNING 5

/*
 * Module-scope tunable defaults.  Only the uses visible in this file
 * are noted; remaining semantics are established elsewhere.
 */
static int myri10ge_small_bytes = 510;	/* small rx frame payload size */
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;	/* x86-only knob */
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;	/* non-zero enables extra diagnostics */
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
/* number of tx dma handles pre-allocated per ring (see prepare_tx_ring) */
int myri10ge_tx_handles_initial = 128;

static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;

/* DDI entry points, defined later in this file */
static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);


static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};


static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

/* all-ones L2 broadcast address */
unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
101 101
/* DMA attributes for miscellaneous allocations: one 4KB-aligned segment */
static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/*
 * The Myri10GE NIC has the following constraints on receive buffers:
 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 */

/* rx attrs for jumbo (>= 2KB) buffers: 4KB aligned, unlimited segment */
static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/*
 * rx attrs for standard-MTU buffers.  On non-sparc the 0xfff segment
 * limit keeps a buffer from crossing a 4KB boundary (constraint 2
 * above); myri10ge_add_jbuf() falls back to 4KB alignment if that
 * trick stops working.
 */
static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/* tx attrs: byte alignment, effectively unlimited scatter/gather */
static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/* WC = write combining; disabled on sparc */
#if defined sparc64 || defined __sparcv9
#define WC 0
#else
#define WC 1
#endif

struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,		/* version */
	DDI_NEVERSWAP_ACC,		/* endianness: never swap */
#if WC
	DDI_MERGING_OK_ACC		/* data order: allow write merging */
#else
	DDI_STRICTORDER_ACC
#endif
};

static void myri10ge_watchdog(void *arg);

/* default device MTU, including firmware pad and VLAN tag space */
#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;
201 201
202 202
/*
 * Allocate, map and bind one physically contiguous DMA region of "len"
 * bytes described by "attr"/"accattr".  On success, stores the bus
 * address in "dma" as network-order high/low 32-bit halves (the form
 * the NIC firmware consumes) and returns the kernel virtual address.
 * On failure returns NULL after unwinding whatever was acquired
 * (classic goto-cleanup chain).  "warn" selects failure logging;
 * "wait" is the DDI callback argument (DDI_DMA_SLEEP/DONTWAIT).
 */
caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	/* callers rely on a single cookie; the attrs request sgl length 1 */
	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	/* split the bus address into the byte order the NIC wants */
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n ");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void*) dip, len, (void*) attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void*) accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void*) dma);
	}
	return (NULL);

}
276 276
/*
 * Release a region obtained from myri10ge_dma_alloc(): unbind, free
 * the memory, then free the handle — strictly the reverse of the
 * acquisition order.
 */
void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}
284 284
/*
 * Copy "size" bytes (assumed to be a multiple of 4) from "from32" to
 * the (possibly memory-mapped) destination "to", one 32-bit word at
 * a time.  The destination is written through a volatile pointer so
 * the stores are not merged or elided.
 */
static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	volatile uint32_t *dst = (volatile uint32_t *)to;
	size_t nwords = size / 4;

	while (nwords-- != 0)
		*dst++ = *from32++;
}
298 298
#if defined(_LP64)
/*
 * Copy "size" bytes (assumed to be a multiple of 8) from "from64" to
 * "to", one 64-bit word at a time.  Only built on 64-bit kernels.
 */
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	volatile uint64_t *dst = (volatile uint64_t *)to;
	size_t nwords = size / 8;

	while (nwords-- != 0)
		*dst++ = *from64++;
}
#endif
314 314
/*
 * This routine copies memory from the host to the NIC.
 * The "size" argument must always be a multiple of
 * the size of long (4 or 8 bytes), and to/from must also
 * be naturally aligned.
 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if defined(_LP64)
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#else
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#endif
}
332 332
333 333
334 334 /*
335 335 * Due to various bugs in Solaris (especially bug 6186772 where the
336 336 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
337 337 * than two elements), and the design bug where hardware checksums are
338 338 * ignored on mblk chains with more than 2 elements, we need to
339 339 * allocate private pool of physically contiguous receive buffers.
340 340 */
341 341
/*
 * Initialize a slice's jumbo buffer pool: zero the bookkeeping and
 * create the pool mutex using the driver's interrupt cookie (so the
 * lock is usable from interrupt context).
 */
static void
myri10ge_jpool_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	bzero(jpool, sizeof (*jpool));
	mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
	    ss->mgp->icookie);
	jpool->head = NULL;
}
352 352
/*
 * Destroy a slice's jumbo pool mutex.  The pool must already have
 * been emptied; a non-empty free list here is a driver bug, so it is
 * logged loudly before the mutex is torn down.
 */
static void
myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	if (jpool->head != NULL) {
		cmn_err(CE_WARN,
		    "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
		    ss->mgp->name);
	}
	mutex_destroy(&jpool->mtx);
}
365 365
366 366
367 367 /*
368 368 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
369 369 * at most 32 bytes at a time, so as to avoid involving the software
370 370 * pio handler in the nic. We re-write the first segment's low
371 371 * DMA address to mark it valid only after we write the entire chunk
372 372 * in a burst
373 373 */
static inline void
myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
{
	/*
	 * Tag the first descriptor's low address while the burst is in
	 * flight so the NIC does not consume a half-written chunk.
	 */
	src->addr_low |= BE_32(1);
	/* two 32-byte bursts (4 descriptors each), fenced in between */
	myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	/* clear the tag and re-write the first low address to validate */
	src->addr_low &= ~(BE_32(1));
	dst->addr_low = src->addr_low;
	mb();
}
386 386
/*
 * Drain the lockless per-CPU free caches into the slice's main jumbo
 * pool.  NOTE(review): callers visible in this file hold jpool->mtx
 * (or call during quiesced paths) — confirm before adding new callers.
 */
static void
myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *jtail, *j, *jfree;
	volatile uintptr_t *putp;
	uintptr_t put;
	int i;

	/* find tail */
	jtail = NULL;
	if (jpool->head != NULL) {
		j = jpool->head;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}

	/*
	 * iterate over all per-CPU caches, and add contents into
	 * jpool
	 */
	for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
		/* take per-CPU free list */
		putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
		if (*putp == NULL)
			continue;
		/* atomically detach the entire per-CPU chain */
		put = atomic_swap_ulong(putp, 0);
		jfree = (struct myri10ge_jpool_entry *)put;

		/* append to pool */
		if (jtail == NULL) {
			jpool->head = jfree;
		} else {
			jtail->next = jfree;
		}
		/* walk to the new tail for the next append */
		j = jfree;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}
}
429 429
430 430 /*
431 431 * Transfers buffers from the free pool to the nic
432 432 * Must be called holding the jpool mutex.
433 433 */
434 434
static inline void
myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i, idx, limit;

	rx = &ss->rx_big;
	/* stop once every ring slot not still owned by the host is filled */
	limit = ss->j_rx_cnt + (rx->mask + 1);

	for (i = rx->cnt; i != limit; i++) {
		idx = i & (rx->mask);
		j = jpool->head;
		if (j == NULL) {
			/* pool empty: try draining the per-CPU caches */
			myri10ge_pull_jpool(ss);
			j = jpool->head;
			if (j == NULL) {
				break;
			}
		}
		jpool->head = j->next;
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;
		/* copy 4 descriptors (32-bytes) to the mcp at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
			    &rx->shadow[idx - 7]);
		}
	}
	rx->cnt = i;
}
468 468
469 469 /*
470 470 * Transfer buffers from the nic to the free pool.
471 471 * Should be called holding the jpool mutex
472 472 */
473 473
474 474 static inline void
475 475 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
476 476 {
477 477 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
478 478 struct myri10ge_jpool_entry *j;
479 479 myri10ge_rx_ring_t *rx;
480 480 int i;
481 481
482 482 mutex_enter(&jpool->mtx);
483 483 rx = &ss->rx_big;
484 484
485 485 for (i = 0; i < rx->mask + 1; i++) {
486 486 j = rx->info[i].j;
487 487 rx->info[i].j = NULL;
488 488 if (j == NULL)
489 489 continue;
490 490 j->next = jpool->head;
491 491 jpool->head = j;
492 492 }
493 493 mutex_exit(&jpool->mtx);
494 494
495 495 }
496 496
497 497
498 498 /*
499 499 * Free routine which is called when the mblk allocated via
500 500 * esballoc() is freed. Here we return the jumbo buffer
501 501 * to the free pool, and possibly pass some jumbo buffers
502 502 * to the nic
503 503 */
504 504
/*
 * esballoc() free routine for a jumbo receive buffer: return the
 * buffer to the owning slice's pool without taking any lock, by
 * CAS-prepending it onto the current CPU's free cache.
 * myri10ge_pull_jpool() later folds these caches back into the pool.
 */
static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		/* re-read the head and retry until the CAS wins */
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}
523 523
/*
 * Free one jumbo buffer entry: unbind and release its DMA resources
 * (reverse of acquisition order in myri10ge_add_jbuf), then the entry.
 */
static void
myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
{
	(void) ddi_dma_unbind_handle(j->dma_handle);
	ddi_dma_mem_free(&j->acc_handle);
	ddi_dma_free_handle(&j->dma_handle);
	kmem_free(j, sizeof (*j));
}
532 532
533 533
534 534 /*
535 535 * Allocates one physically contiguous descriptor
536 536 * and add it to the jumbo buffer pool.
537 537 */
538 538
/*
 * Allocate one physically contiguous, DMA-bound jumbo buffer and add
 * it to the slice's pool.  Returns 0 on success or the DDI error.
 * May retry once with stricter (4KB) alignment — see the boundary
 * comment below.
 */
static int
myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_entry *j;
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	ddi_dma_attr_t *rx_dma_attr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	/* standard-MTU and jumbo buffers have different constraints */
	if (myri10ge_mtu < 2048)
		rx_dma_attr = &myri10ge_rx_std_dma_attr;
	else
		rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;

again:
	j = (struct myri10ge_jpool_entry *)
	    kmem_alloc(sizeof (*j), KM_SLEEP);
	err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_j;

	err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &j->buf, &real_length, &j->acc_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_handle;

	err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
	    real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &cookie, &count);
	if (err != DDI_SUCCESS)
		goto abort_with_mem;

	/*
	 * Make certain std MTU buffers do not cross a 4KB boundary:
	 *
	 * Setting dma_attr_align=4096 will do this, but the system
	 * will only allocate 1 RX buffer per 4KB page, rather than 2.
	 * Setting dma_attr_granular=4096 *seems* to work around this,
	 * but I'm paranoid about future systems no longer honoring
	 * this, so fall back to the safe, but memory wasting way if a
	 * buffer crosses a 4KB boundary.
	 */

	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		uint32_t start, end;

		start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
		end = start + myri10ge_mtu;
		if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
			printf("std buffer crossed a 4KB boundary!\n");
			myri10ge_remove_jbuf(j);
			rx_dma_attr->dma_attr_align = 4096;
			rx_dma_attr->dma_attr_seg = UINT64_MAX;
			goto again;
		}
	}

	/* record the bus address as the NIC expects it (see dma_alloc) */
	j->dma.low =
	    htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	j->dma.high =
	    htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	j->ss = ss;


	j->free_func.free_func = myri10ge_jfree_rtn;
	j->free_func.free_arg = (char *)j;
	mutex_enter(&jpool->mtx);
	j->next = jpool->head;
	jpool->head = j;
	jpool->num_alloc++;
	mutex_exit(&jpool->mtx);
	return (0);

abort_with_mem:
	ddi_dma_mem_free(&j->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&j->dma_handle);

abort_with_j:
	kmem_free(j, sizeof (*j));

	/*
	 * If an allocation failed, perhaps it failed because it could
	 * not satisfy granularity requirement.  Disable that, and
	 * try again.  NOTE(review): the message below says "gran=1"
	 * but the code actually raises alignment to 4096 — confirm
	 * which was intended.
	 */
	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		cmn_err(CE_NOTE,
		    "!alloc failed, reverting to gran=1\n");
		rx_dma_attr->dma_attr_align = 4096;
		rx_dma_attr->dma_attr_seg = UINT64_MAX;
		goto again;
	}
	return (err);
}
641 641
642 642 static int
643 643 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
644 644 {
645 645 int i;
646 646 struct myri10ge_jpool_entry *j;
647 647
648 648 mutex_enter(&jpool->mtx);
649 649 j = jpool->head;
650 650 i = 0;
651 651 while (j != NULL) {
652 652 i++;
653 653 j = j->next;
654 654 }
655 655 mutex_exit(&jpool->mtx);
656 656 return (i);
657 657 }
658 658
659 659 static int
660 660 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
661 661 {
662 662 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
663 663 int allocated = 0;
664 664 int err;
665 665 int needed;
666 666
667 667 /*
668 668 * if total is set, user wants "num" jbufs in the pool,
669 669 * otherwise the user wants to "num" additional jbufs
670 670 * added to the pool
671 671 */
672 672 if (total && jpool->num_alloc) {
673 673 allocated = myri10ge_jfree_cnt(jpool);
674 674 needed = num - allocated;
675 675 } else {
676 676 needed = num;
677 677 }
678 678
679 679 while (needed > 0) {
680 680 needed--;
681 681 err = myri10ge_add_jbuf(ss);
682 682 if (err == 0) {
683 683 allocated++;
684 684 }
685 685 }
686 686 return (allocated);
687 687 }
688 688
/*
 * Empty the jumbo pool: fold the per-CPU caches back in, then free
 * every buffer on the list.  Holds the pool mutex throughout.
 */
static void
myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	myri10ge_pull_jpool(ss);
	while (jpool->head != NULL) {
		jpool->num_alloc--;
		j = jpool->head;
		jpool->head = j->next;
		myri10ge_remove_jbuf(j);
	}
	mutex_exit(&jpool->mtx);
}
705 705
/*
 * Populate the small receive ring by carving jumbo buffers into
 * small (myri10ge_small_bytes + MXGEFW_PAD) chunks.  Each consumed
 * jumbo is moved from the free pool to ss->small_jpool so it can be
 * returned later by myri10ge_release_small_jbufs().
 *
 * NOTE(review): jpool->head is dereferenced without a NULL check —
 * this assumes callers pre-allocated enough jbufs for the whole
 * small ring; confirm against the callers.
 */
static void
myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;
	unsigned int alloc_size;

	dma_low = dma_high = len = 0;
	alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
	ptr = NULL;
	for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < alloc_size) {
			mutex_enter(&jpool->mtx);
			/* remove jumbo from freelist */
			j = jpool->head;
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		/* hand this chunk's address to the small ring slot */
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}
743 743
744 744 /*
745 745 * Return the jumbo bufs we carved up for small to the jumbo pool
746 746 */
747 747
748 748 static void
749 749 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
750 750 {
751 751 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
752 752 struct myri10ge_jpool_entry *j = NULL;
753 753
754 754 mutex_enter(&jpool->mtx);
755 755 while (ss->small_jpool != NULL) {
756 756 j = ss->small_jpool;
757 757 ss->small_jpool = j->next;
758 758 j->next = jpool->head;
759 759 jpool->head = j;
760 760 }
761 761 mutex_exit(&jpool->mtx);
762 762 ss->jbufs_for_smalls = 0;
763 763 }
764 764
765 765 static int
766 766 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
767 767 {
768 768 myri10ge_tx_ring_t *tx = &ss->tx;
769 769 struct myri10ge_priv *mgp = ss->mgp;
770 770 struct myri10ge_tx_dma_handle *handle;
771 771 int err;
772 772
773 773 handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
774 774 err = ddi_dma_alloc_handle(mgp->dip,
775 775 &myri10ge_tx_dma_attr,
776 776 DDI_DMA_SLEEP, NULL,
777 777 &handle->h);
778 778 if (err) {
779 779 static int limit = 0;
780 780 if (limit == 0)
781 781 cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n",
782 782 mgp->name);
783 783 limit++;
784 784 kmem_free(handle, sizeof (*handle));
785 785 return (err);
786 786 }
787 787 mutex_enter(&tx->handle_lock);
788 788 MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
789 789 handle->next = tx->free_tx_handles;
790 790 tx->free_tx_handles = handle;
791 791 mutex_exit(&tx->handle_lock);
792 792 return (DDI_SUCCESS);
793 793 }
794 794
/*
 * Free every tx DMA handle on the ring's free list, decrementing the
 * allocation statistic as we go.  A non-zero count afterwards means
 * handles are still outstanding (in use or leaked) at close time.
 */
static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}
817 817
/*
 * Splice a whole chain of tx DMA handles back onto the ring's free
 * list in one lock round-trip.  "list" must be non-empty (both head
 * and tail set).
 */
static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}
827 827
828 828 static void
829 829 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
830 830 struct myri10ge_tx_dma_handle *handle)
831 831 {
832 832 struct myri10ge_tx_dma_handle_head list;
833 833
834 834 if (handle == NULL)
835 835 return;
836 836 list.head = handle;
837 837 list.tail = handle;
838 838 while (handle != NULL) {
839 839 list.tail = handle;
840 840 handle = handle->next;
841 841 }
842 842 myri10ge_free_tx_handles(tx, &list);
843 843 }
844 844
/*
 * Take "count" tx DMA handles from the ring's free list, growing the
 * list via myri10ge_add_tx_handle() when it runs dry, and prepend
 * them onto *ret.  On failure, anything already moved to *ret is
 * returned to the free list and the DDI error is propagated.
 */
static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
    struct myri10ge_tx_dma_handle **ret)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	int err, i;

	mutex_enter(&tx->handle_lock);
	for (i = 0; i < count; i++) {
		handle = tx->free_tx_handles;
		while (handle == NULL) {
			/* drop the lock while allocating a new handle */
			mutex_exit(&tx->handle_lock);
			err = myri10ge_add_tx_handle(ss);
			if (err != DDI_SUCCESS) {
				/* lock is NOT held on this path */
				goto abort_with_handles;
			}
			mutex_enter(&tx->handle_lock);
			handle = tx->free_tx_handles;
		}
		tx->free_tx_handles = handle->next;
		handle->next = *ret;
		*ret = handle;
	}
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);

abort_with_handles:
	myri10ge_free_tx_handle_slist(tx, *ret);
	return (err);
}
876 876
877 877
878 878 /*
879 879 * Frees DMA resources associated with the send ring
880 880 */
881 881 static void
882 882 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
883 883 {
884 884 myri10ge_tx_ring_t *tx;
885 885 struct myri10ge_tx_dma_handle_head handles;
886 886 size_t bytes;
887 887 int idx;
888 888
889 889 tx = &ss->tx;
890 890 handles.head = NULL;
891 891 handles.tail = NULL;
892 892 for (idx = 0; idx < ss->tx.mask + 1; idx++) {
893 893 if (tx->info[idx].m) {
894 894 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
895 895 handles.head = tx->info[idx].handle;
896 896 if (handles.tail == NULL)
897 897 handles.tail = tx->info[idx].handle;
898 898 freeb(tx->info[idx].m);
899 899 tx->info[idx].m = 0;
900 900 tx->info[idx].handle = 0;
901 901 }
902 902 tx->cp[idx].va = NULL;
903 903 myri10ge_dma_free(&tx->cp[idx].dma);
904 904 }
905 905 bytes = sizeof (*tx->cp) * (tx->mask + 1);
906 906 kmem_free(tx->cp, bytes);
907 907 tx->cp = NULL;
908 908 if (handles.head != NULL)
909 909 myri10ge_free_tx_handles(tx, &handles);
910 910 myri10ge_remove_tx_handles(ss);
911 911 }
912 912
913 913 /*
914 914 * Allocates DMA handles associated with the send ring
915 915 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	/* defensive: KM_SLEEP allocations do not normally return NULL */
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}


	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	/* immediately return them: the goal was only to warm the free list */
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	/* free the copyblocks allocated before the failure */
	while (h > 0)  {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}
966 966
967 967 /*
968 968 * The eeprom strings on the lanaiX have the format
969 969 * SN=x\0
970 970 * MAC=x:x:x:x:x:x\0
971 971 * PT:ddd mmm xx xx:xx:xx xx\0
972 972 * PV:ddd mmm xx xx:xx:xx xx\0
973 973 */
974 974 static int
975 975 myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
976 976 {
977 977 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
978 978 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
979 979 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
980 980 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
981 981
982 982 char *ptr, *limit;
983 983 int i, hv, lv;
984 984
985 985 ptr = mgp->eeprom_strings;
986 986 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
987 987
988 988 while (*ptr != '\0' && ptr < limit) {
989 989 if (memcmp(ptr, "MAC=", 4) == 0) {
990 990 ptr += 4;
991 991 if (myri10ge_verbose)
992 992 printf("%s: mac address = %s\n", mgp->name,
993 993 ptr);
994 994 mgp->mac_addr_string = ptr;
995 995 for (i = 0; i < 6; i++) {
996 996 if ((ptr + 2) > limit)
997 997 goto abort;
998 998
999 999 if (*(ptr+1) == ':') {
1000 1000 hv = 0;
1001 1001 lv = myri10ge_digit(*ptr); ptr++;
1002 1002 } else {
1003 1003 hv = myri10ge_digit(*ptr); ptr++;
1004 1004 lv = myri10ge_digit(*ptr); ptr++;
1005 1005 }
1006 1006 mgp->mac_addr[i] = (hv << 4) | lv;
1007 1007 ptr++;
1008 1008 }
1009 1009 }
1010 1010 if (memcmp((const void *)ptr, "SN=", 3) == 0) {
1011 1011 ptr += 3;
1012 1012 mgp->sn_str = (char *)ptr;
1013 1013 }
1014 1014 if (memcmp((const void *)ptr, "PC=", 3) == 0) {
1015 1015 ptr += 3;
1016 1016 mgp->pc_str = (char *)ptr;
1017 1017 }
1018 1018 MYRI10GE_NEXT_STRING(ptr);
1019 1019 }
1020 1020
1021 1021 return (0);
1022 1022
1023 1023 abort:
1024 1024 cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
1025 1025 return (ENXIO);
1026 1026 }
1027 1027
1028 1028
1029 1029 /*
1030 1030 * Determine the register set containing the PCI resource we
1031 1031 * want to map: the memory-mappable part of the interface. We do
1032 1032 * this by scanning the DDI "reg" property of the interface,
1033 1033 * which is an array of mx_ddi_reg_set structures.
1034 1034 */
/*
 * Scan the DDI "reg" property (an array of 5-int register-set tuples)
 * for the first memory-mappable register set, and report its index,
 * its span, and the device's bus/device/function numbers.
 * Returns 0 on success, ENXIO/ENODEV/EIO on failure.
 */
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

/* field extractors for one 5-int "reg" tuple (see reg(9P)/PCI binding) */
#define	REGISTER_NUMBER(ip)	(ip[0] >>  0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >>  8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip) 	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip) 	(ip[4])

/* ADDRESS_SPACE() values for the two memory-mappable types */
#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3

	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
					"I/O Space",
					"32-bit Memory Space",
					"64-bit Memory Space"
	};
#endif

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

	/*
	 * The bus/dev/func numbers come from the first tuple, which
	 * describes the device itself regardless of address space.
	 */
	rs = &data[0];
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number   = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number      = %d.\n", BUS_NUMBER(rs));
		printf("  Address space   = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			/*
			 * NOTE(review): only the low 32 bits of the span
			 * are returned; assumes the BAR is < 4GB.
			 */
			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}
1132 1132
1133 1133
1134 1134 static int
1135 1135 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
1136 1136 {
1137 1137 void *inflate_buffer;
1138 1138 int rv, status;
1139 1139 size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
1140 1140 size_t destlen;
1141 1141 mcp_gen_header_t *hdr;
1142 1142 unsigned hdr_offset, i;
1143 1143
1144 1144
1145 1145 *limit = 0; /* -Wuninitialized */
1146 1146 status = 0;
1147 1147
1148 1148 inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
1149 1149 if (!inflate_buffer) {
1150 1150 cmn_err(CE_WARN,
1151 1151 "%s: Could not allocate buffer to inflate mcp\n",
1152 1152 mgp->name);
1153 1153 return (ENOMEM);
1154 1154 }
1155 1155
1156 1156 destlen = sram_size;
1157 1157 rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
1158 1158 mgp->eth_z8e_length);
1159 1159
1160 1160 if (rv != Z_OK) {
1161 1161 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
1162 1162 mgp->name, z_strerror(rv));
1163 1163 status = ENXIO;
1164 1164 goto abort;
1165 1165 }
1166 1166
1167 1167 *limit = (uint32_t)destlen;
1168 1168
1169 1169 hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
1170 1170 MCP_HEADER_PTR_OFFSET));
1171 1171 hdr = (void *)((char *)inflate_buffer + hdr_offset);
1172 1172 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
1173 1173 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
1174 1174 ntohl(hdr->mcp_type));
1175 1175 status = EIO;
1176 1176 goto abort;
1177 1177 }
1178 1178
1179 1179 /* save firmware version for kstat */
1180 1180 (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
1181 1181 if (myri10ge_verbose)
1182 1182 printf("%s: firmware id: %s\n", mgp->name, hdr->version);
1183 1183
1184 1184 /* Copy the inflated firmware to NIC SRAM. */
1185 1185 for (i = 0; i < *limit; i += 256) {
1186 1186 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
1187 1187 (char *)inflate_buffer + i,
1188 1188 min(256U, (unsigned)(*limit - i)));
1189 1189 mb();
1190 1190 (void) *(int *)(void *)mgp->sram;
1191 1191 mb();
1192 1192 }
1193 1193
1194 1194 abort:
1195 1195 kmem_free(inflate_buffer, sram_size);
1196 1196
1197 1197 return (status);
1198 1198
1199 1199 }
1200 1200
1201 1201
/*
 * Issue one command to the firmware via the MXGEFW_ETH_CMD mailbox and
 * poll (up to ~20ms) for its DMA'ed response.  On success the response
 * payload is returned in data->data0.  Returns 0, ENOSYS for an
 * unknown command, E2BIG for an unaligned argument, ENXIO on a
 * firmware-reported error, or EAGAIN on timeout.  Serialized by
 * mgp->cmd_lock; may sleep (drv_usecwait) and so must not be called
 * from interrupt context above lock level.
 */
int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* command and payload go to the NIC in network byte order */
	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	/* 0xffffffff is the "no response yet" sentinel the poll spins on */
	response->result = 0xffffffff;
	mb();

	/* the PIO write of the command block is what kicks the firmware */
	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}
1259 1259
1260 1260 /*
1261 1261 * Enable or disable periodic RDMAs from the host to make certain
1262 1262 * chipsets resend dropped PCIe messages
1263 1263 */
1264 1264
/*
 * Enable or disable the firmware's periodic dummy RDMAs (a workaround
 * that makes certain chipsets resend dropped PCIe messages).  Uses the
 * MXGEFW_BOOT_DUMMY_RDMA doorbell and waits up to ~20ms for the
 * firmware to acknowledge by DMA-ing 0xffffffff to the confirmation
 * word; a failure is only logged since there is nothing to unwind.
 */
static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	/* 8-byte-align the command block within the stack buffer */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	/*
	 * NOTE(review): buf[0]/buf[1] are passed as stored while
	 * buf[3]/buf[4] get htonl() — presumably cmd_dma.low/high are
	 * already kept in network order; confirm against the dma-setup
	 * code before changing.
	 */
	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high); 	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low); 	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */


	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	/* the PIO copy to the doorbell launches the request */
	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	/* poll up to 20 x 1ms for the firmware's -1 acknowledgement */
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void*) confirm, *confirm);
	}
}
1312 1312
/*
 * Load the ethernet firmware into NIC SRAM and hand control to it via
 * the MXGEFW_BOOT_HANDOFF doorbell, waiting up to ~1s for the new
 * firmware to confirm it is running.  On success, queries the RX ring
 * size to compute mgp->max_intr_slots and enables dummy RDMAs.
 * Returns 0 or ENXIO (or the zlib-load error).
 */
static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	/* 8-byte-align the handoff block within the stack buffer */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htonl(size - 8); 	/* length of code */
	buf[5] = htonl(8);		/* where to copy to */
	buf[6] = htonl(0);		/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	/* the PIO copy to the doorbell triggers the handoff */
	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	/* poll up to 1000 x 1ms for the new firmware's -1 confirmation */
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *) confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	/* two interrupt-queue slots per receive descriptor */
	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}
1384 1384
1385 1385 static int
1386 1386 myri10ge_m_unicst(void *arg, const uint8_t *addr)
1387 1387 {
1388 1388 struct myri10ge_priv *mgp = arg;
1389 1389 myri10ge_cmd_t cmd;
1390 1390 int status;
1391 1391
1392 1392 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1393 1393 | (addr[2] << 8) | addr[3]);
1394 1394
1395 1395 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1396 1396
1397 1397 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
1398 1398 if (status == 0 && (addr != mgp->mac_addr))
1399 1399 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));
1400 1400
1401 1401 return (status);
1402 1402 }
1403 1403
1404 1404 static int
1405 1405 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
1406 1406 {
1407 1407 myri10ge_cmd_t cmd;
1408 1408 int status;
1409 1409
1410 1410 if (pause)
1411 1411 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
1412 1412 &cmd);
1413 1413 else
1414 1414 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
1415 1415 &cmd);
1416 1416
1417 1417 if (status) {
1418 1418 cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
1419 1419 mgp->name);
1420 1420 return (ENXIO);
1421 1421 }
1422 1422 mgp->pause = pause;
1423 1423 return (0);
1424 1424 }
1425 1425
1426 1426 static void
1427 1427 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
1428 1428 {
1429 1429 myri10ge_cmd_t cmd;
1430 1430 int status;
1431 1431
1432 1432 if (promisc)
1433 1433 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
1434 1434 else
1435 1435 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);
1436 1436
1437 1437 if (status) {
1438 1438 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
1439 1439 mgp->name);
1440 1440 }
1441 1441 }
1442 1442
/*
 * Run the firmware's DMA benchmark in read, write, and read/write
 * modes, recording the measured bandwidths in mgp->read_dma,
 * mgp->write_dma and mgp->read_write_dma.  Returns 0 or the first
 * command's error (ENOMEM if the bounce buffer cannot be allocated).
 */
static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits or the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete
	 */

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	/*
	 * NOTE(review): ntohl() on the dma cookie halves here —
	 * presumably dmabench_dma.low/high are stored in network
	 * order; confirm against myri10ge_dma_alloc before changing.
	 */
	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;		/* 0x10000 => read test */
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	/*
	 * bandwidth = transfers * len * 2 / ticks (0.5us units).
	 * NOTE(review): divides by (cmd.data0 & 0xffff) without a
	 * zero check; relies on the firmware never reporting 0 ticks.
	 */
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;			/* 0x1 => write test */
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;		/* 0x10001 => read+write */
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);


abort:
	myri10ge_dma_free(&dmabench_dma);
	/* the unaligned-test variant is expected to fail; stay quiet */
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}
1514 1514
/*
 * Reset the firmware and re-establish all host/NIC shared state:
 * interrupt queue sizes and DMA addresses (per slice), RSS queue
 * setup, irq-claim/deassert/coalescing pointers into SRAM, then zero
 * the driver-side ring counters and reprogram MAC address, promisc
 * mode and pause.  The command ordering below is mandated by the
 * firmware (see the block comment on MXGEFW_CMD_GET_MAX_RSS_QUEUES).
 */
static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts  */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	/*
	 * Tell the firmware where each slice's interrupt queue lives.
	 * Errors from here on are OR-ed together and checked once below.
	 */
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	};

	/* per-slice irq-claim words are 8 bytes apart in SRAM */
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	/* the deassert register is only needed for legacy interrupts */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	/* reprogram the station address, promisc and pause settings */
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}
1643 1643
1644 1644 static int
1645 1645 myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
1646 1646 {
1647 1647 myri10ge_cmd_t cmd;
1648 1648 int i, b, s, t, j;
1649 1649 int status;
1650 1650 uint32_t k[8];
1651 1651 uint32_t tmp;
1652 1652 uint8_t *key;
1653 1653
1654 1654 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
1655 1655 &cmd);
1656 1656 if (status != 0) {
1657 1657 cmn_err(CE_WARN, "%s: failed to get rss key\n",
1658 1658 mgp->name);
1659 1659 return (EIO);
1660 1660 }
1661 1661 myri10ge_pio_copy32(mgp->rss_key,
1662 1662 (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
1663 1663 sizeof (mgp->rss_key));
1664 1664
1665 1665 mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
1666 1666 KM_SLEEP);
1667 1667 key = (uint8_t *)mgp->rss_key;
1668 1668 t = 0;
1669 1669 for (b = 0; b < 12; b++) {
1670 1670 for (s = 0; s < 8; s++) {
1671 1671 /* Bits: b*8+s, ..., b*8+s+31 */
1672 1672 k[s] = 0;
1673 1673 for (j = 0; j < 32; j++) {
1674 1674 int bit = b*8+s+j;
1675 1675 bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
1676 1676 k[s] |= bit << (31 - j);
1677 1677 }
1678 1678 }
1679 1679
1680 1680 for (i = 0; i <= 0xff; i++) {
1681 1681 tmp = 0;
1682 1682 if (i & (1 << 7)) { tmp ^= k[0]; }
1683 1683 if (i & (1 << 6)) { tmp ^= k[1]; }
1684 1684 if (i & (1 << 5)) { tmp ^= k[2]; }
1685 1685 if (i & (1 << 4)) { tmp ^= k[3]; }
1686 1686 if (i & (1 << 3)) { tmp ^= k[4]; }
1687 1687 if (i & (1 << 2)) { tmp ^= k[5]; }
1688 1688 if (i & (1 << 1)) { tmp ^= k[6]; }
1689 1689 if (i & (1 << 0)) { tmp ^= k[7]; }
1690 1690 mgp->toeplitz_hash_table[t++] = tmp;
1691 1691 }
1692 1692 }
1693 1693 return (0);
1694 1694 }
1695 1695
1696 1696 static inline struct myri10ge_slice_state *
1697 1697 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1698 1698 {
1699 1699 struct tcphdr *hdr;
1700 1700 uint32_t saddr, daddr;
1701 1701 uint32_t hash, slice;
1702 1702 uint32_t *table = mgp->toeplitz_hash_table;
1703 1703 uint16_t src, dst;
1704 1704
1705 1705 /*
1706 1706 * Note hashing order is reversed from how it is done
1707 1707 * in the NIC, so as to generate the same hash value
1708 1708 * for the connection to try to keep connections CPU local
1709 1709 */
1710 1710
1711 1711 /* hash on IPv4 src/dst address */
1712 1712 saddr = ntohl(ip->ip_src.s_addr);
1713 1713 daddr = ntohl(ip->ip_dst.s_addr);
1714 1714 hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
1715 1715 hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
1716 1716 hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
1717 1717 hash ^= table[(256 * 3) + ((daddr) & 0xff)];
1718 1718 hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
1719 1719 hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
1720 1720 hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
1721 1721 hash ^= table[(256 * 7) + ((saddr) & 0xff)];
1722 1722 /* hash on TCP port, if required */
1723 1723 if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
1724 1724 ip->ip_p == IPPROTO_TCP) {
1725 1725 hdr = (struct tcphdr *)(void *)
1726 1726 (((uint8_t *)ip) + (ip->ip_hl << 2));
1727 1727 src = ntohs(hdr->th_sport);
1728 1728 dst = ntohs(hdr->th_dport);
1729 1729
1730 1730 hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
1731 1731 hash ^= table[(256 * 9) + ((dst) & 0xff)];
1732 1732 hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
1733 1733 hash ^= table[(256 * 11) + ((src) & 0xff)];
1734 1734 }
1735 1735 slice = (mgp->num_slices - 1) & hash;
1736 1736 return (&mgp->ss[slice]);
1737 1737
1738 1738 }
1739 1739
1740 1740 static inline struct myri10ge_slice_state *
1741 1741 myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1742 1742 {
1743 1743 struct tcphdr *hdr;
1744 1744 uint32_t slice, hash_val;
1745 1745
1746 1746
1747 1747 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
1748 1748 return (&mgp->ss[0]);
1749 1749 }
1750 1750 hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));
1751 1751
1752 1752 /*
1753 1753 * Use the second byte of the *destination* address for
1754 1754 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
1755 1755 */
1756 1756 hash_val = ntohs(hdr->th_dport) & 0xff;
1757 1757 if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
1758 1758 hash_val += ntohs(hdr->th_sport) & 0xff;
1759 1759
1760 1760 slice = (mgp->num_slices - 1) & hash_val;
1761 1761 return (&mgp->ss[slice]);
1762 1762 }
1763 1763
/*
 * Choose the transmit slice for an outbound mblk.  Parses the
 * Ethernet (optionally VLAN-tagged) and IPv4 headers out of the first
 * mblk and dispatches to the Toeplitz or simple port hash according
 * to myri10ge_rss_hash; anything unparseable falls back to slice 0.
 * With myri10ge_tx_hash == 0 the sending CPU picks the slice instead.
 */
static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	/* no hashing requested: keep the flow on the sending CPU */
	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	/* +8 guarantees the TCP/UDP port fields are present too */
	if ((MBLKL(mp)) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		/* a VLAN tag adds 4 bytes before the IP header */
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	/* re-check the length with the real IP header length */
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}
1819 1819
1820 1820 static int
1821 1821 myri10ge_setup_slice(struct myri10ge_slice_state *ss)
1822 1822 {
1823 1823 struct myri10ge_priv *mgp = ss->mgp;
1824 1824 myri10ge_cmd_t cmd;
1825 1825 int tx_ring_size, rx_ring_size;
1826 1826 int tx_ring_entries, rx_ring_entries;
1827 1827 int slice, status;
1828 1828 int allocated, idx;
1829 1829 size_t bytes;
1830 1830
1831 1831 slice = ss - mgp->ss;
1832 1832 cmd.data0 = slice;
1833 1833 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
1834 1834 tx_ring_size = cmd.data0;
1835 1835 cmd.data0 = slice;
1836 1836 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1837 1837 if (status != 0)
1838 1838 return (status);
1839 1839 rx_ring_size = cmd.data0;
1840 1840
1841 1841 tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
1842 1842 rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
1843 1843 ss->tx.mask = tx_ring_entries - 1;
1844 1844 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
1845 1845
1846 1846 /* get the lanai pointers to the send and receive rings */
1847 1847
1848 1848 cmd.data0 = slice;
1849 1849 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
1850 1850 ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
1851 1851 if (mgp->num_slices > 1) {
1852 1852 ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
1853 1853 ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
1854 1854 64 * slice;
1855 1855 } else {
1856 1856 ss->tx.go = NULL;
1857 1857 ss->tx.stop = NULL;
1858 1858 }
1859 1859
1860 1860 cmd.data0 = slice;
1861 1861 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
1862 1862 ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
1863 1863 (void *)(mgp->sram + cmd.data0);
1864 1864
1865 1865 cmd.data0 = slice;
1866 1866 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
1867 1867 ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
1868 1868 (mgp->sram + cmd.data0);
1869 1869
1870 1870 if (status != 0) {
1871 1871 cmn_err(CE_WARN,
1872 1872 "%s: failed to get ring sizes or locations\n", mgp->name);
1873 1873 return (status);
1874 1874 }
1875 1875
1876 1876 status = ENOMEM;
1877 1877 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
1878 1878 ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
1879 1879 if (ss->rx_small.shadow == NULL)
1880 1880 goto abort;
1881 1881 (void) memset(ss->rx_small.shadow, 0, bytes);
1882 1882
1883 1883 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
1884 1884 ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
1885 1885 if (ss->rx_big.shadow == NULL)
1886 1886 goto abort_with_rx_small_shadow;
1887 1887 (void) memset(ss->rx_big.shadow, 0, bytes);
1888 1888
1889 1889 /* allocate the host info rings */
1890 1890
1891 1891 bytes = tx_ring_entries * sizeof (*ss->tx.info);
1892 1892 ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
1893 1893 if (ss->tx.info == NULL)
1894 1894 goto abort_with_rx_big_shadow;
1895 1895 (void) memset(ss->tx.info, 0, bytes);
1896 1896
1897 1897 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
1898 1898 ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
1899 1899 if (ss->rx_small.info == NULL)
1900 1900 goto abort_with_tx_info;
1901 1901 (void) memset(ss->rx_small.info, 0, bytes);
1902 1902
1903 1903 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
1904 1904 ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
1905 1905 if (ss->rx_big.info == NULL)
1906 1906 goto abort_with_rx_small_info;
1907 1907 (void) memset(ss->rx_big.info, 0, bytes);
1908 1908
1909 1909 ss->tx.stall = ss->tx.sched = 0;
1910 1910 ss->tx.stall_early = ss->tx.stall_late = 0;
1911 1911
1912 1912 ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
1913 1913 (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));
1914 1914
1915 1915 allocated = myri10ge_add_jbufs(ss,
1916 1916 myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
1917 1917 if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
1918 1918 cmn_err(CE_WARN,
1919 1919 "%s: Could not allocate enough receive buffers (%d/%d)\n",
1920 1920 mgp->name, allocated,
1921 1921 myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
1922 1922 goto abort_with_jumbos;
1923 1923 }
1924 1924
1925 1925 myri10ge_carve_up_jbufs_into_small_ring(ss);
1926 1926 ss->j_rx_cnt = 0;
1927 1927
1928 1928 mutex_enter(&ss->jpool.mtx);
1929 1929 if (allocated < rx_ring_entries)
1930 1930 ss->jpool.low_water = allocated / 4;
1931 1931 else
1932 1932 ss->jpool.low_water = rx_ring_entries / 2;
1933 1933
1934 1934 /*
1935 1935 * invalidate the big receive ring in case we do not
1936 1936 * allocate sufficient jumbos to fill it
1937 1937 */
1938 1938 (void) memset(ss->rx_big.shadow, 1,
1939 1939 (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
1940 1940 for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
1941 1941 myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
1942 1942 &ss->rx_big.shadow[idx - 7]);
1943 1943 mb();
1944 1944 }
1945 1945
1946 1946
1947 1947 myri10ge_restock_jumbos(ss);
1948 1948
1949 1949 for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
1950 1950 myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
1951 1951 &ss->rx_small.shadow[idx - 7]);
1952 1952 mb();
1953 1953 }
1954 1954 ss->rx_small.cnt = ss->rx_small.mask + 1;
1955 1955
1956 1956 mutex_exit(&ss->jpool.mtx);
1957 1957
1958 1958 status = myri10ge_prepare_tx_ring(ss);
1959 1959
1960 1960 if (status != 0)
1961 1961 goto abort_with_small_jbufs;
1962 1962
1963 1963 cmd.data0 = ntohl(ss->fw_stats_dma.low);
1964 1964 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1965 1965 cmd.data2 = sizeof (mcp_irq_data_t);
1966 1966 cmd.data2 |= (slice << 16);
1967 1967 bzero(ss->fw_stats, sizeof (*ss->fw_stats));
1968 1968 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
1969 1969 if (status == ENOSYS) {
1970 1970 cmd.data0 = ntohl(ss->fw_stats_dma.low) +
1971 1971 offsetof(mcp_irq_data_t, send_done_count);
1972 1972 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1973 1973 status = myri10ge_send_cmd(mgp,
1974 1974 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
1975 1975 }
1976 1976 if (status) {
1977 1977 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
1978 1978 goto abort_with_tx;
1979 1979 }
1980 1980
1981 1981 return (0);
1982 1982
1983 1983 abort_with_tx:
1984 1984 myri10ge_unprepare_tx_ring(ss);
1985 1985
1986 1986 abort_with_small_jbufs:
1987 1987 myri10ge_release_small_jbufs(ss);
1988 1988
1989 1989 abort_with_jumbos:
1990 1990 if (allocated != 0) {
1991 1991 mutex_enter(&ss->jpool.mtx);
1992 1992 ss->jpool.low_water = 0;
1993 1993 mutex_exit(&ss->jpool.mtx);
1994 1994 myri10ge_unstock_jumbos(ss);
1995 1995 myri10ge_remove_jbufs(ss);
1996 1996 }
1997 1997
1998 1998 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
1999 1999 kmem_free(ss->rx_big.info, bytes);
2000 2000
2001 2001 abort_with_rx_small_info:
2002 2002 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2003 2003 kmem_free(ss->rx_small.info, bytes);
2004 2004
2005 2005 abort_with_tx_info:
2006 2006 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2007 2007 kmem_free(ss->tx.info, bytes);
2008 2008
2009 2009 abort_with_rx_big_shadow:
2010 2010 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2011 2011 kmem_free(ss->rx_big.shadow, bytes);
2012 2012
2013 2013 abort_with_rx_small_shadow:
2014 2014 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2015 2015 kmem_free(ss->rx_small.shadow, bytes);
2016 2016 abort:
2017 2017 return (status);
2018 2018
2019 2019 }
2020 2020
/*
 * Release everything myri10ge_setup_slice() allocated for one slice:
 * the TX copy buffers, the small/jumbo receive buffer pools, and the
 * host-side info and shadow rings.  Safe to call on a slice that was
 * never (or only partially) set up.
 */
static void
myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
{
	int tx_ring_entries, rx_ring_entries;
	size_t bytes;

	/* ignore slices that have not been fully setup */
	if (ss->tx.cp == NULL)
		return;
	/* Free the TX copy buffers */
	myri10ge_unprepare_tx_ring(ss);

	/* stop passing returned buffers to firmware */

	mutex_enter(&ss->jpool.mtx);
	ss->jpool.low_water = 0;	/* 0 disables rx-path restocking */
	mutex_exit(&ss->jpool.mtx);
	myri10ge_release_small_jbufs(ss);

	/* Release the free jumbo frame pool */
	myri10ge_unstock_jumbos(ss);
	myri10ge_remove_jbufs(ss);

	/* ring sizes are powers of two; mask + 1 is the entry count */
	rx_ring_entries = ss->rx_big.mask + 1;
	tx_ring_entries = ss->tx.mask + 1;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	kmem_free(ss->rx_big.info, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	kmem_free(ss->rx_small.info, bytes);

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	kmem_free(ss->tx.info, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	kmem_free(ss->rx_big.shadow, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	kmem_free(ss->rx_small.shadow, bytes);

}
/*
 * Bring the interface up (called with the intrlock held): reset the
 * NIC, configure RSS slices when more than one is enabled, set up the
 * per-slice rings and buffers, program buffer sizes and MTU into the
 * firmware, and finally issue ETHERNET_UP.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE; on failure all partially
 * acquired resources are released via the abort_* labels.
 */
static int
myri10ge_start_locked(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int status, big_pow2, i;
	volatile uint8_t *itable;

	status = DDI_SUCCESS;
	/* Allocate DMA resources and receive buffers */

	status = myri10ge_reset(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (DDI_FAILURE);
	}

	if (mgp->num_slices > 1) {
		cmd.data0 = mgp->num_slices;
		cmd.data1 = 1; /* use MSI-X */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			goto abort_with_nothing;
		}
		/* setup the indirection table */
		cmd.data0 = mgp->num_slices;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
		    &cmd);

		/* cmd.data0 now holds the table's offset into NIC sram */
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to setup rss tables\n", mgp->name);
		}

		/* just enable an identity mapping */
		itable = mgp->sram + cmd.data0;
		for (i = 0; i < mgp->num_slices; i++)
			itable[i] = (uint8_t)i;

		/* build the host-side toeplitz table used for tx hashing */
		if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
			status = myri10ge_init_toeplitz(mgp);
			if (status != 0) {
				cmn_err(CE_WARN, "%s: failed to setup "
				    "toeplitz tx hash table", mgp->name);
				goto abort_with_nothing;
			}
		}
		cmd.data0 = 1;
		cmd.data1 = myri10ge_rss_hash;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to enable slices\n", mgp->name);
			goto abort_with_toeplitz;
		}
	}

	for (i = 0; i < mgp->num_slices; i++) {
		status = myri10ge_setup_slice(&mgp->ss[i]);
		if (status != 0)
			goto abort_with_slices;
	}

	/*
	 * Tell the MCP how many buffers he has, and to
	 * bring the ethernet interface up
	 *
	 * Firmware needs the big buff size as a power of 2. Lie and
	 * tell him the buffer is larger, because we only use 1
	 * buffer/pkt, and the mtu will prevent overruns
	 */
	big_pow2 = myri10ge_mtu + MXGEFW_PAD;
	while (!ISP2(big_pow2))
		big_pow2++;

	/* now give firmware buffers sizes, and MTU */
	cmd.data0 = myri10ge_mtu;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = myri10ge_small_bytes;
	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
	cmd.data0 = big_pow2;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
		goto abort_with_slices;
	}


	cmd.data0 = 1;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
	if (status) {
		/* TSO is optional; warn and continue without it */
		cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
		    mgp->name, status);
	} else {
		mgp->features |= MYRI10GE_TSO;
	}

	mgp->link_state = -1;
	mgp->rdma_tags_available = 15;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
		goto abort_with_slices;
	}
	mgp->running = MYRI10GE_ETH_RUNNING;
	return (DDI_SUCCESS);

abort_with_slices:
	/* teardown_slice ignores slices that were never set up */
	for (i = 0; i < mgp->num_slices; i++)
		myri10ge_teardown_slice(&mgp->ss[i]);

	mgp->running = MYRI10GE_ETH_STOPPED;

abort_with_toeplitz:
	if (mgp->toeplitz_hash_table != NULL) {
		kmem_free(mgp->toeplitz_hash_table,
		    sizeof (uint32_t) * 12 * 256);
		mgp->toeplitz_hash_table = NULL;
	}

abort_with_nothing:
	return (DDI_FAILURE);
}
2193 2193
2194 2194 static void
2195 2195 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2196 2196 {
2197 2197 int status, old_down_cnt;
2198 2198 myri10ge_cmd_t cmd;
2199 2199 int wait_time = 10;
2200 2200 int i, polling;
2201 2201
2202 2202 old_down_cnt = mgp->down_cnt;
2203 2203 mb();
2204 2204 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2205 2205 if (status) {
2206 2206 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2207 2207 }
2208 2208
2209 2209 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2210 2210 delay(1 * drv_usectohz(1000000));
2211 2211 wait_time--;
2212 2212 if (wait_time == 0)
2213 2213 break;
2214 2214 }
2215 2215 again:
2216 2216 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2217 2217 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2218 2218 for (i = 0; i < mgp->num_slices; i++) {
2219 2219 /*
2220 2220 * take and release the rx lock to ensure
2221 2221 * that no interrupt thread is blocked
2222 2222 * elsewhere in the stack, preventing
2223 2223 * completion
2224 2224 */
2225 2225
2226 2226 mutex_enter(&mgp->ss[i].rx_lock);
2227 2227 printf("%s: slice %d rx irq idle\n",
2228 2228 mgp->name, i);
2229 2229 mutex_exit(&mgp->ss[i].rx_lock);
2230 2230
2231 2231 /* verify that the poll handler is inactive */
2232 2232 mutex_enter(&mgp->ss->poll_lock);
2233 2233 polling = mgp->ss->rx_polling;
2234 2234 mutex_exit(&mgp->ss->poll_lock);
2235 2235 if (polling) {
2236 2236 printf("%s: slice %d is polling\n",
2237 2237 mgp->name, i);
2238 2238 delay(1 * drv_usectohz(1000000));
2239 2239 goto again;
2240 2240 }
2241 2241 }
2242 2242 delay(1 * drv_usectohz(1000000));
2243 2243 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2244 2244 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2245 2245 }
2246 2246 }
2247 2247
2248 2248 for (i = 0; i < mgp->num_slices; i++)
2249 2249 myri10ge_teardown_slice(&mgp->ss[i]);
2250 2250
2251 2251 if (mgp->toeplitz_hash_table != NULL) {
2252 2252 kmem_free(mgp->toeplitz_hash_table,
2253 2253 sizeof (uint32_t) * 12 * 256);
2254 2254 mgp->toeplitz_hash_table = NULL;
2255 2255 }
2256 2256 mgp->running = MYRI10GE_ETH_STOPPED;
2257 2257 }
2258 2258
2259 2259 static int
2260 2260 myri10ge_m_start(void *arg)
2261 2261 {
2262 2262 struct myri10ge_priv *mgp = arg;
2263 2263 int status;
2264 2264
2265 2265 mutex_enter(&mgp->intrlock);
2266 2266
2267 2267 if (mgp->running != MYRI10GE_ETH_STOPPED) {
2268 2268 mutex_exit(&mgp->intrlock);
2269 2269 return (DDI_FAILURE);
2270 2270 }
2271 2271 status = myri10ge_start_locked(mgp);
2272 2272 mutex_exit(&mgp->intrlock);
2273 2273
2274 2274 if (status != DDI_SUCCESS)
2275 2275 return (status);
2276 2276
2277 2277 /* start the watchdog timer */
2278 2278 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2279 2279 mgp->timer_ticks);
2280 2280 return (DDI_SUCCESS);
2281 2281
2282 2282 }
2283 2283
/*
 * GLDv3 mc_stop(9E) entry point: take a running interface down.
 * The STOPPING state is published, then the intrlock is dropped
 * around untimeout() before the actual teardown is done under the
 * lock.  NOTE(review): dropping the lock for untimeout() presumably
 * avoids deadlocking against a watchdog callback that takes
 * intrlock -- confirm against myri10ge_watchdog().
 */
static void
myri10ge_m_stop(void *arg)
{
	struct myri10ge_priv *mgp = arg;

	mutex_enter(&mgp->intrlock);
	/* if the device not running give up */
	if (mgp->running != MYRI10GE_ETH_RUNNING) {
		mutex_exit(&mgp->intrlock);
		return;
	}

	mgp->running = MYRI10GE_ETH_STOPPING;
	mutex_exit(&mgp->intrlock);
	/* cancel the watchdog with no locks held */
	(void) untimeout(mgp->timer_id);
	mutex_enter(&mgp->intrlock);
	myri10ge_stop_locked(mgp);
	mutex_exit(&mgp->intrlock);

}
2304 2304
/*
 * Fix up the firmware's partial (one's complement) receive checksum
 * and attach it to the mblk with mac_hcksum_set(), also counting
 * broadcast/multicast receives.  The firmware checksums the payload
 * starting after what it took to be the ethernet header, so bytes it
 * wrongly included (VLAN tag, IPv6 header) are subtracted here.
 * Non-TCP/UDP frames are left without hardware checksum info.
 */
static inline void
myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
{
	struct ether_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint32_t start, stuff, end, partial, hdrlen;


	/* firmware delivers the 16-bit partial sum in network order */
	csum = ntohs((uint16_t)csum);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	hdrlen = sizeof (*eh);
	/* low bit of the first dest octet marks bcast/mcast frames */
	if (eh->ether_dhost.ether_addr_octet[0] & 1) {
		if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
		    myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
			s->brdcstrcv++;
		else
			s->multircv++;
	}

	if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
		/*
		 * fix checksum by subtracting 4 bytes after what the
		 * firmware thought was the end of the ether hdr
		 */
		partial = *(uint32_t *)
		    (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
		csum += ~partial;
		csum += (csum < ~partial);	/* propagate carry */
		csum = (csum >> 16) + (csum & 0xFFFF);
		csum = (csum >> 16) + (csum & 0xFFFF);
		hdrlen += VLAN_TAGSZ;
	}

	if (eh->ether_type == BE_16(ETHERTYPE_IP)) {
		ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
		start = ip->ip_hl << 2;	/* IP header length in bytes */

		if (ip->ip_p == IPPROTO_TCP)
			stuff = start + offsetof(struct tcphdr, th_sum);
		else if (ip->ip_p == IPPROTO_UDP)
			stuff = start + offsetof(struct udphdr, uh_sum);
		else
			return;
		end = ntohs(ip->ip_len);
	} else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) {
		ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
		start = sizeof (*ip6);
		if (ip6->ip6_nxt == IPPROTO_TCP) {
			stuff = start + offsetof(struct tcphdr, th_sum);
		} else if (ip6->ip6_nxt == IPPROTO_UDP)
			stuff = start + offsetof(struct udphdr, uh_sum);
		else
			return;
		end = start + ntohs(ip6->ip6_plen);
		/*
		 * IPv6 headers do not contain a checksum, and hence
		 * do not checksum to zero, so they don't "fall out"
		 * of the partial checksum calculation like IPv4
		 * headers do. We need to fix the partial checksum by
		 * subtracting the checksum of the IPv6 header.
		 */

		partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
		csum += ~partial;
		csum += (csum < ~partial);	/* propagate carry */
		csum = (csum >> 16) + (csum & 0xFFFF);
		csum = (csum >> 16) + (csum & 0xFFFF);
	} else {
		/* neither IPv4 nor IPv6: nothing to offload */
		return;
	}

	if (MBLKL(mp) > hdrlen + end) {
		/* padded frame, so hw csum may be invalid */
		return;
	}

	mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
}
2384 2384
2385 2385 static mblk_t *
2386 2386 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
2387 2387 uint32_t csum)
2388 2388 {
2389 2389 mblk_t *mp;
2390 2390 myri10ge_rx_ring_t *rx;
2391 2391 int idx;
2392 2392
2393 2393 rx = &ss->rx_small;
2394 2394 idx = rx->cnt & rx->mask;
2395 2395 ss->rx_small.cnt++;
2396 2396
2397 2397 /* allocate a new buffer to pass up the stack */
2398 2398 mp = allocb(len + MXGEFW_PAD, 0);
2399 2399 if (mp == NULL) {
2400 2400 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
2401 2401 goto abort;
2402 2402 }
2403 2403 bcopy(ss->rx_small.info[idx].ptr,
2404 2404 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2405 2405 mp->b_wptr += len + MXGEFW_PAD;
2406 2406 mp->b_rptr += MXGEFW_PAD;
2407 2407
2408 2408 ss->rx_stats.ibytes += len;
2409 2409 ss->rx_stats.ipackets += 1;
2410 2410 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2411 2411
2412 2412 abort:
2413 2413 if ((idx & 7) == 7) {
2414 2414 myri10ge_submit_8rx(&rx->lanai[idx - 7],
2415 2415 &rx->shadow[idx - 7]);
2416 2416 }
2417 2417
2418 2418 return (mp);
2419 2419 }
2420 2420
2421 2421
/*
 * Receive one frame from the jumbo-buffer ring.  Normally the jumbo
 * buffer itself is loaned to the stack via desballoc(); when the pool
 * of buffers owned by the NIC runs low, the frame is copied instead
 * so the jumbo can be returned to the NIC immediately.  Returns the
 * mblk, or NULL on allocation failure (the jumbo is then freed back
 * to the pool).
 */
static mblk_t *
myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
    uint32_t csum)
{
	struct myri10ge_jpool_stuff *jpool;
	struct myri10ge_jpool_entry *j;
	mblk_t *mp;
	int idx, num_owned_by_mcp;

	jpool = &ss->jpool;
	/* j_rx_cnt trails rx_big.cnt; it indexes the next jumbo to consume */
	idx = ss->j_rx_cnt & ss->rx_big.mask;
	j = ss->rx_big.info[idx].j;

	if (j == NULL) {
		printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
		    ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
		return (NULL);
	}


	/* take ownership of the jumbo away from the ring slot */
	ss->rx_big.info[idx].j = NULL;
	ss->j_rx_cnt++;


	/*
	 * Check to see if we are low on rx buffers.
	 * Note that we must leave at least 8 free so there are
	 * enough to free in a single 64-byte write.
	 */
	num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
	if (num_owned_by_mcp < jpool->low_water) {
		mutex_enter(&jpool->mtx);
		myri10ge_restock_jumbos(ss);
		mutex_exit(&jpool->mtx);
		num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
		/* if we are still low, then we have to copy */
		if (num_owned_by_mcp < 16) {
			MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
			/* allocate a new buffer to pass up the stack */
			mp = allocb(len + MXGEFW_PAD, 0);
			if (mp == NULL) {
				goto abort;
			}
			bcopy(j->buf,
			    (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
			myri10ge_jfree_rtn(j);
			/* push buffer back to NIC */
			mutex_enter(&jpool->mtx);
			myri10ge_restock_jumbos(ss);
			mutex_exit(&jpool->mtx);
			goto set_len;
		}
	}

	/* loan our buffer to the stack */
	mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
	if (mp == NULL) {
		goto abort;
	}

set_len:
	mp->b_rptr += MXGEFW_PAD;
	mp->b_wptr = ((unsigned char *) mp->b_rptr + len);

	ss->rx_stats.ibytes += len;
	ss->rx_stats.ipackets += 1;
	myri10ge_rx_csum(mp, &ss->rx_stats, csum);

	return (mp);

abort:
	/* could not wrap or copy the frame: drop it, recycle the jumbo */
	myri10ge_jfree_rtn(j);
	MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
	return (NULL);
}
2497 2497
/*
 * Free all transmit buffers up until the specified index
 * (the firmware's send_done_count).  Unbinds and batches up DMA
 * handles for release, accumulates per-packet stats into the ring
 * totals, wakes a stalled tx queue once half the slots are free,
 * and tells the NIC to stop polling the ring when it goes idle.
 */
static inline void
myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	int idx;
	int limit = 0;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	/* walk completed slots until we catch up with the firmware */
	while (tx->pkt_done != (int)mcp_index) {
		idx = tx->done & tx->mask;

		/*
		 * mblk & DMA handle attached only to first slot
		 * per buffer in the packet
		 */

		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			/* chain the handle for one batched free below */
			tx->info[idx].handle->next = handles.head;
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		/* nonzero opackets marks the last slot of a packet */
		if (tx->info[idx].ostat.opackets != 0) {
			tx->stats.multixmt += tx->info[idx].ostat.multixmt;
			tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
			tx->stats.obytes += tx->info[idx].ostat.obytes;
			tx->stats.opackets += tx->info[idx].ostat.opackets;
			tx->info[idx].stat.un.all = 0;
			tx->pkt_done++;
		}

		tx->done++;
		/*
		 * if we stalled the queue, wake it. But Wait until
		 * we have at least 1/2 our slots free.
		 */
		if ((tx->req - tx->done) < (tx->mask >> 1) &&
		    tx->stall != tx->sched) {
			mutex_enter(&ss->tx.lock);
			tx->sched = tx->stall;
			mutex_exit(&ss->tx.lock);
			mac_tx_ring_update(ss->mgp->mh, tx->rh);
		}

		/* limit potential for livelock */
		if (unlikely(++limit > 2 * tx->mask))
			break;
	}
	if (tx->req == tx->done && tx->stop != NULL) {
		/*
		 * Nic has sent all pending requests, allow him
		 * to stop polling this queue
		 */
		mutex_enter(&tx->lock);
		if (tx->req == tx->done && tx->active) {
			*(int *)(void *)tx->stop = 1;
			tx->active = 0;
			mb();
		}
		mutex_exit(&tx->lock);
	}
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
}
2572 2572
2573 2573 static void
2574 2574 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2575 2575 {
2576 2576 mbl->head = NULL;
2577 2577 mbl->tail = &mbl->head;
2578 2578 mbl->cnt = 0;
2579 2579 }
2580 2580
2581 2581 /*ARGSUSED*/
2582 2582 void
2583 2583 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2584 2584 struct myri10ge_mblk_list *mbl, mblk_t *mp)
2585 2585 {
2586 2586 *(mbl->tail) = mp;
2587 2587 mbl->tail = &mp->b_next;
2588 2588 mp->b_next = NULL;
2589 2589 mbl->cnt++;
2590 2590 }
2591 2591
2592 2592
/*
 * Drain the slice's rx completion ring into an mblk list, up to a
 * byte budget ("limit") or until *stop is set by another thread.
 * Frames at or below myri10ge_small_bytes come from the small ring,
 * larger ones from the jumbo ring; completed frames are fed through
 * LRO when enabled, otherwise appended directly.  Any still-active
 * LRO flows are flushed before returning.
 */
static inline void
myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
    struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
{
	myri10ge_rx_done_t *rx_done = &ss->rx_done;
	struct myri10ge_priv *mgp = ss->mgp;
	mblk_t *mp;
	struct lro_entry *lro;
	uint16_t length;
	uint16_t checksum;


	/* a zero length entry marks the end of valid completions */
	while (rx_done->entry[rx_done->idx].length != 0) {
		if (unlikely (*stop)) {
			break;
		}
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* low bits carry the RSS hash type; strip them */
		length &= (~MXGEFW_RSS_HASH_MASK);

		/* limit potential for livelock */
		limit -= length;
		if (unlikely(limit < 0))
			break;

		/* consume the entry so the firmware can reuse it */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
		if (length <= myri10ge_small_bytes)
			mp = myri10ge_rx_done_small(ss, length, checksum);
		else
			mp = myri10ge_rx_done_big(ss, length, checksum);
		if (mp != NULL) {
			/* LRO consumes the mblk on success (returns 0) */
			if (!myri10ge_lro ||
			    0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
				myri10ge_mbl_append(ss, mbl, mp);
		}
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
	}
	/* flush any LRO flows still accumulating segments */
	while (ss->lro_active != NULL) {
		lro = ss->lro_active;
		ss->lro_active = lro->next;
		myri10ge_lro_flush(ss, lro, mbl);
	}
}
2637 2637
2638 2638 static void
2639 2639 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2640 2640 {
2641 2641 uint64_t gen;
2642 2642 struct myri10ge_mblk_list mbl;
2643 2643
2644 2644 myri10ge_mbl_init(&mbl);
2645 2645 if (mutex_tryenter(&ss->rx_lock) == 0)
2646 2646 return;
2647 2647 gen = ss->rx_gen_num;
2648 2648 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2649 2649 &ss->rx_polling);
2650 2650 if (mbl.head != NULL)
2651 2651 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2652 2652 mutex_exit(&ss->rx_lock);
2653 2653
2654 2654 }
2655 2655
2656 2656 static mblk_t *
2657 2657 myri10ge_poll_rx(void *arg, int bytes)
2658 2658 {
2659 2659 struct myri10ge_slice_state *ss = arg;
2660 2660 struct myri10ge_mblk_list mbl;
2661 2661 boolean_t dummy = B_FALSE;
2662 2662
2663 2663 if (bytes == 0)
2664 2664 return (NULL);
2665 2665
2666 2666 myri10ge_mbl_init(&mbl);
2667 2667 mutex_enter(&ss->rx_lock);
2668 2668 if (ss->rx_polling)
2669 2669 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2670 2670 else
2671 2671 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2672 2672 ss->mgp->ss), ss->rx_token, ss->rx_polling);
2673 2673 mutex_exit(&ss->rx_lock);
2674 2674 return (mbl.head);
2675 2675 }
2676 2676
/*ARGSUSED*/
/*
 * Per-slice interrupt handler.  The firmware DMAs an mcp_irq_data_t
 * into host memory and sets its "valid" byte last; bit 0 of valid
 * signals pending receives.  Handles rx, tx completions, link state
 * and RDMA-tag updates, then returns the interrupt/rx tokens to the
 * NIC via irq_claim (unless the mac layer is in polling mode, in
 * which case the rx token is held until polling stops).
 */
static uint_t
myri10ge_intr(caddr_t arg0, caddr_t arg1)
{
	struct myri10ge_slice_state *ss =
	    (struct myri10ge_slice_state *)(void *)arg0;
	struct myri10ge_priv *mgp = ss->mgp;
	mcp_irq_data_t *stats = ss->fw_stats;
	myri10ge_tx_ring_t *tx = &ss->tx;
	uint32_t send_done_count;
	uint8_t valid;


	/* make sure the DMA has finished */
	if (!stats->valid) {
		return (DDI_INTR_UNCLAIMED);
	}
	valid = stats->valid;

	/* low bit indicates receives are present */
	if (valid & 1)
		myri10ge_intr_rx(ss);

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		if (!myri10ge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
		mb();
	} else {
		/* no need to wait for conf. that irq is low */
		stats->valid = 0;
	}

	/* loop while the firmware keeps re-validating the stats block */
	do {
		/* check for transmit completes and receives */
		send_done_count = ntohl(stats->send_done_count);
		if (send_done_count != tx->pkt_done)
			myri10ge_tx_done(ss, (int)send_done_count);
	} while (*((volatile uint8_t *) &stats->valid));

	/* process link and RDMA status when flagged as updated */
	if (stats->stats_updated) {
		if (mgp->link_state != stats->link_up || stats->link_down) {
			mgp->link_state = stats->link_up;
			if (stats->link_down) {
				mgp->down_cnt += stats->link_down;
				mgp->link_state = 0;
			}
			if (mgp->link_state) {
				if (myri10ge_verbose)
					printf("%s: link up\n", mgp->name);
				mac_link_update(mgp->mh, LINK_STATE_UP);
			} else {
				if (myri10ge_verbose)
					printf("%s: link down\n", mgp->name);
				mac_link_update(mgp->mh, LINK_STATE_DOWN);
			}
			MYRI10GE_NIC_STAT_INC(link_changes);
		}
		if (mgp->rdma_tags_available !=
		    ntohl(ss->fw_stats->rdma_tags_available)) {
			mgp->rdma_tags_available =
			    ntohl(ss->fw_stats->rdma_tags_available);
			cmn_err(CE_NOTE, "%s: RDMA timed out! "
			    "%d tags left\n", mgp->name,
			    mgp->rdma_tags_available);
		}
	}

	mb();
	/* check to see if we have rx token to pass back */
	if (valid & 0x1) {
		mutex_enter(&ss->poll_lock);
		if (ss->rx_polling) {
			/* mac layer is polling: hold the rx token */
			ss->rx_token = 1;
		} else {
			*ss->irq_claim = BE_32(3);
			ss->rx_token = 0;
		}
		mutex_exit(&ss->poll_lock);
	}
	*(ss->irq_claim + 1) = BE_32(3);
	return (DDI_INTR_CLAIMED);
}
2762 2762
2763 2763 /*
2764 2764 * Add or remove a multicast address. This is called with our
2765 2765 * macinfo's lock held by GLD, so we do not need to worry about
2766 2766 * our own locking here.
2767 2767 */
2768 2768 static int
2769 2769 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2770 2770 {
2771 2771 myri10ge_cmd_t cmd;
2772 2772 struct myri10ge_priv *mgp = arg;
2773 2773 int status, join_leave;
2774 2774
2775 2775 if (add)
2776 2776 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2777 2777 else
2778 2778 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2779 2779 (void) memcpy(&cmd.data0, multicastaddr, 4);
2780 2780 (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2781 2781 cmd.data0 = htonl(cmd.data0);
2782 2782 cmd.data1 = htonl(cmd.data1);
2783 2783 status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2784 2784 if (status == 0)
2785 2785 return (0);
2786 2786
2787 2787 cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2788 2788 mgp->name);
2789 2789 return (status);
2790 2790 }
2791 2791
2792 2792
2793 2793 static int
2794 2794 myri10ge_m_promisc(void *arg, boolean_t on)
2795 2795 {
2796 2796 struct myri10ge_priv *mgp = arg;
2797 2797
2798 2798 myri10ge_change_promisc(mgp, on);
2799 2799 return (0);
2800 2800 }
2801 2801
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * backwards one at a time and handle ring wraps
 */

static inline void
myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
    mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	/*
	 * copy entries cnt-1 down to 1; entry 0 is intentionally
	 * skipped -- the caller writes it last so the firmware does
	 * not see a partially written request chain.
	 */
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		myri10ge_pio_copy(&tx->lanai[idx],
		    &src[cnt], sizeof (*src));
		mb();	/* order each PIO copy before the next */
	}
}
2821 2821
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
    int cnt)
{
	int idx, i;
	uint32_t *src_ints, *dst_ints;
	mcp_kreq_ether_send_t *srcp, *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* clear the first request's flags so it is not yet valid */
	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy two requests (32 bytes) at a time */
		for (i = 0; i < (cnt - 1); i += 2) {
			myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
			mb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * submit all but the first request, and ensure
		 * that it is submitted below
		 */
		myri10ge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		myri10ge_pio_copy(dstp, srcp, sizeof (*src));
		mb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags |= last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	mb();
	/* notify NIC to poll this tx ring */
	if (!tx->active && tx->go != NULL) {
		*(int *)(void *)tx->go = 1;
		tx->active = 1;
		tx->activate++;
		mb();
	}
}
2884 2884
2885 2885 /* ARGSUSED */
2886 2886 static inline void
2887 2887 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2888 2888 {
2889 2889 uint32_t lso_flag;
2890 2890 mac_lso_get(mp, mss, &lso_flag);
2891 2891 (*flags) |= lso_flag;
2892 2892 }
2893 2893
2894 2894
2895 2895 /* like pullupmsg, except preserve hcksum/LSO attributes */
2896 2896 static int
2897 2897 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2898 2898 {
2899 2899 uint32_t start, stuff, tx_offload_flags, mss;
2900 2900 int ok;
2901 2901
2902 2902 mss = 0;
2903 2903 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2904 2904 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2905 2905
2906 2906 ok = pullupmsg(mp, -1);
2907 2907 if (!ok) {
2908 2908 printf("pullupmsg failed");
2909 2909 return (DDI_FAILURE);
2910 2910 }
2911 2911 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2912 2912 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags);
2913 2913 if (tx_offload_flags & HW_LSO)
2914 2914 DB_LSOMSS(mp) = (uint16_t)mss;
2915 2915 lso_info_set(mp, mss, tx_offload_flags);
2916 2916 return (DDI_SUCCESS);
2917 2917 }
2918 2918
2919 2919 static inline void
2920 2920 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2921 2921 int opackets, int obytes)
2922 2922 {
2923 2923 s->un.all = 0;
2924 2924 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2925 2925 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2926 2926 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2927 2927 s->un.s.brdcstxmt = 1;
2928 2928 else
2929 2929 s->un.s.multixmt = 1;
2930 2930 }
2931 2931 s->un.s.opackets = (uint16_t)opackets;
2932 2932 s->un.s.obytes = obytes;
2933 2933 }
2934 2934
/*
 * Transmit a small frame by copying it into the pre-mapped copy
 * buffer for the next tx slot and submitting a single send
 * descriptor.  Consumes mp on success; returns EBUSY (mp untouched)
 * when the ring is full.
 */
static int
myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
    mcp_kreq_ether_send_t *req)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	caddr_t ptr;
	struct myri10ge_tx_copybuf *cp;
	mblk_t *bp;
	int idx, mblen, avail;
	uint16_t len;

	mutex_enter(&tx->lock);
	/* need one free slot plus one spare to keep the ring from wrapping */
	avail = tx->mask - (tx->req - tx->done);
	if (avail <= 1) {
		mutex_exit(&tx->lock);
		return (EBUSY);
	}
	idx = tx->req & tx->mask;
	cp = &tx->cp[idx];
	ptr = cp->va;
	/* flatten the whole mblk chain into the slot's copy buffer */
	for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = MBLKL(bp);
		bcopy(bp->b_rptr, ptr, mblen);
		ptr += mblen;
		len += mblen;
	}
	/* ensure runts are padded to 60 bytes */
	if (len < 60) {
		/*
		 * NOTE(review): zeroes 64 - len bytes but reports a
		 * 60-byte length; the extra zeroed bytes look like
		 * deliberate over-padding of the copy buffer — confirm
		 * against the copybuf size before changing.
		 */
		bzero(ptr, 64 - len);
		len = 60;
	}
	/* build the single send descriptor over the copy buffer */
	req->addr_low = cp->dma.low;
	req->addr_high = cp->dma.high;
	req->length = htons(len);
	req->pad = 0;
	req->rdma_count = 1;
	myri10ge_tx_stat(&tx->info[idx].stat,
	    (struct ether_header *)(void *)cp->va, 1, len);
	(void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
	myri10ge_submit_req(&ss->tx, req, 1);
	mutex_exit(&tx->lock);
	freemsg(mp);
	return (DDI_SUCCESS);
}
2979 2979
2980 2980
2981 2981 static void
2982 2982 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2983 2983 struct myri10ge_tx_buffer_state *tx_info,
2984 2984 int count)
2985 2985 {
2986 2986 int i, idx;
2987 2987
2988 2988 idx = 0; /* gcc -Wuninitialized */
2989 2989 /* store unmapping and bp info for tx irq handler */
2990 2990 for (i = 0; i < count; i++) {
2991 2991 idx = (tx->req + i) & tx->mask;
2992 2992 tx->info[idx].m = tx_info[i].m;
2993 2993 tx->info[idx].handle = tx_info[i].handle;
2994 2994 }
2995 2995 tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
2996 2996
2997 2997 /* submit the frame to the nic */
2998 2998 myri10ge_submit_req(tx, req_list, count);
2999 2999
3000 3000
3001 3001 }
3002 3002
3003 3003
3004 3004
3005 3005 static void
3006 3006 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3007 3007 {
3008 3008 mblk_t *bp;
3009 3009 int seglen;
3010 3010 uint_t count;
3011 3011
3012 3012 bp = mp;
3013 3013
3014 3014 while (off > 0) {
3015 3015 seglen = MBLKL(bp);
3016 3016 if (off < seglen)
3017 3017 break;
3018 3018 off -= seglen;
3019 3019 bp = bp->b_cont;
3020 3020 }
3021 3021 while (len > 0) {
3022 3022 seglen = MBLKL(bp);
3023 3023 count = min(seglen - off, len);
3024 3024 bcopy(bp->b_rptr + off, buf, count);
3025 3025 len -= count;
3026 3026 buf += count;
3027 3027 off = 0;
3028 3028 bp = bp->b_cont;
3029 3029 }
3030 3030 }
3031 3031
3032 3032 static int
3033 3033 myri10ge_ether_parse_header(mblk_t *mp)
3034 3034 {
3035 3035 struct ether_header eh_copy;
3036 3036 struct ether_header *eh;
3037 3037 int eth_hdr_len, seglen;
3038 3038
3039 3039 seglen = MBLKL(mp);
3040 3040 eth_hdr_len = sizeof (*eh);
3041 3041 if (seglen < eth_hdr_len) {
3042 3042 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3043 3043 eh = &eh_copy;
3044 3044 } else {
3045 3045 eh = (struct ether_header *)(void *)mp->b_rptr;
3046 3046 }
3047 3047 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3048 3048 eth_hdr_len += 4;
3049 3049 }
3050 3050
3051 3051 return (eth_hdr_len);
3052 3052 }
3053 3053
/*
 * Parse the IP and TCP headers of an LSO frame, starting at byte
 * offset off (the end of the MAC header), zero the IP checksum
 * field if needed, and return the total MAC+IP+TCP header length.
 */
static int
myri10ge_lso_parse_header(mblk_t *mp, int off)
{
	char buf[128];
	int seglen, sum_off;
	struct ip *ip;
	struct tcphdr *tcp;

	seglen = MBLKL(mp);
	/* point ip at the IP header, copying it out if it spans mblks */
	if (seglen < off + sizeof (*ip)) {
		myri10ge_copydata(mp, off, sizeof (*ip), buf);
		ip = (struct ip *)(void *)buf;
	} else {
		ip = (struct ip *)(void *)(mp->b_rptr + off);
	}
	/* re-copy if the TCP header is not contiguous with the IP header */
	if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
		myri10ge_copydata(mp, off,
		    (ip->ip_hl << 2) + sizeof (*tcp), buf);
		ip = (struct ip *)(void *)buf;
	}
	tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));

	/*
	 * NIC expects ip_sum to be zero.  Recent changes to
	 * OpenSolaris leave the correct ip checksum there, rather
	 * than the required zero, so we need to zero it.  Otherwise,
	 * the NIC will produce bad checksums when sending LSO packets.
	 */
	if (ip->ip_sum != 0) {
		if (((char *)ip) != buf) {
			/* ip points into mblk, so just zero it */
			ip->ip_sum = 0;
		} else {
			/*
			 * ip points into a copy, so walk the chain
			 * to find the ip_csum, then zero it.  The two
			 * checksum bytes may land in different mblks,
			 * so each byte is located independently.
			 */
			sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
			/* find and zero the first checksum byte */
			while (sum_off > (int)(MBLKL(mp) - 1)) {
				sum_off -= MBLKL(mp);
				mp = mp->b_cont;
			}
			mp->b_rptr[sum_off] = 0;
			sum_off++;
			/* find and zero the second checksum byte */
			while (sum_off > MBLKL(mp) - 1) {
				sum_off -= MBLKL(mp);
				mp = mp->b_cont;
			}
			mp->b_rptr[sum_off] = 0;
		}
	}
	/* total header bytes: MAC + IP header + TCP header (incl. options) */
	return (off + ((ip->ip_hl + tcp->th_off) << 2));
}
3107 3107
/*
 * Transmit an LSO frame by copying it into the ring's pre-mapped
 * per-slot copy buffers and building TSO send descriptors over
 * those buffers.  Called only when mss < mgp->tx_boundary, so each
 * MSS-sized chunk fits within one buffer.  Consumes mp and returns
 * DDI_SUCCESS, or returns EBUSY (mp untouched) when the ring lacks
 * MYRI10GE_MAX_SEND_DESC_TSO free slots.
 */
static int
myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
    mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
    uint16_t mss, uint8_t cksum_offset)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	mblk_t *bp;
	mcp_kreq_ether_send_t *req;
	struct myri10ge_tx_copybuf *cp;
	caddr_t rptr, ptr;
	int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
	int resid, avail, idx, hdr_size_tmp, tx_boundary;
	int rdma_count;
	uint32_t seglen, len, boundary, low, high_swapped;
	uint16_t pseudo_hdr_offset = htons(mss);	/* for TSO, holds mss */
	uint8_t flags;

	tx_boundary = mgp->tx_boundary;
	hdr_size_tmp = hdr_size;
	resid = tx_boundary;
	count = 1;
	mutex_enter(&tx->lock);

	/* check to see if the slots are really there */
	avail = tx->mask - (tx->req - tx->done);
	if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) {
		atomic_inc_32(&tx->stall);
		mutex_exit(&tx->lock);
		return (EBUSY);
	}

	/*
	 * Phase 1: copy the frame into the copy buffers.  The header
	 * goes first; then the payload is copied in MSS-sized chunks,
	 * advancing to the next slot's buffer whenever fewer than mss
	 * bytes remain in the current one, so no chunk crosses a
	 * tx_boundary.
	 */
	/* copy */
	cum_len = -hdr_size;
	count = 0;
	req = req_list;
	idx = tx->mask & tx->req;
	cp = &tx->cp[idx];
	low = ntohl(cp->dma.low);
	ptr = cp->va;
	cp->len = 0;
	if (mss) {
		/* precompute what the NIC will emit for the stats path */
		int payload = pkt_size - hdr_size;
		uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
		tx->info[idx].ostat.opackets = opackets;
		tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
		    + pkt_size;
	}
	hdr_size_tmp = hdr_size;
	mss_resid = mss;
	flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
	tx_req = tx->req;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = MBLKL(bp);
		rptr = (caddr_t)bp->b_rptr;
		len = min(hdr_size_tmp, mblen);
		if (len) {
			/* still copying header bytes */
			bcopy(rptr, ptr, len);
			rptr += len;
			ptr += len;
			resid -= len;
			mblen -= len;
			hdr_size_tmp -= len;
			cp->len += len;
			if (hdr_size_tmp)
				continue;
			if (resid < mss) {
				/*
				 * NOTE(review): unlike the advance in
				 * the payload loop below, this one does
				 * not reset the new slot's cp->len to 0
				 * — confirm the buffer's len is always
				 * clean here before relying on it.
				 */
				tx_req++;
				idx = tx->mask & tx_req;
				cp = &tx->cp[idx];
				low = ntohl(cp->dma.low);
				ptr = cp->va;
				resid = tx_boundary;
			}
		}
		/* copy payload bytes in at most MSS-sized chunks */
		while (mblen) {
			len = min(mss_resid, mblen);
			bcopy(rptr, ptr, len);
			mss_resid -= len;
			resid -= len;
			mblen -= len;
			rptr += len;
			ptr += len;
			cp->len += len;
			if (mss_resid == 0) {
				mss_resid = mss;
				if (resid < mss) {
					/* next MSS chunk won't fit; advance */
					tx_req++;
					idx = tx->mask & tx_req;
					cp = &tx->cp[idx];
					cp->len = 0;
					low = ntohl(cp->dma.low);
					ptr = cp->va;
					resid = tx_boundary;
				}
			}
		}
	}

	/*
	 * Phase 2: build TSO send descriptors over the filled copy
	 * buffers, splitting at tx_boundary crossings and tracking
	 * MSS boundaries via cum_len so the firmware sees correctly
	 * chopped segments.
	 */
	req = req_list;
	pkt_size_tmp = pkt_size;
	count = 0;
	rdma_count = 0;
	tx_req = tx->req;
	while (pkt_size_tmp) {
		idx = tx->mask & tx_req;
		cp = &tx->cp[idx];
		high_swapped = cp->dma.high;
		low = ntohl(cp->dma.low);
		len = cp->len;
		if (len == 0) {
			/*
			 * Should not happen: a slot with no bytes means
			 * phase 1 and phase 2 disagree.  Dump diagnostic
			 * state and drop the frame rather than loop.
			 */
			printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
			    pkt_size_tmp, pkt_size);
			for (bp = mp; bp != NULL; bp = bp->b_cont) {
				mblen = MBLKL(bp);
				printf("mblen:%d\n", mblen);
			}
			pkt_size_tmp = pkt_size;
			tx_req = tx->req;
			while (pkt_size_tmp > 0) {
				idx = tx->mask & tx_req;
				cp = &tx->cp[idx];
				printf("cp->len = %d\n", cp->len);
				pkt_size_tmp -= cp->len;
				tx_req++;
			}
			printf("dropped\n");
			MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
			goto done;
		}
		pkt_size_tmp -= len;
		while (len) {
			while (len) {
				uint8_t flags_next;
				int cum_len_next;

				/* split at the next tx_boundary crossing */
				boundary = (low + mgp->tx_boundary) &
				    ~(mgp->tx_boundary - 1);
				seglen = boundary - low;
				if (seglen > len)
					seglen = len;

				flags_next = flags & ~MXGEFW_FLAGS_FIRST;
				cum_len_next = cum_len + seglen;
				(req-rdma_count)->rdma_count = rdma_count + 1;
				if (likely(cum_len >= 0)) {
					/* payload */
					int next_is_first, chop;

					/* chop when this piece crosses an MSS boundary */
					chop = (cum_len_next > mss);
					cum_len_next = cum_len_next % mss;
					next_is_first = (cum_len_next == 0);
					flags |= chop *
					    MXGEFW_FLAGS_TSO_CHOP;
					flags_next |= next_is_first *
					    MXGEFW_FLAGS_FIRST;
					rdma_count |= -(chop | next_is_first);
					rdma_count += chop & !next_is_first;
				} else if (likely(cum_len_next >= 0)) {
					/* header ends */
					int small;

					rdma_count = -1;
					cum_len_next = 0;
					seglen = -cum_len;
					small = (mss <= MXGEFW_SEND_SMALL_SIZE);
					flags_next = MXGEFW_FLAGS_TSO_PLD |
					    MXGEFW_FLAGS_FIRST |
					    (small * MXGEFW_FLAGS_SMALL);
				}
				req->addr_high = high_swapped;
				req->addr_low = htonl(low);
				req->pseudo_hdr_offset = pseudo_hdr_offset;
				req->pad = 0; /* complete solid 16-byte block */
				req->rdma_count = 1;
				req->cksum_offset = cksum_offset;
				req->length = htons(seglen);
				req->flags = flags | ((cum_len & 1) *
				    MXGEFW_FLAGS_ALIGN_ODD);
				if (cksum_offset > seglen)
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
				low += seglen;
				len -= seglen;
				cum_len = cum_len_next;
				req++;
				req->flags = 0;
				flags = flags_next;
				count++;
				rdma_count++;
			}
		}
		tx_req++;
	}
	(req-rdma_count)->rdma_count = (uint8_t)rdma_count;
	/* mark every trailing descriptor of the final segment as TSO_LAST */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
	    MXGEFW_FLAGS_FIRST)));

	myri10ge_submit_req(tx, req_list, count);
done:
	mutex_exit(&tx->lock);
	freemsg(mp);
	return (DDI_SUCCESS);
}
3316 3316
3317 3317 /*
3318 3318 * Try to send the chain of buffers described by the mp. We must not
3319 3319 * encapsulate more than eth->tx.req - eth->tx.done, or
3320 3320 * MXGEFW_MAX_SEND_DESC, whichever is more.
3321 3321 */
3322 3322
3323 3323 static int
3324 3324 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3325 3325 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3326 3326 {
3327 3327 struct myri10ge_priv *mgp = ss->mgp;
3328 3328 myri10ge_tx_ring_t *tx = &ss->tx;
3329 3329 mcp_kreq_ether_send_t *req;
3330 3330 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3331 3331 mblk_t *bp;
3332 3332 ddi_dma_cookie_t cookie;
3333 3333 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3334 3334 rdma_count, cum_len, lso_hdr_size;
3335 3335 uint32_t start, stuff, tx_offload_flags;
3336 3336 uint32_t seglen, len, mss, boundary, low, high_swapped;
3337 3337 uint_t ncookies;
3338 3338 uint16_t pseudo_hdr_offset;
3339 3339 uint8_t flags, cksum_offset, odd_flag;
3340 3340 int pkt_size;
3341 3341 int lso_copy = myri10ge_lso_copy;
3342 3342 try_pullup = 1;
3343 3343
3344 3344 again:
3345 3345 /* Setup checksum offloading, if needed */
3346 3346 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3347 3347 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3348 3348 if (tx_offload_flags & HW_LSO) {
3349 3349 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3350 3350 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3351 3351 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3352 3352 freemsg(mp);
3353 3353 return (DDI_SUCCESS);
3354 3354 }
3355 3355 } else {
3356 3356 max_segs = MXGEFW_MAX_SEND_DESC;
3357 3357 mss = 0;
3358 3358 }
3359 3359 req = req_list;
3360 3360 cksum_offset = 0;
3361 3361 pseudo_hdr_offset = 0;
3362 3362
3363 3363 /* leave an extra slot keep the ring from wrapping */
3364 3364 avail = tx->mask - (tx->req - tx->done);
3365 3365
3366 3366 /*
3367 3367 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3368 3368 * message will need to be pulled up in order to fit.
3369 3369 * Otherwise, we are low on transmit descriptors, it is
3370 3370 * probably better to stall and try again rather than pullup a
3371 3371 * message to fit.
3372 3372 */
3373 3373
3374 3374 if (avail < max_segs) {
3375 3375 err = EBUSY;
3376 3376 atomic_inc_32(&tx->stall_early);
3377 3377 goto stall;
3378 3378 }
3379 3379
3380 3380 /* find out how long the frame is and how many segments it is */
3381 3381 count = 0;
3382 3382 odd_flag = 0;
3383 3383 pkt_size = 0;
3384 3384 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3385 3385 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3386 3386 dblk_t *dbp;
3387 3387 mblen = MBLKL(bp);
3388 3388 if (mblen == 0) {
3389 3389 /*
3390 3390 * we can't simply skip over 0-length mblks
3391 3391 * because the hardware can't deal with them,
3392 3392 * and we could leak them.
3393 3393 */
3394 3394 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3395 3395 err = EIO;
3396 3396 goto pullup;
3397 3397 }
3398 3398 /*
3399 3399 * There's no advantage to copying most gesballoc
3400 3400 * attached blocks, so disable lso copy in that case
3401 3401 */
3402 3402 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3403 3403 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3404 3404 lso_copy = 0;
3405 3405 }
3406 3406 }
3407 3407 pkt_size += mblen;
3408 3408 count++;
3409 3409 }
3410 3410
3411 3411 /* Try to pull up excessivly long chains */
3412 3412 if (count >= max_segs) {
3413 3413 err = myri10ge_pullup(ss, mp);
3414 3414 if (likely(err == DDI_SUCCESS)) {
3415 3415 count = 1;
3416 3416 } else {
3417 3417 if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
3418 3418 /*
3419 3419 * just let the h/w send it, it will be
3420 3420 * inefficient, but us better than dropping
3421 3421 */
3422 3422 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3423 3423 } else {
3424 3424 /* drop it */
3425 3425 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3426 3426 freemsg(mp);
3427 3427 return (0);
3428 3428 }
3429 3429 }
3430 3430 }
3431 3431
3432 3432 cum_len = 0;
3433 3433 maclen = myri10ge_ether_parse_header(mp);
3434 3434
3435 3435 if (tx_offload_flags & HCK_PARTIALCKSUM) {
3436 3436
3437 3437 cksum_offset = start + maclen;
3438 3438 pseudo_hdr_offset = htons(stuff + maclen);
3439 3439 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3440 3440 flags |= MXGEFW_FLAGS_CKSUM;
3441 3441 }
3442 3442
3443 3443 lso_hdr_size = 0; /* -Wunitinialized */
3444 3444 if (mss) { /* LSO */
3445 3445 /* this removes any CKSUM flag from before */
3446 3446 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3447 3447 /*
3448 3448 * parse the headers and set cum_len to a negative
3449 3449 * value to reflect the offset of the TCP payload
3450 3450 */
3451 3451 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen);
3452 3452 cum_len = -lso_hdr_size;
3453 3453 if ((mss < mgp->tx_boundary) && lso_copy) {
3454 3454 err = myri10ge_tx_tso_copy(ss, mp, req_list,
3455 3455 lso_hdr_size, pkt_size, mss, cksum_offset);
3456 3456 return (err);
3457 3457 }
3458 3458
3459 3459 /*
3460 3460 * for TSO, pseudo_hdr_offset holds mss. The firmware
3461 3461 * figures out where to put the checksum by parsing
3462 3462 * the header.
3463 3463 */
3464 3464
3465 3465 pseudo_hdr_offset = htons(mss);
3466 3466 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
3467 3467 flags |= MXGEFW_FLAGS_SMALL;
3468 3468 if (pkt_size < myri10ge_tx_copylen) {
3469 3469 req->cksum_offset = cksum_offset;
3470 3470 req->pseudo_hdr_offset = pseudo_hdr_offset;
3471 3471 req->flags = flags;
3472 3472 err = myri10ge_tx_copy(ss, mp, req);
3473 3473 return (err);
3474 3474 }
3475 3475 cum_len = 0;
3476 3476 }
3477 3477
3478 3478 /* pull one DMA handle for each bp from our freelist */
3479 3479 handles = NULL;
3480 3480 err = myri10ge_alloc_tx_handles(ss, count, &handles);
3481 3481 if (err != DDI_SUCCESS) {
3482 3482 err = DDI_FAILURE;
3483 3483 goto stall;
3484 3484 }
3485 3485 count = 0;
3486 3486 rdma_count = 0;
3487 3487 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3488 3488 mblen = MBLKL(bp);
3489 3489 dma_handle = handles;
3490 3490 handles = handles->next;
3491 3491
3492 3492 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
3493 3493 (caddr_t)bp->b_rptr, mblen,
3494 3494 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
3495 3495 &cookie, &ncookies);
3496 3496 if (unlikely(rv != DDI_DMA_MAPPED)) {
3497 3497 err = EIO;
3498 3498 try_pullup = 0;
3499 3499 dma_handle->next = handles;
3500 3500 handles = dma_handle;
3501 3501 goto abort_with_handles;
3502 3502 }
3503 3503
3504 3504 /* reserve the slot */
3505 3505 tx_info[count].m = bp;
3506 3506 tx_info[count].handle = dma_handle;
3507 3507
3508 3508 for (; ; ) {
3509 3509 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
3510 3510 high_swapped =
3511 3511 htonl(MYRI10GE_HIGHPART_TO_U32(
3512 3512 cookie.dmac_laddress));
3513 3513 len = (uint32_t)cookie.dmac_size;
3514 3514 while (len) {
3515 3515 uint8_t flags_next;
3516 3516 int cum_len_next;
3517 3517
3518 3518 boundary = (low + mgp->tx_boundary) &
3519 3519 ~(mgp->tx_boundary - 1);
3520 3520 seglen = boundary - low;
3521 3521 if (seglen > len)
3522 3522 seglen = len;
3523 3523
3524 3524 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3525 3525 cum_len_next = cum_len + seglen;
3526 3526 if (mss) {
3527 3527 (req-rdma_count)->rdma_count =
3528 3528 rdma_count + 1;
3529 3529 if (likely(cum_len >= 0)) {
3530 3530 /* payload */
3531 3531 int next_is_first, chop;
3532 3532
3533 3533 chop = (cum_len_next > mss);
3534 3534 cum_len_next =
3535 3535 cum_len_next % mss;
3536 3536 next_is_first =
3537 3537 (cum_len_next == 0);
3538 3538 flags |= chop *
3539 3539 MXGEFW_FLAGS_TSO_CHOP;
3540 3540 flags_next |= next_is_first *
3541 3541 MXGEFW_FLAGS_FIRST;
3542 3542 rdma_count |=
3543 3543 -(chop | next_is_first);
3544 3544 rdma_count +=
3545 3545 chop & !next_is_first;
3546 3546 } else if (likely(cum_len_next >= 0)) {
3547 3547 /* header ends */
3548 3548 int small;
3549 3549
3550 3550 rdma_count = -1;
3551 3551 cum_len_next = 0;
3552 3552 seglen = -cum_len;
3553 3553 small = (mss <=
3554 3554 MXGEFW_SEND_SMALL_SIZE);
3555 3555 flags_next =
3556 3556 MXGEFW_FLAGS_TSO_PLD
3557 3557 | MXGEFW_FLAGS_FIRST
3558 3558 | (small *
3559 3559 MXGEFW_FLAGS_SMALL);
3560 3560 }
3561 3561 }
3562 3562 req->addr_high = high_swapped;
3563 3563 req->addr_low = htonl(low);
3564 3564 req->pseudo_hdr_offset = pseudo_hdr_offset;
3565 3565 req->pad = 0; /* complete solid 16-byte block */
3566 3566 req->rdma_count = 1;
3567 3567 req->cksum_offset = cksum_offset;
3568 3568 req->length = htons(seglen);
3569 3569 req->flags = flags | ((cum_len & 1) * odd_flag);
3570 3570 if (cksum_offset > seglen)
3571 3571 cksum_offset -= seglen;
3572 3572 else
3573 3573 cksum_offset = 0;
3574 3574 low += seglen;
3575 3575 len -= seglen;
3576 3576 cum_len = cum_len_next;
3577 3577 count++;
3578 3578 rdma_count++;
3579 3579 /* make sure all the segments will fit */
3580 3580 if (unlikely(count >= max_segs)) {
3581 3581 MYRI10GE_ATOMIC_SLICE_STAT_INC(
3582 3582 xmit_lowbuf);
3583 3583 /* may try a pullup */
3584 3584 err = EBUSY;
3585 3585 if (try_pullup)
3586 3586 try_pullup = 2;
3587 3587 goto abort_with_handles;
3588 3588 }
3589 3589 req++;
3590 3590 req->flags = 0;
3591 3591 flags = flags_next;
3592 3592 tx_info[count].m = 0;
3593 3593 }
3594 3594 ncookies--;
3595 3595 if (ncookies == 0)
3596 3596 break;
3597 3597 ddi_dma_nextcookie(dma_handle->h, &cookie);
3598 3598 }
3599 3599 }
3600 3600 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3601 3601
3602 3602 if (mss) {
3603 3603 do {
3604 3604 req--;
3605 3605 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3606 3606 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3607 3607 MXGEFW_FLAGS_FIRST)));
3608 3608 }
3609 3609
3610 3610 /* calculate tx stats */
3611 3611 if (mss) {
3612 3612 uint16_t opackets;
3613 3613 int payload;
3614 3614
3615 3615 payload = pkt_size - lso_hdr_size;
3616 3616 opackets = (payload / mss) + ((payload % mss) != 0);
3617 3617 tx_info[0].stat.un.all = 0;
3618 3618 tx_info[0].ostat.opackets = opackets;
3619 3619 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
3620 3620 + pkt_size;
3621 3621 } else {
3622 3622 myri10ge_tx_stat(&tx_info[0].stat,
3623 3623 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
3624 3624 }
3625 3625 mutex_enter(&tx->lock);
3626 3626
3627 3627 /* check to see if the slots are really there */
3628 3628 avail = tx->mask - (tx->req - tx->done);
3629 3629 if (unlikely(avail <= count)) {
3630 3630 mutex_exit(&tx->lock);
3631 3631 err = 0;
3632 3632 goto late_stall;
3633 3633 }
3634 3634
3635 3635 myri10ge_send_locked(tx, req_list, tx_info, count);
3636 3636 mutex_exit(&tx->lock);
3637 3637 return (DDI_SUCCESS);
3638 3638
3639 3639 late_stall:
3640 3640 try_pullup = 0;
3641 3641 atomic_inc_32(&tx->stall_late);
3642 3642
3643 3643 abort_with_handles:
3644 3644 /* unbind and free handles from previous mblks */
3645 3645 for (i = 0; i < count; i++) {
3646 3646 bp = tx_info[i].m;
3647 3647 tx_info[i].m = 0;
3648 3648 if (bp) {
3649 3649 dma_handle = tx_info[i].handle;
3650 3650 (void) ddi_dma_unbind_handle(dma_handle->h);
3651 3651 dma_handle->next = handles;
3652 3652 handles = dma_handle;
3653 3653 tx_info[i].handle = NULL;
3654 3654 tx_info[i].m = NULL;
3655 3655 }
3656 3656 }
3657 3657 myri10ge_free_tx_handle_slist(tx, handles);
3658 3658 pullup:
3659 3659 if (try_pullup) {
3660 3660 err = myri10ge_pullup(ss, mp);
3661 3661 if (err != DDI_SUCCESS && try_pullup == 2) {
3662 3662 /* drop */
3663 3663 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3664 3664 freemsg(mp);
3665 3665 return (0);
3666 3666 }
3667 3667 try_pullup = 0;
3668 3668 goto again;
3669 3669 }
3670 3670
3671 3671 stall:
3672 3672 if (err != 0) {
3673 3673 if (err == EBUSY) {
3674 3674 atomic_inc_32(&tx->stall);
3675 3675 } else {
3676 3676 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3677 3677 }
3678 3678 }
3679 3679 return (err);
3680 3680 }
3681 3681
/*
 * GLDv3 transmit entry point (mri_tx).  Sets up the aligned
 * descriptor scratch space and calls myri10ge_send().  Returns NULL
 * when the mblk was consumed, or mp itself to ask the mac layer to
 * requeue and retry (flow control).
 */
static mblk_t *
myri10ge_send_wrapper(void *arg, mblk_t *mp)
{
	struct myri10ge_slice_state *ss = arg;
	int err = 0;
	mcp_kreq_ether_send_t *req_list;
#if defined(__i386)
	/*
	 * We need about 2.5KB of scratch space to handle transmits.
	 * i86pc has only 8KB of kernel stack space, so we malloc the
	 * scratch space there rather than keeping it on the stack.
	 */
	size_t req_size, tx_info_size;
	struct myri10ge_tx_buffer_state *tx_info;
	caddr_t req_bytes;

	req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
	    + 8;
	req_bytes = kmem_alloc(req_size, KM_SLEEP);
	tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1);
	tx_info = kmem_alloc(tx_info_size, KM_SLEEP);
#else
	/* other platforms have room to keep the scratch on the stack */
	char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
	    + 8];
	struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
#endif

	/* ensure req_list entries are aligned to 8 bytes */
	req_list = (struct mcp_kreq_ether_send *)
	    (((unsigned long)req_bytes + 7UL) & ~7UL);

	err = myri10ge_send(ss, mp, req_list, tx_info);

#if defined(__i386)
	kmem_free(tx_info, tx_info_size);
	kmem_free(req_bytes, req_size);
#endif
	if (err)
		return (mp);
	else
		return (NULL);
}
3724 3724
3725 3725 static int
3726 3726 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3727 3727 {
3728 3728 struct myri10ge_priv *mgp = arg;
3729 3729 int err;
3730 3730
3731 3731 if (mac_addr == NULL)
3732 3732 return (EINVAL);
3733 3733
3734 3734 mutex_enter(&mgp->intrlock);
3735 3735 if (mgp->macaddr_cnt) {
3736 3736 mutex_exit(&mgp->intrlock);
3737 3737 return (ENOSPC);
3738 3738 }
3739 3739 err = myri10ge_m_unicst(mgp, mac_addr);
3740 3740 if (!err)
3741 3741 mgp->macaddr_cnt++;
3742 3742
3743 3743 mutex_exit(&mgp->intrlock);
3744 3744 if (err)
3745 3745 return (err);
3746 3746
3747 3747 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3748 3748 return (0);
3749 3749 }
3750 3750
3751 3751 /*ARGSUSED*/
3752 3752 static int
3753 3753 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3754 3754 {
3755 3755 struct myri10ge_priv *mgp = arg;
3756 3756
3757 3757 mutex_enter(&mgp->intrlock);
3758 3758 mgp->macaddr_cnt--;
3759 3759 mutex_exit(&mgp->intrlock);
3760 3760
3761 3761 return (0);
3762 3762 }
3763 3763
3764 3764 /*ARGSUSED*/
3765 3765 static void
3766 3766 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3767 3767 mac_group_info_t *infop, mac_group_handle_t gh)
3768 3768 {
3769 3769 struct myri10ge_priv *mgp = arg;
3770 3770
3771 3771 if (rtype != MAC_RING_TYPE_RX)
3772 3772 return;
3773 3773
3774 3774 infop->mgi_driver = (mac_group_driver_t)mgp;
3775 3775 infop->mgi_start = NULL;
3776 3776 infop->mgi_stop = NULL;
3777 3777 infop->mgi_addmac = myri10ge_addmac;
3778 3778 infop->mgi_remmac = myri10ge_remmac;
3779 3779 infop->mgi_count = mgp->num_slices;
3780 3780 }
3781 3781
3782 3782 static int
3783 3783 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3784 3784 {
3785 3785 struct myri10ge_slice_state *ss;
3786 3786
3787 3787 ss = (struct myri10ge_slice_state *)rh;
3788 3788 mutex_enter(&ss->rx_lock);
3789 3789 ss->rx_gen_num = mr_gen_num;
3790 3790 mutex_exit(&ss->rx_lock);
3791 3791 return (0);
3792 3792 }
3793 3793
3794 3794 /*
3795 3795 * Retrieve a value for one of the statistics for a particular rx ring
3796 3796 */
3797 3797 int
3798 3798 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3799 3799 {
3800 3800 struct myri10ge_slice_state *ss;
3801 3801
3802 3802 ss = (struct myri10ge_slice_state *)rh;
3803 3803 switch (stat) {
3804 3804 case MAC_STAT_RBYTES:
3805 3805 *val = ss->rx_stats.ibytes;
3806 3806 break;
3807 3807
3808 3808 case MAC_STAT_IPACKETS:
3809 3809 *val = ss->rx_stats.ipackets;
3810 3810 break;
3811 3811
3812 3812 default:
3813 3813 *val = 0;
3814 3814 return (ENOTSUP);
3815 3815 }
3816 3816
3817 3817 return (0);
3818 3818 }
3819 3819
3820 3820 /*
3821 3821 * Retrieve a value for one of the statistics for a particular tx ring
3822 3822 */
3823 3823 int
3824 3824 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3825 3825 {
3826 3826 struct myri10ge_slice_state *ss;
3827 3827
3828 3828 ss = (struct myri10ge_slice_state *)rh;
3829 3829 switch (stat) {
3830 3830 case MAC_STAT_OBYTES:
3831 3831 *val = ss->tx.stats.obytes;
3832 3832 break;
3833 3833
3834 3834 case MAC_STAT_OPACKETS:
3835 3835 *val = ss->tx.stats.opackets;
3836 3836 break;
3837 3837
3838 3838 default:
3839 3839 *val = 0;
3840 3840 return (ENOTSUP);
3841 3841 }
3842 3842
3843 3843 return (0);
3844 3844 }
3845 3845
3846 3846 static int
3847 3847 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
3848 3848 {
3849 3849 struct myri10ge_slice_state *ss;
3850 3850
3851 3851 ss = (struct myri10ge_slice_state *)intrh;
3852 3852 mutex_enter(&ss->poll_lock);
3853 3853 ss->rx_polling = B_TRUE;
3854 3854 mutex_exit(&ss->poll_lock);
3855 3855 return (0);
3856 3856 }
3857 3857
3858 3858 static int
3859 3859 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
3860 3860 {
3861 3861 struct myri10ge_slice_state *ss;
3862 3862
3863 3863 ss = (struct myri10ge_slice_state *)intrh;
3864 3864 mutex_enter(&ss->poll_lock);
3865 3865 ss->rx_polling = B_FALSE;
3866 3866 if (ss->rx_token) {
3867 3867 *ss->irq_claim = BE_32(3);
3868 3868 ss->rx_token = 0;
3869 3869 }
3870 3870 mutex_exit(&ss->poll_lock);
3871 3871 return (0);
3872 3872 }
3873 3873
3874 3874 /*ARGSUSED*/
3875 3875 static void
3876 3876 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
3877 3877 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
3878 3878 {
3879 3879 struct myri10ge_priv *mgp = arg;
3880 3880 struct myri10ge_slice_state *ss;
3881 3881 mac_intr_t *mintr = &infop->mri_intr;
3882 3882
3883 3883 ASSERT((unsigned int)ring_index < mgp->num_slices);
3884 3884
3885 3885 ss = &mgp->ss[ring_index];
3886 3886 switch (rtype) {
3887 3887 case MAC_RING_TYPE_RX:
3888 3888 ss->rx_rh = rh;
3889 3889 infop->mri_driver = (mac_ring_driver_t)ss;
3890 3890 infop->mri_start = myri10ge_ring_start;
3891 3891 infop->mri_stop = NULL;
3892 3892 infop->mri_poll = myri10ge_poll_rx;
3893 3893 infop->mri_stat = myri10ge_rx_ring_stat;
3894 3894 mintr->mi_handle = (mac_intr_handle_t)ss;
3895 3895 mintr->mi_enable = myri10ge_rx_ring_intr_enable;
3896 3896 mintr->mi_disable = myri10ge_rx_ring_intr_disable;
3897 3897 break;
3898 3898 case MAC_RING_TYPE_TX:
3899 3899 ss->tx.rh = rh;
3900 3900 infop->mri_driver = (mac_ring_driver_t)ss;
3901 3901 infop->mri_start = NULL;
3902 3902 infop->mri_stop = NULL;
3903 3903 infop->mri_tx = myri10ge_send_wrapper;
3904 3904 infop->mri_stat = myri10ge_tx_ring_stat;
3905 3905 break;
3906 3906 default:
3907 3907 break;
3908 3908 }
3909 3909 }
3910 3910
3911 3911 static void
3912 3912 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
3913 3913 {
3914 3914 if (mgp->ksp_stat == NULL)
3915 3915 return;
3916 3916
3917 3917 kstat_delete(mgp->ksp_stat);
3918 3918 mgp->ksp_stat = NULL;
3919 3919 }
3920 3920
3921 3921 static void
3922 3922 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
3923 3923 {
3924 3924 if (ss->ksp_stat == NULL)
3925 3925 return;
3926 3926
3927 3927 kstat_delete(ss->ksp_stat);
3928 3928 ss->ksp_stat = NULL;
3929 3929 }
3930 3930
3931 3931 static void
3932 3932 myri10ge_info_destroy(struct myri10ge_priv *mgp)
3933 3933 {
3934 3934 if (mgp->ksp_info == NULL)
3935 3935 return;
3936 3936
3937 3937 kstat_delete(mgp->ksp_info);
3938 3938 mgp->ksp_info = NULL;
3939 3939 }
3940 3940
3941 3941 static int
3942 3942 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
3943 3943 {
3944 3944 struct myri10ge_nic_stat *ethstat;
3945 3945 struct myri10ge_priv *mgp;
3946 3946 mcp_irq_data_t *fw_stats;
3947 3947
3948 3948
3949 3949 if (rw == KSTAT_WRITE)
3950 3950 return (EACCES);
3951 3951
3952 3952 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
3953 3953 mgp = (struct myri10ge_priv *)ksp->ks_private;
3954 3954 fw_stats = mgp->ss[0].fw_stats;
3955 3955
3956 3956 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
3957 3957 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
3958 3958 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
3959 3959 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
3960 3960 ethstat->dma_force_physical.value.ul = 1;
3961 3961 else
3962 3962 ethstat->dma_force_physical.value.ul = 0;
3963 3963 ethstat->lanes.value.ul = mgp->pcie_link_width;
3964 3964 ethstat->dropped_bad_crc32.value.ul =
3965 3965 ntohl(fw_stats->dropped_bad_crc32);
3966 3966 ethstat->dropped_bad_phy.value.ul =
3967 3967 ntohl(fw_stats->dropped_bad_phy);
3968 3968 ethstat->dropped_link_error_or_filtered.value.ul =
3969 3969 ntohl(fw_stats->dropped_link_error_or_filtered);
3970 3970 ethstat->dropped_link_overflow.value.ul =
3971 3971 ntohl(fw_stats->dropped_link_overflow);
3972 3972 ethstat->dropped_multicast_filtered.value.ul =
3973 3973 ntohl(fw_stats->dropped_multicast_filtered);
3974 3974 ethstat->dropped_no_big_buffer.value.ul =
3975 3975 ntohl(fw_stats->dropped_no_big_buffer);
3976 3976 ethstat->dropped_no_small_buffer.value.ul =
3977 3977 ntohl(fw_stats->dropped_no_small_buffer);
3978 3978 ethstat->dropped_overrun.value.ul =
3979 3979 ntohl(fw_stats->dropped_overrun);
3980 3980 ethstat->dropped_pause.value.ul =
3981 3981 ntohl(fw_stats->dropped_pause);
3982 3982 ethstat->dropped_runt.value.ul =
3983 3983 ntohl(fw_stats->dropped_runt);
3984 3984 ethstat->link_up.value.ul =
3985 3985 ntohl(fw_stats->link_up);
3986 3986 ethstat->dropped_unicast_filtered.value.ul =
3987 3987 ntohl(fw_stats->dropped_unicast_filtered);
3988 3988 return (0);
3989 3989 }
3990 3990
3991 3991 static int
3992 3992 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
3993 3993 {
3994 3994 struct myri10ge_slice_stat *ethstat;
3995 3995 struct myri10ge_slice_state *ss;
3996 3996
3997 3997 if (rw == KSTAT_WRITE)
3998 3998 return (EACCES);
3999 3999
4000 4000 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
4001 4001 ss = (struct myri10ge_slice_state *)ksp->ks_private;
4002 4002
4003 4003 ethstat->rx_big.value.ul = ss->j_rx_cnt;
4004 4004 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
4005 4005 ethstat->rx_bigbuf_pool.value.ul =
4006 4006 ss->jpool.num_alloc - ss->jbufs_for_smalls;
4007 4007 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
4008 4008 ethstat->rx_small.value.ul = ss->rx_small.cnt -
4009 4009 (ss->rx_small.mask + 1);
4010 4010 ethstat->tx_done.value.ul = ss->tx.done;
4011 4011 ethstat->tx_req.value.ul = ss->tx.req;
4012 4012 ethstat->tx_activate.value.ul = ss->tx.activate;
4013 4013 ethstat->xmit_sched.value.ul = ss->tx.sched;
4014 4014 ethstat->xmit_stall.value.ul = ss->tx.stall;
4015 4015 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
4016 4016 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
4017 4017 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err);
4018 4018 return (0);
4019 4019 }
4020 4020
4021 4021 static int
4022 4022 myri10ge_info_kstat_update(kstat_t *ksp, int rw)
4023 4023 {
4024 4024 struct myri10ge_info *info;
4025 4025 struct myri10ge_priv *mgp;
4026 4026
4027 4027
4028 4028 if (rw == KSTAT_WRITE)
4029 4029 return (EACCES);
4030 4030
4031 4031 info = (struct myri10ge_info *)ksp->ks_data;
4032 4032 mgp = (struct myri10ge_priv *)ksp->ks_private;
4033 4033 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
4034 4034 kstat_named_setstr(&info->firmware_version, mgp->fw_version);
4035 4035 kstat_named_setstr(&info->firmware_name, mgp->fw_name);
4036 4036 kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
4037 4037 kstat_named_setstr(&info->product_code, mgp->pc_str);
4038 4038 kstat_named_setstr(&info->serial_number, mgp->sn_str);
4039 4039 return (0);
4040 4040 }
4041 4041
/*
 * Template for the (virtual) driver-info kstat; shared by all
 * instances, so access is serialized by myri10ge_info_template_lock.
 * Field order must match struct myri10ge_info.
 */
static struct myri10ge_info myri10ge_info_template = {
	{ "driver_version",	KSTAT_DATA_STRING },
	{ "firmware_version",	KSTAT_DATA_STRING },
	{ "firmware_name",	KSTAT_DATA_STRING },
	{ "interrupt_type",	KSTAT_DATA_STRING },
	{ "product_code",	KSTAT_DATA_STRING },
	{ "serial_number",	KSTAT_DATA_STRING },
};
/* protects the shared template above (used as ks_lock for every instance) */
static kmutex_t myri10ge_info_template_lock;
4051 4051
4052 4052
4053 4053 static int
4054 4054 myri10ge_info_init(struct myri10ge_priv *mgp)
4055 4055 {
4056 4056 struct kstat *ksp;
4057 4057
4058 4058 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4059 4059 "myri10ge_info", "net", KSTAT_TYPE_NAMED,
4060 4060 sizeof (myri10ge_info_template) /
4061 4061 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
4062 4062 if (ksp == NULL) {
4063 4063 cmn_err(CE_WARN,
4064 4064 "%s: myri10ge_info_init: kstat_create failed", mgp->name);
4065 4065 return (DDI_FAILURE);
4066 4066 }
4067 4067 mgp->ksp_info = ksp;
4068 4068 ksp->ks_update = myri10ge_info_kstat_update;
4069 4069 ksp->ks_private = (void *) mgp;
4070 4070 ksp->ks_data = &myri10ge_info_template;
4071 4071 ksp->ks_lock = &myri10ge_info_template_lock;
4072 4072 if (MYRI10GE_VERSION_STR != NULL)
4073 4073 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
4074 4074 if (mgp->fw_version != NULL)
4075 4075 ksp->ks_data_size += strlen(mgp->fw_version) + 1;
4076 4076 ksp->ks_data_size += strlen(mgp->fw_name) + 1;
4077 4077 ksp->ks_data_size += strlen(mgp->intr_type) + 1;
4078 4078 if (mgp->pc_str != NULL)
4079 4079 ksp->ks_data_size += strlen(mgp->pc_str) + 1;
4080 4080 if (mgp->sn_str != NULL)
4081 4081 ksp->ks_data_size += strlen(mgp->sn_str) + 1;
4082 4082
4083 4083 kstat_install(ksp);
4084 4084 return (DDI_SUCCESS);
4085 4085 }
4086 4086
4087 4087
4088 4088 static int
4089 4089 myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
4090 4090 {
4091 4091 struct kstat *ksp;
4092 4092 struct myri10ge_nic_stat *ethstat;
4093 4093
4094 4094 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4095 4095 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
4096 4096 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4097 4097 if (ksp == NULL) {
4098 4098 cmn_err(CE_WARN,
4099 4099 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4100 4100 return (DDI_FAILURE);
4101 4101 }
4102 4102 mgp->ksp_stat = ksp;
4103 4103 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);
4104 4104
4105 4105 kstat_named_init(ðstat->dma_read_bw_MBs,
4106 4106 "dma_read_bw_MBs", KSTAT_DATA_ULONG);
4107 4107 kstat_named_init(ðstat->dma_write_bw_MBs,
4108 4108 "dma_write_bw_MBs", KSTAT_DATA_ULONG);
4109 4109 kstat_named_init(ðstat->dma_read_write_bw_MBs,
4110 4110 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
4111 4111 kstat_named_init(ðstat->dma_force_physical,
4112 4112 "dma_force_physical", KSTAT_DATA_ULONG);
4113 4113 kstat_named_init(ðstat->lanes,
4114 4114 "lanes", KSTAT_DATA_ULONG);
4115 4115 kstat_named_init(ðstat->dropped_bad_crc32,
4116 4116 "dropped_bad_crc32", KSTAT_DATA_ULONG);
4117 4117 kstat_named_init(ðstat->dropped_bad_phy,
4118 4118 "dropped_bad_phy", KSTAT_DATA_ULONG);
4119 4119 kstat_named_init(ðstat->dropped_link_error_or_filtered,
4120 4120 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
4121 4121 kstat_named_init(ðstat->dropped_link_overflow,
4122 4122 "dropped_link_overflow", KSTAT_DATA_ULONG);
4123 4123 kstat_named_init(ðstat->dropped_multicast_filtered,
4124 4124 "dropped_multicast_filtered", KSTAT_DATA_ULONG);
4125 4125 kstat_named_init(ðstat->dropped_no_big_buffer,
4126 4126 "dropped_no_big_buffer", KSTAT_DATA_ULONG);
4127 4127 kstat_named_init(ðstat->dropped_no_small_buffer,
4128 4128 "dropped_no_small_buffer", KSTAT_DATA_ULONG);
4129 4129 kstat_named_init(ðstat->dropped_overrun,
4130 4130 "dropped_overrun", KSTAT_DATA_ULONG);
4131 4131 kstat_named_init(ðstat->dropped_pause,
4132 4132 "dropped_pause", KSTAT_DATA_ULONG);
4133 4133 kstat_named_init(ðstat->dropped_runt,
4134 4134 "dropped_runt", KSTAT_DATA_ULONG);
4135 4135 kstat_named_init(ðstat->dropped_unicast_filtered,
4136 4136 "dropped_unicast_filtered", KSTAT_DATA_ULONG);
4137 4137 kstat_named_init(ðstat->dropped_runt, "dropped_runt",
4138 4138 KSTAT_DATA_ULONG);
4139 4139 kstat_named_init(ðstat->link_up, "link_up", KSTAT_DATA_ULONG);
4140 4140 kstat_named_init(ðstat->link_changes, "link_changes",
4141 4141 KSTAT_DATA_ULONG);
4142 4142 ksp->ks_update = myri10ge_nic_stat_kstat_update;
4143 4143 ksp->ks_private = (void *) mgp;
4144 4144 kstat_install(ksp);
4145 4145 return (DDI_SUCCESS);
4146 4146 }
4147 4147
4148 4148 static int
4149 4149 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
4150 4150 {
4151 4151 struct myri10ge_priv *mgp = ss->mgp;
4152 4152 struct kstat *ksp;
4153 4153 struct myri10ge_slice_stat *ethstat;
4154 4154 int instance;
4155 4155
4156 4156 /*
4157 4157 * fake an instance so that the same slice numbers from
4158 4158 * different instances do not collide
4159 4159 */
4160 4160 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss);
4161 4161 ksp = kstat_create("myri10ge", instance,
4162 4162 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
4163 4163 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4164 4164 if (ksp == NULL) {
4165 4165 cmn_err(CE_WARN,
4166 4166 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4167 4167 return (DDI_FAILURE);
4168 4168 }
4169 4169 ss->ksp_stat = ksp;
4170 4170 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
4171 4171 kstat_named_init(ðstat->lro_bad_csum, "lro_bad_csum",
4172 4172 KSTAT_DATA_ULONG);
4173 4173 kstat_named_init(ðstat->lro_flushed, "lro_flushed",
4174 4174 KSTAT_DATA_ULONG);
4175 4175 kstat_named_init(ðstat->lro_queued, "lro_queued",
4176 4176 KSTAT_DATA_ULONG);
4177 4177 kstat_named_init(ðstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
4178 4178 KSTAT_DATA_ULONG);
4179 4179 kstat_named_init(ðstat->rx_bigbuf_pool, "rx_bigbuf_pool",
4180 4180 KSTAT_DATA_ULONG);
4181 4181 kstat_named_init(ðstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
4182 4182 KSTAT_DATA_ULONG);
4183 4183 kstat_named_init(ðstat->rx_copy, "rx_copy",
4184 4184 KSTAT_DATA_ULONG);
4185 4185 kstat_named_init(ðstat->rx_big_nobuf, "rx_big_nobuf",
4186 4186 KSTAT_DATA_ULONG);
4187 4187 kstat_named_init(ðstat->rx_small_nobuf, "rx_small_nobuf",
4188 4188 KSTAT_DATA_ULONG);
4189 4189 kstat_named_init(ðstat->xmit_zero_len, "xmit_zero_len",
4190 4190 KSTAT_DATA_ULONG);
4191 4191 kstat_named_init(ðstat->xmit_pullup, "xmit_pullup",
4192 4192 KSTAT_DATA_ULONG);
4193 4193 kstat_named_init(ðstat->xmit_pullup_first, "xmit_pullup_first",
4194 4194 KSTAT_DATA_ULONG);
4195 4195 kstat_named_init(ðstat->xmit_lowbuf, "xmit_lowbuf",
4196 4196 KSTAT_DATA_ULONG);
4197 4197 kstat_named_init(ðstat->xmit_lsobadflags, "xmit_lsobadflags",
4198 4198 KSTAT_DATA_ULONG);
4199 4199 kstat_named_init(ðstat->xmit_sched, "xmit_sched",
4200 4200 KSTAT_DATA_ULONG);
4201 4201 kstat_named_init(ðstat->xmit_stall, "xmit_stall",
4202 4202 KSTAT_DATA_ULONG);
4203 4203 kstat_named_init(ðstat->xmit_stall_early, "xmit_stall_early",
4204 4204 KSTAT_DATA_ULONG);
4205 4205 kstat_named_init(ðstat->xmit_stall_late, "xmit_stall_late",
4206 4206 KSTAT_DATA_ULONG);
4207 4207 kstat_named_init(ðstat->xmit_err, "xmit_err",
4208 4208 KSTAT_DATA_ULONG);
4209 4209 kstat_named_init(ðstat->tx_req, "tx_req",
4210 4210 KSTAT_DATA_ULONG);
4211 4211 kstat_named_init(ðstat->tx_activate, "tx_activate",
4212 4212 KSTAT_DATA_ULONG);
4213 4213 kstat_named_init(ðstat->tx_done, "tx_done",
4214 4214 KSTAT_DATA_ULONG);
4215 4215 kstat_named_init(ðstat->tx_handles_alloced, "tx_handles_alloced",
4216 4216 KSTAT_DATA_ULONG);
4217 4217 kstat_named_init(ðstat->rx_big, "rx_big",
4218 4218 KSTAT_DATA_ULONG);
4219 4219 kstat_named_init(ðstat->rx_small, "rx_small",
4220 4220 KSTAT_DATA_ULONG);
4221 4221 ksp->ks_update = myri10ge_slice_stat_kstat_update;
4222 4222 ksp->ks_private = (void *) ss;
4223 4223 kstat_install(ksp);
4224 4224 return (DDI_SUCCESS);
4225 4225 }
4226 4226
4227 4227
4228 4228
4229 4229 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4230 4230
4231 4231 #include <vm/hat.h>
4232 4232 #include <sys/ddi_isa.h>
4233 4233 void *device_arena_alloc(size_t size, int vm_flag);
4234 4234 void device_arena_free(void *vaddr, size_t size);
4235 4235
4236 4236 static void
4237 4237 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4238 4238 {
4239 4239 dev_info_t *parent_dip;
4240 4240 ddi_acc_handle_t handle;
4241 4241 unsigned long bus_number, dev_number, func_number;
4242 4242 unsigned long cfg_pa, paddr, base, pgoffset;
4243 4243 char *cvaddr, *ptr;
4244 4244 uint32_t *ptr32;
4245 4245 int retval = DDI_FAILURE;
4246 4246 int dontcare;
4247 4247 uint16_t read_vid, read_did, vendor_id, device_id;
4248 4248
4249 4249 if (!myri10ge_nvidia_ecrc_enable)
4250 4250 return;
4251 4251
4252 4252 parent_dip = ddi_get_parent(mgp->dip);
4253 4253 if (parent_dip == NULL) {
4254 4254 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
4255 4255 return;
4256 4256 }
4257 4257
4258 4258 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
4259 4259 cmn_err(CE_WARN,
4260 4260 "%s: Could not access my parent's registers", mgp->name);
4261 4261 return;
4262 4262 }
4263 4263
4264 4264 vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
4265 4265 device_id = pci_config_get16(handle, PCI_CONF_DEVID);
4266 4266 pci_config_teardown(&handle);
4267 4267
4268 4268 if (myri10ge_verbose) {
4269 4269 unsigned long bus_number, dev_number, func_number;
4270 4270 int reg_set, span;
4271 4271 (void) myri10ge_reg_set(parent_dip, ®_set, &span,
4272 4272 &bus_number, &dev_number, &func_number);
4273 4273 if (myri10ge_verbose)
4274 4274 printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
4275 4275 bus_number, dev_number, func_number);
4276 4276 }
4277 4277
4278 4278 if (vendor_id != 0x10de)
4279 4279 return;
4280 4280
4281 4281 if (device_id != 0x005d /* CK804 */ &&
4282 4282 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
4283 4283 return;
4284 4284 }
4285 4285 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
4286 4286 &bus_number, &dev_number, &func_number);
4287 4287
4288 4288 for (cfg_pa = 0xf0000000UL;
4289 4289 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
4290 4290 cfg_pa -= 0x10000000UL) {
4291 4291 /* find the config space address for the nvidia bridge */
4292 4292 paddr = (cfg_pa + bus_number * 0x00100000UL +
4293 4293 (dev_number * 8 + func_number) * 0x00001000UL);
4294 4294
4295 4295 base = paddr & (~MMU_PAGEOFFSET);
4296 4296 pgoffset = paddr & MMU_PAGEOFFSET;
4297 4297
4298 4298 /* map it into the kernel */
4299 4299 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP);
4300 4300 if (cvaddr == NULL)
4301 4301 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
4302 4302 mgp->name);
4303 4303
4304 4304 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
4305 4305 i_ddi_paddr_to_pfn(base),
4306 4306 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);
4307 4307
4308 4308 ptr = cvaddr + pgoffset;
4309 4309 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
4310 4310 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
4311 4311 if (vendor_id == read_did || device_id == read_did) {
4312 4312 ptr32 = (uint32_t *)(void *)(ptr + 0x178);
4313 4313 if (myri10ge_verbose)
4314 4314 printf("%s: Enabling ECRC on upstream "
4315 4315 "Nvidia bridge (0x%x:0x%x) "
4316 4316 "at %ld:%ld:%ld\n", mgp->name,
4317 4317 read_vid, read_did, bus_number,
4318 4318 dev_number, func_number);
4319 4319 *ptr32 |= 0x40;
4320 4320 retval = DDI_SUCCESS;
4321 4321 }
4322 4322 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
4323 4323 device_arena_free(cvaddr, ptob(1));
4324 4324 }
4325 4325 }
4326 4326
4327 4327 #else
/*
 * Non-x86 stub: ECRC enabling on Nvidia bridges is only relevant (and
 * only implemented) on x86 platforms.
 */
/*ARGSUSED*/
static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
}
4333 4333 #endif /* i386 */
4334 4334
4335 4335
4336 4336 /*
4337 4337 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4338 4338 * when the PCI-E Completion packets are aligned on an 8-byte
4339 4339 * boundary. Some PCI-E chip sets always align Completion packets; on
4340 4340 * the ones that do not, the alignment can be enforced by enabling
4341 4341 * ECRC generation (if supported).
4342 4342 *
4343 4343 * When PCI-E Completion packets are not aligned, it is actually more
4344 4344 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4345 4345 *
4346 4346 * If the driver can neither enable ECRC nor verify that it has
4347 4347 * already been enabled, then it must use a firmware image which works
4348 4348 * around unaligned completion packets (ethp_z8e.dat), and it should
4349 4349 * also ensure that it never gives the device a Read-DMA which is
4350 4350 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
4351 4351 * enabled, then the driver should use the aligned (eth_z8e.dat)
4352 4352 * firmware image, and set tx.boundary to 4KB.
4353 4353 */
4354 4354
4355 4355
4356 4356 static int
4357 4357 myri10ge_firmware_probe(struct myri10ge_priv *mgp)
4358 4358 {
4359 4359 int status;
4360 4360
4361 4361 mgp->tx_boundary = 4096;
4362 4362 /*
4363 4363 * Verify the max read request size was set to 4KB
4364 4364 * before trying the test with 4KB.
4365 4365 */
4366 4366 if (mgp->max_read_request_4k == 0)
4367 4367 mgp->tx_boundary = 2048;
4368 4368 /*
4369 4369 * load the optimized firmware which assumes aligned PCIe
4370 4370 * completions in order to see if it works on this host.
4371 4371 */
4372 4372
4373 4373 mgp->fw_name = "rss_eth_z8e";
4374 4374 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4375 4375 mgp->eth_z8e_length = rss_eth_z8e_length;
4376 4376
4377 4377 status = myri10ge_load_firmware(mgp);
4378 4378 if (status != 0) {
4379 4379 return (status);
4380 4380 }
4381 4381 /*
4382 4382 * Enable ECRC if possible
4383 4383 */
4384 4384 myri10ge_enable_nvidia_ecrc(mgp);
4385 4385
4386 4386 /*
4387 4387 * Run a DMA test which watches for unaligned completions and
4388 4388 * aborts on the first one seen.
4389 4389 */
4390 4390 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
4391 4391 if (status == 0)
4392 4392 return (0); /* keep the aligned firmware */
4393 4393
4394 4394 if (status != E2BIG)
4395 4395 cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
4396 4396 mgp->name, status);
4397 4397 if (status == ENOSYS)
4398 4398 cmn_err(CE_WARN, "%s: Falling back to ethp! "
4399 4399 "Please install up to date fw\n", mgp->name);
4400 4400 return (status);
4401 4401 }
4402 4402
4403 4403 static int
4404 4404 myri10ge_select_firmware(struct myri10ge_priv *mgp)
4405 4405 {
4406 4406 int aligned;
4407 4407
4408 4408 aligned = 0;
4409 4409
4410 4410 if (myri10ge_force_firmware == 1) {
4411 4411 if (myri10ge_verbose)
4412 4412 printf("%s: Assuming aligned completions (forced)\n",
4413 4413 mgp->name);
4414 4414 aligned = 1;
4415 4415 goto done;
4416 4416 }
4417 4417
4418 4418 if (myri10ge_force_firmware == 2) {
4419 4419 if (myri10ge_verbose)
4420 4420 printf("%s: Assuming unaligned completions (forced)\n",
4421 4421 mgp->name);
4422 4422 aligned = 0;
4423 4423 goto done;
4424 4424 }
4425 4425
4426 4426 /* If the width is less than 8, we may used the aligned firmware */
4427 4427 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
4428 4428 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
4429 4429 mgp->name, mgp->pcie_link_width);
4430 4430 aligned = 1;
4431 4431 goto done;
4432 4432 }
4433 4433
4434 4434 if (0 == myri10ge_firmware_probe(mgp))
4435 4435 return (0); /* keep optimized firmware */
4436 4436
4437 4437 done:
4438 4438 if (aligned) {
4439 4439 mgp->fw_name = "rss_eth_z8e";
4440 4440 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4441 4441 mgp->eth_z8e_length = rss_eth_z8e_length;
4442 4442 mgp->tx_boundary = 4096;
4443 4443 } else {
4444 4444 mgp->fw_name = "rss_ethp_z8e";
4445 4445 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
4446 4446 mgp->eth_z8e_length = rss_ethp_z8e_length;
4447 4447 mgp->tx_boundary = 2048;
4448 4448 }
4449 4449
4450 4450 return (myri10ge_load_firmware(mgp));
4451 4451 }
4452 4452
4453 4453 static int
4454 4454 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
4455 4455 {
4456 4456 dev_info_t *devinfo = mgp->dip;
4457 4457 int count, avail, actual, intr_types;
4458 4458 int x, y, rc, inum = 0;
4459 4459
4460 4460
4461 4461 rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4462 4462 if (rc != DDI_SUCCESS) {
4463 4463 cmn_err(CE_WARN,
4464 4464 "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name,
4465 4465 rc);
4466 4466 return (DDI_FAILURE);
4467 4467 }
4468 4468
4469 4469 if (!myri10ge_use_msi)
4470 4470 intr_types &= ~DDI_INTR_TYPE_MSI;
4471 4471 if (!myri10ge_use_msix)
4472 4472 intr_types &= ~DDI_INTR_TYPE_MSIX;
4473 4473
4474 4474 if (intr_types & DDI_INTR_TYPE_MSIX) {
4475 4475 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
4476 4476 mgp->intr_type = "MSI-X";
4477 4477 } else if (intr_types & DDI_INTR_TYPE_MSI) {
4478 4478 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
4479 4479 mgp->intr_type = "MSI";
4480 4480 } else {
4481 4481 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
4482 4482 mgp->intr_type = "Legacy";
4483 4483 }
4484 4484 /* Get number of interrupts */
4485 4485 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
4486 4486 if ((rc != DDI_SUCCESS) || (count == 0)) {
4487 4487 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4488 4488 "count: %d", mgp->name, rc, count);
4489 4489
4490 4490 return (DDI_FAILURE);
4491 4491 }
4492 4492
4493 4493 /* Get number of available interrupts */
4494 4494 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
4495 4495 if ((rc != DDI_SUCCESS) || (avail == 0)) {
4496 4496 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
4497 4497 "rc: %d, avail: %d\n", mgp->name, rc, avail);
4498 4498 return (DDI_FAILURE);
4499 4499 }
4500 4500 if (avail < count) {
4501 4501 cmn_err(CE_NOTE,
4502 4502 "!%s: nintrs() returned %d, navail returned %d",
4503 4503 mgp->name, count, avail);
4504 4504 count = avail;
4505 4505 }
4506 4506
4507 4507 if (count < mgp->num_slices)
4508 4508 return (DDI_FAILURE);
4509 4509
4510 4510 if (count > mgp->num_slices)
4511 4511 count = mgp->num_slices;
4512 4512
4513 4513 /* Allocate memory for MSI interrupts */
4514 4514 mgp->intr_size = count * sizeof (ddi_intr_handle_t);
4515 4515 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);
4516 4516
4517 4517 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
4518 4518 count, &actual, DDI_INTR_ALLOC_NORMAL);
4519 4519
4520 4520 if ((rc != DDI_SUCCESS) || (actual == 0)) {
4521 4521 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
4522 4522 mgp->name, rc);
4523 4523
4524 4524 kmem_free(mgp->htable, mgp->intr_size);
4525 4525 mgp->htable = NULL;
4526 4526 return (DDI_FAILURE);
4527 4527 }
4528 4528
4529 4529 if ((actual < count) && myri10ge_verbose) {
4530 4530 cmn_err(CE_NOTE, "%s: got %d/%d slices",
4531 4531 mgp->name, actual, count);
4532 4532 }
4533 4533
4534 4534 mgp->intr_cnt = actual;
4535 4535
4536 4536 /*
4537 4537 * Get priority for first irq, assume remaining are all the same
4538 4538 */
4539 4539 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
4540 4540 != DDI_SUCCESS) {
4541 4541 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);
4542 4542
4543 4543 /* Free already allocated intr */
4544 4544 for (y = 0; y < actual; y++) {
4545 4545 (void) ddi_intr_free(mgp->htable[y]);
4546 4546 }
4547 4547
4548 4548 kmem_free(mgp->htable, mgp->intr_size);
4549 4549 mgp->htable = NULL;
4550 4550 return (DDI_FAILURE);
4551 4551 }
4552 4552
4553 4553 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;
4554 4554
4555 4555 if (!add_handler)
4556 4556 return (DDI_SUCCESS);
4557 4557
4558 4558 /* Call ddi_intr_add_handler() */
4559 4559 for (x = 0; x < actual; x++) {
4560 4560 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
4561 4561 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
4562 4562 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
4563 4563 mgp->name);
4564 4564
4565 4565 /* Free already allocated intr */
4566 4566 for (y = 0; y < actual; y++) {
4567 4567 (void) ddi_intr_free(mgp->htable[y]);
4568 4568 }
4569 4569
4570 4570 kmem_free(mgp->htable, mgp->intr_size);
4571 4571 mgp->htable = NULL;
4572 4572 return (DDI_FAILURE);
4573 4573 }
4574 4574 }
4575 4575
4576 4576 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
4577 4577 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4578 4578 /* Call ddi_intr_block_enable() for MSI */
4579 4579 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
4580 4580 } else {
4581 4581 /* Call ddi_intr_enable() for MSI non block enable */
4582 4582 for (x = 0; x < mgp->intr_cnt; x++) {
4583 4583 (void) ddi_intr_enable(mgp->htable[x]);
4584 4584 }
4585 4585 }
4586 4586
4587 4587 return (DDI_SUCCESS);
4588 4588 }
4589 4589
4590 4590 static void
4591 4591 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
4592 4592 {
4593 4593 int x, err;
4594 4594
4595 4595 /* Disable all interrupts */
4596 4596 if (handler_installed) {
4597 4597 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4598 4598 /* Call ddi_intr_block_disable() */
4599 4599 (void) ddi_intr_block_disable(mgp->htable,
4600 4600 mgp->intr_cnt);
4601 4601 } else {
4602 4602 for (x = 0; x < mgp->intr_cnt; x++) {
4603 4603 (void) ddi_intr_disable(mgp->htable[x]);
4604 4604 }
4605 4605 }
4606 4606 }
4607 4607
4608 4608 for (x = 0; x < mgp->intr_cnt; x++) {
4609 4609 if (handler_installed) {
4610 4610 /* Call ddi_intr_remove_handler() */
4611 4611 err = ddi_intr_remove_handler(mgp->htable[x]);
4612 4612 if (err != DDI_SUCCESS) {
4613 4613 cmn_err(CE_WARN,
4614 4614 "%s: ddi_intr_remove_handler for"
4615 4615 "vec %d returned %d\n", mgp->name,
4616 4616 x, err);
4617 4617 }
4618 4618 }
4619 4619 err = ddi_intr_free(mgp->htable[x]);
4620 4620 if (err != DDI_SUCCESS) {
4621 4621 cmn_err(CE_WARN,
4622 4622 "%s: ddi_intr_free for vec %d returned %d\n",
4623 4623 mgp->name, x, err);
4624 4624 }
4625 4625 }
4626 4626 kmem_free(mgp->htable, mgp->intr_size);
4627 4627 mgp->htable = NULL;
4628 4628 }
4629 4629
4630 4630 static void
4631 4631 myri10ge_test_physical(dev_info_t *dip)
4632 4632 {
4633 4633 ddi_dma_handle_t handle;
4634 4634 struct myri10ge_dma_stuff dma;
4635 4635 void *addr;
4636 4636 int err;
4637 4637
4638 4638 /* test #1, sufficient for older sparc systems */
4639 4639 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
4640 4640 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
4641 4641 DDI_DMA_DONTWAIT, NULL, &handle);
4642 4642 if (err == DDI_DMA_BADATTR)
4643 4643 goto fail;
4644 4644 ddi_dma_free_handle(&handle);
4645 4645
4646 4646 /* test #2, required on Olympis where the bind is what fails */
4647 4647 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
4648 4648 &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
4649 4649 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
4650 4650 if (addr == NULL)
4651 4651 goto fail;
4652 4652 myri10ge_dma_free(&dma);
4653 4653 return;
4654 4654
4655 4655 fail:
4656 4656 if (myri10ge_verbose)
4657 4657 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4658 4658 "using IOMMU\n", ddi_get_instance(dip));
4659 4659
4660 4660 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
4661 4661 }
4662 4662
4663 4663 static void
4664 4664 myri10ge_get_props(dev_info_t *dip)
4665 4665 {
4666 4666
4667 4667 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4668 4668 "myri10ge_flow_control", myri10ge_flow_control);
4669 4669
4670 4670 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4671 4671 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
4672 4672
4673 4673 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4674 4674 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4675 4675 "myri10ge_nvidia_ecrc_enable", 1);
4676 4676 #endif
4677 4677
4678 4678
4679 4679 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4680 4680 "myri10ge_use_msi", myri10ge_use_msi);
4681 4681
4682 4682 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4683 4683 "myri10ge_deassert_wait", myri10ge_deassert_wait);
4684 4684
4685 4685 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4686 4686 "myri10ge_verbose", myri10ge_verbose);
4687 4687
4688 4688 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4689 4689 "myri10ge_tx_copylen", myri10ge_tx_copylen);
4690 4690
4691 4691 if (myri10ge_tx_copylen < 60) {
4692 4692 cmn_err(CE_WARN,
4693 4693 "myri10ge_tx_copylen must be >= 60 bytes\n");
4694 4694 myri10ge_tx_copylen = 60;
4695 4695 }
4696 4696
4697 4697 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4698 4698 "myri10ge_mtu_override", myri10ge_mtu_override);
4699 4699
4700 4700 if (myri10ge_mtu_override >= 1500 && myri10ge_mtu_override <= 9000)
4701 4701 myri10ge_mtu = myri10ge_mtu_override +
4702 4702 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
4703 4703 else if (myri10ge_mtu_override != 0) {
4704 4704 cmn_err(CE_WARN,
4705 4705 "myri10ge_mtu_override must be between 1500 and "
4706 4706 "9000 bytes\n");
4707 4707 }
4708 4708
4709 4709 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4710 4710 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
4711 4711 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4712 4712 "myri10ge_bigbufs_max", myri10ge_bigbufs_max);
4713 4713
4714 4714 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4715 4715 "myri10ge_watchdog_reset", myri10ge_watchdog_reset);
4716 4716
4717 4717 if (myri10ge_bigbufs_initial < 128) {
4718 4718 cmn_err(CE_WARN,
4719 4719 "myri10ge_bigbufs_initial be at least 128\n");
4720 4720 myri10ge_bigbufs_initial = 128;
4721 4721 }
4722 4722 if (myri10ge_bigbufs_max < 128) {
4723 4723 cmn_err(CE_WARN,
4724 4724 "myri10ge_bigbufs_max be at least 128\n");
4725 4725 myri10ge_bigbufs_max = 128;
4726 4726 }
4727 4727
4728 4728 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
4729 4729 cmn_err(CE_WARN,
4730 4730 "myri10ge_bigbufs_max must be >= "
4731 4731 "myri10ge_bigbufs_initial\n");
4732 4732 myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
4733 4733 }
4734 4734
4735 4735 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4736 4736 "myri10ge_force_firmware", myri10ge_force_firmware);
4737 4737
4738 4738 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4739 4739 "myri10ge_max_slices", myri10ge_max_slices);
4740 4740
4741 4741 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4742 4742 "myri10ge_use_msix", myri10ge_use_msix);
4743 4743
4744 4744 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4745 4745 "myri10ge_rss_hash", myri10ge_rss_hash);
4746 4746
4747 4747 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
4748 4748 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
4749 4749 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n",
4750 4750 myri10ge_rss_hash);
4751 4751 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4752 4752 }
4753 4753 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4754 4754 "myri10ge_lro", myri10ge_lro);
4755 4755 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4756 4756 "myri10ge_lro_cnt", myri10ge_lro_cnt);
4757 4757 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4758 4758 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
4759 4759 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4760 4760 "myri10ge_tx_hash", myri10ge_tx_hash);
4761 4761 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4762 4762 "myri10ge_use_lso", myri10ge_use_lso);
4763 4763 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4764 4764 "myri10ge_lso_copy", myri10ge_lso_copy);
4765 4765 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4766 4766 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
4767 4767 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4768 4768 "myri10ge_small_bytes", myri10ge_small_bytes);
4769 4769 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) {
4770 4770 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n",
4771 4771 myri10ge_small_bytes);
4772 4772 cmn_err(CE_WARN, "must be aligned on 128b bndry -2\n");
4773 4773 myri10ge_small_bytes += 128;
4774 4774 myri10ge_small_bytes &= ~(128 -1);
4775 4775 myri10ge_small_bytes -= MXGEFW_PAD;
4776 4776 cmn_err(CE_WARN, "rounded up to %d\n",
4777 4777 myri10ge_small_bytes);
4778 4778
4779 4779 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4780 4780 }
4781 4781 }
4782 4782
/* fallback for headers that lack it: offset of the PCIe Link Status reg */
#ifndef PCI_EXP_LNKSTA
#define	PCI_EXP_LNKSTA 18
#endif
4786 4786
4787 4787 static int
4788 4788 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
4789 4789 {
4790 4790 uint16_t status;
4791 4791 uint8_t ptr;
4792 4792
4793 4793 /* check to see if we have capabilities */
4794 4794 status = pci_config_get16(handle, PCI_CONF_STAT);
4795 4795 if (!(status & PCI_STAT_CAP)) {
4796 4796 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
4797 4797 return (ENXIO);
4798 4798 }
4799 4799
4800 4800 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);
4801 4801
4802 4802 /* Walk the capabilities list, looking for a PCI Express cap */
4803 4803 while (ptr != PCI_CAP_NEXT_PTR_NULL) {
4804 4804 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
4805 4805 break;
4806 4806 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
4807 4807 }
4808 4808 if (ptr < 64) {
4809 4809 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
4810 4810 return (ENXIO);
4811 4811 }
4812 4812 *capptr = ptr;
4813 4813 return (0);
4814 4814 }
4815 4815
/*
 * Set the PCIe Max Read Request Size for this device to 4KB.
 * Returns 0 on success, ENXIO if no PCIe capability is found, or
 * EINVAL if the device did not accept the new value.
 */
static int
myri10ge_set_max_readreq(ddi_acc_handle_t handle)
{
	int err;
	uint16_t val;
	uint8_t ptr;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
	if (err != 0) {
		cmn_err(CE_WARN, "could not find PCIe cap\n");
		return (ENXIO);
	}

	/* set max read req to 4096 */
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
	    PCIE_DEVCTL_MAX_READ_REQ_4096;
	pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
	/* read it back to verify the device latched the new setting */
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
	    PCIE_DEVCTL_MAX_READ_REQ_4096) {
		cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
		return (EINVAL);
	}
	return (0);
}
4842 4842
4843 4843 static int
4844 4844 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
4845 4845 {
4846 4846 int err;
4847 4847 uint16_t val;
4848 4848 uint8_t ptr;
4849 4849
4850 4850 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4851 4851 if (err != 0) {
4852 4852 cmn_err(CE_WARN, "could not set max read req\n");
4853 4853 return (ENXIO);
4854 4854 }
4855 4855
4856 4856 /* read link width */
4857 4857 val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
4858 4858 val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
4859 4859 *link = (val >> 4);
4860 4860 return (0);
4861 4861 }
4862 4862
/*
 * Attempt to recover a NIC whose transmit path has stalled.
 * Returns 1 if the NIC is considered usable afterwards, 0 if it is
 * dead or was merely reported without a reset.  Caller holds
 * mgp->intrlock (see myri10ge_watchdog()).
 */
static int
myri10ge_reset_nic(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	uint32_t reboot;
	uint16_t cmd;
	int err;

	cmd = pci_config_get16(handle, PCI_CONF_COMM);
	if ((cmd & PCI_COMM_ME) == 0) {
		/*
		 * Bus master DMA disabled? Check to see if the card
		 * rebooted due to a parity error For now, just report
		 * it
		 */

		/* enter read32 mode */
		pci_config_put8(handle, mgp->vso + 0x10, 0x3);
		/* read REBOOT_STATUS (0xfffffff0) */
		pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
		/*
		 * NOTE(review): only 16 bits are read into the 32-bit
		 * `reboot' (pci_config_get16) even though the comment
		 * above describes a 32-bit read -- confirm whether
		 * pci_config_get32 was intended.
		 */
		reboot = pci_config_get16(handle, mgp->vso + 0x14);
		cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
		return (0);
	}
	if (!myri10ge_watchdog_reset) {
		/* automatic resets disabled via the tunable */
		cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
		return (1);
	}

	/* bounce the interface to recover */
	myri10ge_stop_locked(mgp);
	err = myri10ge_start_locked(mgp);
	if (err == DDI_FAILURE) {
		return (0);
	}
	/* wake any transmitters that were blocked on a full ring */
	mac_tx_update(mgp->mh);
	return (1);
}
4900 4900
4901 4901 static inline int
4902 4902 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
4903 4903 {
4904 4904 if (tx->sched != tx->stall &&
4905 4905 tx->done == tx->watchdog_done &&
4906 4906 tx->watchdog_req != tx->watchdog_done)
4907 4907 return (1);
4908 4908 return (0);
4909 4909 }
4910 4910
/*
 * Periodic (timeout(9F)) watchdog.  Detects wedged transmit rings and
 * resets the NIC when forward progress has stopped for reasons other
 * than link-partner flow control.  Also grows the jumbo buffer pool
 * for any slice that has recently been forced to copy receives.
 * Rearms itself unless the interface stopped or the NIC is dead.
 */
static void
myri10ge_watchdog(void *arg)
{
	struct myri10ge_priv *mgp;
	struct myri10ge_slice_state *ss;
	myri10ge_tx_ring_t *tx;
	int nic_ok = 1;
	int slices_stalled, rx_pause, i;
	int add_rx;

	mgp = arg;
	mutex_enter(&mgp->intrlock);
	if (mgp->running != MYRI10GE_ETH_RUNNING) {
		/* interface stopped underneath us; do not rearm */
		cmn_err(CE_WARN,
		    "%s not running, not rearming watchdog (%d)\n",
		    mgp->name, mgp->running);
		mutex_exit(&mgp->intrlock);
		return;
	}

	rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);

	/*
	 * make sure nic is stalled before we reset the nic, so as to
	 * ensure we don't rip the transmit data structures out from
	 * under a pending transmit
	 */

	for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
		tx = &mgp->ss[i].tx;
		slices_stalled = myri10ge_ring_stalled(tx);
		if (slices_stalled)
			break;
	}

	if (slices_stalled) {
		/*
		 * If the pause-drop counter advanced since the last
		 * pass, the stall is explained by flow control, so the
		 * NIC is not reset.
		 */
		if (mgp->watchdog_rx_pause == rx_pause) {
			cmn_err(CE_WARN,
			    "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
			    mgp->name, i, tx->sched, tx->stall,
			    tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
			    (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
			nic_ok = myri10ge_reset_nic(mgp);
		} else {
			cmn_err(CE_WARN,
			    "%s Flow controlled, check link partner\n",
			    mgp->name);
		}
	}

	if (!nic_ok) {
		cmn_err(CE_WARN,
		    "%s Nic dead, not rearming watchdog\n", mgp->name);
		mutex_exit(&mgp->intrlock);
		return;
	}
	/* record progress markers and top up jumbo buffers per slice */
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		tx = &ss->tx;
		tx->watchdog_done = tx->done;
		tx->watchdog_req = tx->req;
		/* rx_copy advancing means we ran short of jumbo buffers */
		if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
			ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
			add_rx =
			    min(ss->jpool.num_alloc,
			    myri10ge_bigbufs_max -
			    (ss->jpool.num_alloc -
			    ss->jbufs_for_smalls));
			if (add_rx != 0) {
				(void) myri10ge_add_jbufs(ss, add_rx, 0);
				/* now feed them to the firmware */
				mutex_enter(&ss->jpool.mtx);
				myri10ge_restock_jumbos(ss);
				mutex_exit(&ss->jpool.mtx);
			}
		}
	}
	mgp->watchdog_rx_pause = rx_pause;

	/* rearm ourselves */
	mgp->timer_id = timeout(myri10ge_watchdog, mgp,
	    mgp->timer_ticks);
	mutex_exit(&mgp->intrlock);
}
4994 4994
4995 4995 /*ARGSUSED*/
4996 4996 static int
4997 4997 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
4998 4998
4999 4999 {
5000 5000 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5001 5001 (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
5002 5002 return (0);
5003 5003 }
5004 5004
5005 5005 /*ARGSUSED*/
5006 5006 static int
5007 5007 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
5008 5008 caddr_t cp, cred_t *credp)
5009 5009
5010 5010 {
5011 5011 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5012 5012 char *end;
5013 5013 size_t new_value;
5014 5014
5015 5015 new_value = mi_strtol(value, &end, 10);
5016 5016 if (end == value)
5017 5017 return (EINVAL);
5018 5018
5019 5019 mutex_enter(&myri10ge_param_lock);
5020 5020 mgp->intr_coal_delay = (int)new_value;
5021 5021 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
5022 5022 mutex_exit(&myri10ge_param_lock);
5023 5023 return (0);
5024 5024 }
5025 5025
5026 5026 /*ARGSUSED*/
5027 5027 static int
5028 5028 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5029 5029
5030 5030 {
5031 5031 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5032 5032 (void) mi_mpprintf(mp, "%d", mgp->pause);
5033 5033 return (0);
5034 5034 }
5035 5035
5036 5036 /*ARGSUSED*/
5037 5037 static int
5038 5038 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
5039 5039 caddr_t cp, cred_t *credp)
5040 5040
5041 5041 {
5042 5042 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5043 5043 char *end;
5044 5044 size_t new_value;
5045 5045 int err = 0;
5046 5046
5047 5047 new_value = mi_strtol(value, &end, 10);
5048 5048 if (end == value)
5049 5049 return (EINVAL);
5050 5050 if (new_value != 0)
5051 5051 new_value = 1;
5052 5052
5053 5053 mutex_enter(&myri10ge_param_lock);
5054 5054 if (new_value != mgp->pause)
5055 5055 err = myri10ge_change_pause(mgp, new_value);
5056 5056 mutex_exit(&myri10ge_param_lock);
5057 5057 return (err);
5058 5058 }
5059 5059
5060 5060 /*ARGSUSED*/
5061 5061 static int
5062 5062 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5063 5063
5064 5064 {
5065 5065 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
5066 5066 return (0);
5067 5067 }
5068 5068
5069 5069 /*ARGSUSED*/
5070 5070 static int
5071 5071 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
5072 5072 caddr_t cp, cred_t *credp)
5073 5073
5074 5074 {
5075 5075 char *end;
5076 5076 size_t new_value;
5077 5077
5078 5078 new_value = mi_strtol(value, &end, 10);
5079 5079 if (end == value)
5080 5080 return (EINVAL);
5081 5081 *(int *)(void *)cp = new_value;
5082 5082
5083 5083 return (0);
5084 5084 }
5085 5085
/*
 * Register this instance's runtime-tunable parameters with the ndd
 * framework so they can be read/written via ND_GET/ND_SET ioctls
 * (dispatched from myri10ge_m_ioctl()).  Per-instance parameters pass
 * mgp as the callback cookie; driver-global ints pass the variable's
 * address and use the generic get/set handlers.
 */
static void
myri10ge_ndd_init(struct myri10ge_priv *mgp)
{
	mgp->nd_head = NULL;

	(void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
	    myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
	(void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
	    myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
	(void) nd_load(&mgp->nd_head, "myri10ge_verbose",
	    myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
	(void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_deassert_wait);
	(void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_bigbufs_max);
	(void) nd_load(&mgp->nd_head, "myri10ge_lro",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lro);
	(void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lro_max_aggr);
	(void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_tx_hash);
	(void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lso_copy);
}
5116 5116
/*
 * Unregister and free everything registered by myri10ge_ndd_init().
 */
static void
myri10ge_ndd_fini(struct myri10ge_priv *mgp)
{
	nd_free(&mgp->nd_head);
}
5122 5122
5123 5123 static void
5124 5124 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
5125 5125 {
5126 5126 struct iocblk *iocp;
5127 5127 struct myri10ge_priv *mgp = arg;
5128 5128 int cmd, ok, err;
5129 5129
5130 5130 iocp = (struct iocblk *)(void *)mp->b_rptr;
5131 5131 cmd = iocp->ioc_cmd;
5132 5132
5133 5133 ok = 0;
5134 5134 err = 0;
5135 5135
5136 5136 switch (cmd) {
5137 5137 case ND_GET:
5138 5138 case ND_SET:
5139 5139 ok = nd_getset(wq, mgp->nd_head, mp);
5140 5140 break;
5141 5141 default:
5142 5142 break;
5143 5143 }
5144 5144 if (!ok)
5145 5145 err = EINVAL;
5146 5146 else
5147 5147 err = iocp->ioc_error;
5148 5148
5149 5149 if (!err)
5150 5150 miocack(wq, mp, iocp->ioc_count, err);
5151 5151 else
5152 5152 miocnak(wq, mp, 0, err);
5153 5153 }
5154 5154
/* list of all attached instances; protected by myri10ge_param_lock */
static struct myri10ge_priv *mgp_list;
5156 5156
/*
 * Look up the attached instance with the given unit number and take a
 * reference on it.  Returns NULL if no such instance exists.  The
 * caller must release the reference with myri10ge_put_instance().
 */
struct myri10ge_priv *
myri10ge_get_instance(uint_t unit)
{
	struct myri10ge_priv *mgp;

	mutex_enter(&myri10ge_param_lock);
	for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
		/* NOTE(review): ddi_get_instance() returns int; unit is uint_t */
		if (unit == ddi_get_instance(mgp->dip)) {
			mgp->refcnt++;
			break;
		}
	}
	mutex_exit(&myri10ge_param_lock);
	return (mgp);
}
5172 5172
/*
 * Drop a reference taken by myri10ge_get_instance().
 */
void
myri10ge_put_instance(struct myri10ge_priv *mgp)
{
	mutex_enter(&myri10ge_param_lock);
	mgp->refcnt--;
	mutex_exit(&myri10ge_param_lock);
}
5180 5180
/*
 * GLDv3 capability query.  Advertises partial inet hardware checksum,
 * per-slice RX/TX ring information, and -- when both the tunable and
 * the firmware allow it -- basic TCP/IPv4 LSO.  Returns B_TRUE when
 * the capability is supported and cap_data has been filled in.
 */
static boolean_t
myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	struct myri10ge_priv *mgp = arg;
	uint32_t *cap_hcksum;
	mac_capab_lso_t *cap_lso;
	mac_capab_rings_t *cap_rings;

	switch (cap) {
	case MAC_CAPAB_HCKSUM:
		cap_hcksum = cap_data;
		*cap_hcksum = HCKSUM_INET_PARTIAL;
		break;
	case MAC_CAPAB_RINGS:
		cap_rings = cap_data;
		switch (cap_rings->mr_type) {
		case MAC_RING_TYPE_RX:
			/* one RX ring per slice, all in one static group */
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 1;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = myri10ge_fill_group;
			break;
		case MAC_RING_TYPE_TX:
			/* one TX ring per slice; TX rings are ungrouped */
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 0;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = NULL;
			break;
		default:
			return (B_FALSE);
		}
		break;
	case MAC_CAPAB_LSO:
		cap_lso = cap_data;
		/* LSO needs the tunable enabled AND firmware TSO support */
		if (!myri10ge_use_lso)
			return (B_FALSE);
		if (!(mgp->features & MYRI10GE_TSO))
			return (B_FALSE);
		cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
		/* no practical limit on LSO payload size */
		cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
		break;

	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}
5230 5230
5231 5231
/*
 * GLDv3 statistics entry point.  Software-counted stats are summed
 * across all slices; firmware-counted stats are read (big-endian)
 * from slice 0's fw_stats block.  Returns ENOTSUP for stats this
 * driver does not maintain.
 */
static int
myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	struct myri10ge_priv *mgp = arg;
	struct myri10ge_rx_ring_stats *rstat;
	struct myri10ge_tx_ring_stats *tstat;
	mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
	struct myri10ge_slice_state *ss;
	uint64_t tmp = 0;
	int i;

	switch (stat) {
	case MAC_STAT_IFSPEED:
		/* fixed 10Gb/s, reported in bits per second */
		*val = 10ull * 1000ull * 1000000ull;
		break;

	case MAC_STAT_MULTIRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->multircv;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->brdcstrcv;
		}
		*val = tmp;
		break;

	case MAC_STAT_MULTIXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->multixmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->brdcstxmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_NORCVBUF:
		/* firmware-side drops plus per-slice host-side drops */
		tmp = ntohl(fw_stats->dropped_no_big_buffer);
		tmp += ntohl(fw_stats->dropped_no_small_buffer);
		tmp += ntohl(fw_stats->dropped_link_overflow);
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
			tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
		}
		*val = tmp;
		break;

	case MAC_STAT_IERRORS:
		tmp += ntohl(fw_stats->dropped_bad_crc32);
		tmp += ntohl(fw_stats->dropped_bad_phy);
		tmp += ntohl(fw_stats->dropped_runt);
		tmp += ntohl(fw_stats->dropped_overrun);
		*val = tmp;
		break;

	case MAC_STAT_OERRORS:
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
			tmp += MYRI10GE_SLICE_STAT(xmit_err);
		}
		*val = tmp;
		break;

	case MAC_STAT_RBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ibytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_IPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ipackets;
		}
		*val = tmp;
		break;

	case MAC_STAT_OBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->obytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_OPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->opackets;
		}
		*val = tmp;
		break;

	case ETHER_STAT_TOOLONG_ERRORS:
		*val = ntohl(fw_stats->dropped_overrun);
		break;

#ifdef SOLARIS_S11
	case ETHER_STAT_TOOSHORT_ERRORS:
		*val = ntohl(fw_stats->dropped_runt);
		break;
#endif

	case ETHER_STAT_LINK_PAUSE:
		*val = mgp->pause;
		break;

	case ETHER_STAT_LINK_AUTONEG:
		*val = 1;
		break;

	case ETHER_STAT_LINK_DUPLEX:
		/* 10GbE is always full duplex */
		*val = LINK_DUPLEX_FULL;
		break;

	default:
		return (ENOTSUP);
	}

	return (0);
}
5369 5369
/*
 * GLDv3 callback vector.  The first word flags which optional
 * callbacks (ioctl, getcapab) are provided; the unicast-address and
 * tx entries are NULL because they are supplied through the rings
 * capability (see myri10ge_m_getcapab()).
 */
static mac_callbacks_t myri10ge_m_callbacks = {
	(MC_IOCTL | MC_GETCAPAB),
	myri10ge_m_stat,	/* get statistics */
	myri10ge_m_start,	/* bring interface up */
	myri10ge_m_stop,	/* take interface down */
	myri10ge_m_promisc,	/* toggle promiscuous mode */
	myri10ge_m_multicst,	/* add/remove multicast address */
	NULL,			/* unused entry */
	NULL,			/* unused entry */
	NULL,			/* unused entry */
	myri10ge_m_ioctl,	/* ndd ioctls */
	myri10ge_m_getcapab	/* capability queries */
};
5383 5383
5384 5384
5385 5385 static int
5386 5386 myri10ge_probe_slices(struct myri10ge_priv *mgp)
5387 5387 {
5388 5388 myri10ge_cmd_t cmd;
5389 5389 int status;
5390 5390
5391 5391 mgp->num_slices = 1;
5392 5392
5393 5393 /* hit the board with a reset to ensure it is alive */
5394 5394 (void) memset(&cmd, 0, sizeof (cmd));
5395 5395 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
5396 5396 if (status != 0) {
5397 5397 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
5398 5398 return (ENXIO);
5399 5399 }
5400 5400
5401 5401 if (myri10ge_use_msix == 0)
5402 5402 return (0);
5403 5403
5404 5404 /* tell it the size of the interrupt queues */
5405 5405 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
5406 5406 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
5407 5407 if (status != 0) {
5408 5408 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
5409 5409 mgp->name);
5410 5410 return (ENXIO);
5411 5411 }
5412 5412
5413 5413 /* ask the maximum number of slices it supports */
5414 5414 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
5415 5415 &cmd);
5416 5416 if (status != 0)
5417 5417 return (0);
5418 5418
5419 5419 mgp->num_slices = cmd.data0;
5420 5420
5421 5421 /*
5422 5422 * if the admin did not specify a limit to how many
5423 5423 * slices we should use, cap it automatically to the
5424 5424 * number of CPUs currently online
5425 5425 */
5426 5426 if (myri10ge_max_slices == -1)
5427 5427 myri10ge_max_slices = ncpus;
5428 5428
5429 5429 if (mgp->num_slices > myri10ge_max_slices)
↓ open down ↓ |
3278 lines elided |
↑ open up ↑ |
5430 5430 mgp->num_slices = myri10ge_max_slices;
5431 5431
5432 5432
5433 5433 /*
5434 5434 * Now try to allocate as many MSI-X vectors as we have
5435 5435 * slices. We give up on MSI-X if we can only get a single
5436 5436 * vector.
5437 5437 */
5438 5438 while (mgp->num_slices > 1) {
5439 5439 /* make sure it is a power of two */
5440 - while (mgp->num_slices & (mgp->num_slices - 1))
5440 + while (!ISP2(mgp->num_slices))
5441 5441 mgp->num_slices--;
5442 5442 if (mgp->num_slices == 1)
5443 5443 return (0);
5444 5444
5445 5445 status = myri10ge_add_intrs(mgp, 0);
5446 5446 if (status == 0) {
5447 5447 myri10ge_rem_intrs(mgp, 0);
5448 5448 if (mgp->intr_cnt == mgp->num_slices) {
5449 5449 if (myri10ge_verbose)
5450 5450 printf("Got %d slices!\n",
5451 5451 mgp->num_slices);
5452 5452 return (0);
5453 5453 }
5454 5454 mgp->num_slices = mgp->intr_cnt;
5455 5455 } else {
5456 5456 mgp->num_slices = mgp->num_slices / 2;
5457 5457 }
5458 5458 }
5459 5459
5460 5460 if (myri10ge_verbose)
5461 5461 printf("Got %d slices\n", mgp->num_slices);
5462 5462 return (0);
5463 5463 }
5464 5464
5465 5465 static void
5466 5466 myri10ge_lro_free(struct myri10ge_slice_state *ss)
5467 5467 {
5468 5468 struct lro_entry *lro;
5469 5469
5470 5470 while (ss->lro_free != NULL) {
5471 5471 lro = ss->lro_free;
5472 5472 ss->lro_free = lro->next;
5473 5473 kmem_free(lro, sizeof (*lro));
5474 5474 }
5475 5475 }
5476 5476
5477 5477 static void
5478 5478 myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
5479 5479 {
5480 5480 struct lro_entry *lro;
5481 5481 int idx;
5482 5482
5483 5483 ss->lro_free = NULL;
5484 5484 ss->lro_active = NULL;
5485 5485
5486 5486 for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
5487 5487 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
5488 5488 if (lro == NULL)
5489 5489 continue;
5490 5490 lro->next = ss->lro_free;
5491 5491 ss->lro_free = lro;
5492 5492 }
5493 5493 }
5494 5494
5495 5495 static void
5496 5496 myri10ge_free_slices(struct myri10ge_priv *mgp)
5497 5497 {
5498 5498 struct myri10ge_slice_state *ss;
5499 5499 size_t bytes;
5500 5500 int i;
5501 5501
5502 5502 if (mgp->ss == NULL)
5503 5503 return;
5504 5504
5505 5505 for (i = 0; i < mgp->num_slices; i++) {
5506 5506 ss = &mgp->ss[i];
5507 5507 if (ss->rx_done.entry == NULL)
5508 5508 continue;
5509 5509 myri10ge_dma_free(&ss->rx_done.dma);
5510 5510 ss->rx_done.entry = NULL;
5511 5511 if (ss->fw_stats == NULL)
5512 5512 continue;
5513 5513 myri10ge_dma_free(&ss->fw_stats_dma);
5514 5514 ss->fw_stats = NULL;
5515 5515 mutex_destroy(&ss->rx_lock);
5516 5516 mutex_destroy(&ss->tx.lock);
5517 5517 mutex_destroy(&ss->tx.handle_lock);
5518 5518 mutex_destroy(&ss->poll_lock);
5519 5519 myri10ge_jpool_fini(ss);
5520 5520 myri10ge_slice_stat_destroy(ss);
5521 5521 myri10ge_lro_free(ss);
5522 5522 }
5523 5523 bytes = sizeof (*mgp->ss) * mgp->num_slices;
5524 5524 kmem_free(mgp->ss, bytes);
5525 5525 mgp->ss = NULL;
5526 5526 }
5527 5527
5528 5528
/*
 * Allocate the per-slice state array and, for each slice, the
 * DMA-able firmware stats block and rx completion ("done") ring,
 * plus the slice's locks, jumbo buffer pool, kstats and LRO entries.
 * On any failure, everything allocated so far is released via
 * myri10ge_free_slices() and ENOMEM is returned.
 */
static int
myri10ge_alloc_slices(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss;
	size_t bytes;
	int i;

	bytes = sizeof (*mgp->ss) * mgp->num_slices;
	/* KM_SLEEP cannot fail; the NULL check is purely defensive */
	mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
	if (mgp->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];

		ss->mgp = mgp;

		/* allocate the per-slice firmware stats */
		bytes = sizeof (*ss->fw_stats);
		ss->fw_stats = (mcp_irq_data_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
		if (ss->fw_stats == NULL)
			goto abort;
		(void) memset(ss->fw_stats, 0, bytes);

		/* allocate rx done ring */
		bytes = mgp->max_intr_slots *
		    sizeof (*ss->rx_done.entry);
		ss->rx_done.entry = (mcp_slot_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
		if (ss->rx_done.entry == NULL) {
			goto abort;
		}
		(void) memset(ss->rx_done.entry, 0, bytes);
		/* rx_lock is taken from interrupt context */
		mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie);
		mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL);
		myri10ge_jpool_init(ss);
		(void) myri10ge_slice_stat_init(ss);
		myri10ge_lro_alloc(ss);
	}

	return (0);

abort:
	myri10ge_free_slices(mgp);
	return (ENOMEM);
}
5583 5583
/*
 * Save the device's MSI capability registers (control, 64-bit address
 * and both data registers) so they can be replayed after suspend or a
 * NIC reset.  Returns DDI_SUCCESS, or DDI_FAILURE if no MSI
 * capability is present.
 */
static int
myri10ge_save_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
	uint8_t ptr;
	int err;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
	if (err != 0) {
		cmn_err(CE_WARN, "%s: could not find MSI cap\n",
		    mgp->name);
		return (DDI_FAILURE);
	}
	mgp->pci_saved_state.msi_ctrl =
	    pci_config_get16(handle, ptr + PCI_MSI_CTRL);
	mgp->pci_saved_state.msi_addr_low =
	    pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
	mgp->pci_saved_state.msi_addr_high =
	    pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
	mgp->pci_saved_state.msi_data_32 =
	    pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
	mgp->pci_saved_state.msi_data_64 =
	    pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
	return (DDI_SUCCESS);
}
5609 5609
/*
 * Write back the MSI capability registers captured by
 * myri10ge_save_msi_state().  Returns DDI_SUCCESS, or DDI_FAILURE if
 * no MSI capability is present.
 */
static int
myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
	uint8_t ptr;
	int err;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
	if (err != 0) {
		cmn_err(CE_WARN, "%s: could not find MSI cap\n",
		    mgp->name);
		return (DDI_FAILURE);
	}

	pci_config_put16(handle, ptr + PCI_MSI_CTRL,
	    mgp->pci_saved_state.msi_ctrl);
	pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
	    mgp->pci_saved_state.msi_addr_low);
	pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
	    mgp->pci_saved_state.msi_addr_high);
	pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
	    mgp->pci_saved_state.msi_data_32);
	pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
	    mgp->pci_saved_state.msi_data_64);

	return (DDI_SUCCESS);
}
5637 5637
5638 5638 static int
5639 5639 myri10ge_save_pci_state(struct myri10ge_priv *mgp)
5640 5640 {
5641 5641 ddi_acc_handle_t handle = mgp->cfg_hdl;
5642 5642 int i;
5643 5643 int err = DDI_SUCCESS;
5644 5644
5645 5645
5646 5646 /* Save the non-extended PCI config space 32-bits at a time */
5647 5647 for (i = 0; i < 16; i++)
5648 5648 mgp->pci_saved_state.base[i] =
5649 5649 pci_config_get32(handle, i*4);
5650 5650
5651 5651 /* now save MSI interrupt state *, if needed */
5652 5652 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5653 5653 err = myri10ge_save_msi_state(mgp, handle);
5654 5654
5655 5655 return (err);
5656 5656 }
5657 5657
/*
 * Replay the PCI config space captured by myri10ge_save_pci_state()
 * (and MSI state if applicable), then re-apply the 4KB max read
 * request setting when it was in use.  Registers are written in
 * reverse order so the command register (word 1) is restored after
 * the BARs it enables.
 */
static int
myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	int i;
	int err = DDI_SUCCESS;


	/* Restore the non-extended PCI config space 32-bits at a time */
	for (i = 15; i >= 0; i--)
		pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);

	/* now restore MSI interrupt state *, if needed */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
		err = myri10ge_restore_msi_state(mgp, handle);

	if (mgp->max_read_request_4k)
		(void) myri10ge_set_max_readreq(handle);
	return (err);
}
5678 5678
5679 5679
/*
 * DDI_SUSPEND handler: if the interface is running, stop it and mark it
 * MYRI10GE_ETH_SUSPENDED_RUNNING so that myri10ge_resume() knows to
 * restart it; then snapshot PCI config space for replay at resume.
 */
static int
myri10ge_suspend(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	mutex_enter(&mgp->intrlock);
	if (mgp->running == MYRI10GE_ETH_RUNNING) {
		/* STOPPING blocks a new watchdog from being rearmed */
		mgp->running = MYRI10GE_ETH_STOPPING;
		/*
		 * NOTE(review): intrlock is dropped around untimeout() —
		 * presumably because the watchdog handler itself takes
		 * intrlock and holding it here could deadlock; confirm
		 * against myri10ge_watchdog before changing this order.
		 */
		mutex_exit(&mgp->intrlock);
		(void) untimeout(mgp->timer_id);
		mutex_enter(&mgp->intrlock);
		myri10ge_stop_locked(mgp);
		/* remember "was running" so resume restarts the NIC */
		mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
	}
	status = myri10ge_save_pci_state(mgp);
	mutex_exit(&mgp->intrlock);
	return (status);
}
5707 5707
5708 5708 static int
5709 5709 myri10ge_resume(dev_info_t *dip)
5710 5710 {
5711 5711 struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
5712 5712 int status = DDI_SUCCESS;
5713 5713
5714 5714 if (mgp == NULL) {
5715 5715 cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
5716 5716 return (DDI_FAILURE);
5717 5717 }
5718 5718 if (mgp->dip != dip) {
5719 5719 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
5720 5720 return (DDI_FAILURE);
5721 5721 }
5722 5722
5723 5723 mutex_enter(&mgp->intrlock);
5724 5724 status = myri10ge_restore_pci_state(mgp);
5725 5725 if (status == DDI_SUCCESS &&
5726 5726 mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
5727 5727 status = myri10ge_start_locked(mgp);
5728 5728 }
5729 5729 mutex_exit(&mgp->intrlock);
5730 5730 if (status != DDI_SUCCESS)
5731 5731 return (status);
5732 5732
5733 5733 /* start the watchdog timer */
5734 5734 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
5735 5735 mgp->timer_ticks);
5736 5736 return (DDI_SUCCESS);
5737 5737 }
5738 5738
5739 5739 static int
5740 5740 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5741 5741 {
5742 5742
5743 5743 struct myri10ge_priv *mgp;
5744 5744 mac_register_t *macp, *omacp;
5745 5745 ddi_acc_handle_t handle;
5746 5746 uint32_t csr, hdr_offset;
5747 5747 int status, span, link_width, max_read_request_4k;
5748 5748 unsigned long bus_number, dev_number, func_number;
5749 5749 size_t bytes;
5750 5750 offset_t ss_offset;
5751 5751 uint8_t vso;
5752 5752
5753 5753 if (cmd == DDI_RESUME) {
5754 5754 return (myri10ge_resume(dip));
5755 5755 }
5756 5756
5757 5757 if (cmd != DDI_ATTACH)
5758 5758 return (DDI_FAILURE);
5759 5759 if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
5760 5760 return (DDI_FAILURE);
5761 5761
5762 5762 /* enable busmater and io space access */
5763 5763 csr = pci_config_get32(handle, PCI_CONF_COMM);
5764 5764 pci_config_put32(handle, PCI_CONF_COMM,
5765 5765 (csr |PCI_COMM_ME|PCI_COMM_MAE));
5766 5766 status = myri10ge_read_pcie_link_width(handle, &link_width);
5767 5767 if (status != 0) {
5768 5768 cmn_err(CE_WARN, "could not read link width!\n");
5769 5769 link_width = 0;
5770 5770 }
5771 5771 max_read_request_4k = !myri10ge_set_max_readreq(handle);
5772 5772 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
5773 5773 if (status != 0)
5774 5774 goto abort_with_cfg_hdl;
5775 5775 if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
5776 5776 goto abort_with_cfg_hdl;
5777 5777 /*
5778 5778 * XXXX Hack: mac_register_t grows in newer kernels. To be
5779 5779 * able to write newer fields, such as m_margin, without
5780 5780 * writing outside allocated memory, we allocate our own macp
5781 5781 * and pass that to mac_register()
5782 5782 */
5783 5783 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
5784 5784 macp->m_version = omacp->m_version;
5785 5785
5786 5786 if ((mgp = (struct myri10ge_priv *)
5787 5787 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
5788 5788 goto abort_with_macinfo;
5789 5789 }
5790 5790 ddi_set_driver_private(dip, mgp);
5791 5791
5792 5792 /* setup device name for log messages */
5793 5793 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));
5794 5794
5795 5795 mutex_enter(&myri10ge_param_lock);
5796 5796 myri10ge_get_props(dip);
5797 5797 mgp->intr_coal_delay = myri10ge_intr_coal_delay;
5798 5798 mgp->pause = myri10ge_flow_control;
5799 5799 mutex_exit(&myri10ge_param_lock);
5800 5800
5801 5801 mgp->max_read_request_4k = max_read_request_4k;
5802 5802 mgp->pcie_link_width = link_width;
5803 5803 mgp->running = MYRI10GE_ETH_STOPPED;
5804 5804 mgp->vso = vso;
5805 5805 mgp->dip = dip;
5806 5806 mgp->cfg_hdl = handle;
5807 5807
5808 5808 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
5809 5809 myri10ge_test_physical(dip);
5810 5810
5811 5811 /* allocate command page */
5812 5812 bytes = sizeof (*mgp->cmd);
5813 5813 mgp->cmd = (mcp_cmd_response_t *)
5814 5814 (void *)myri10ge_dma_alloc(dip, bytes,
5815 5815 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5816 5816 DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
5817 5817 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
5818 5818 if (mgp->cmd == NULL)
5819 5819 goto abort_with_mgp;
5820 5820
5821 5821 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
5822 5822 &dev_number, &func_number);
5823 5823 if (myri10ge_verbose)
5824 5824 printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
5825 5825 bus_number, dev_number, func_number);
5826 5826 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
5827 5827 (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr,
5828 5828 &mgp->io_handle);
5829 5829 if (status != DDI_SUCCESS) {
5830 5830 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
5831 5831 printf("%s: reg_set = %d, span = %d, status = %d",
5832 5832 mgp->name, mgp->reg_set, span, status);
5833 5833 goto abort_with_mgp;
5834 5834 }
5835 5835
5836 5836 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET);
5837 5837 hdr_offset = ntohl(hdr_offset) & 0xffffc;
5838 5838 ss_offset = hdr_offset +
5839 5839 offsetof(struct mcp_gen_header, string_specs);
5840 5840 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
5841 5841 myri10ge_pio_copy32(mgp->eeprom_strings,
5842 5842 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
5843 5843 MYRI10GE_EEPROM_STRINGS_SIZE);
5844 5844 (void) memset(mgp->eeprom_strings +
5845 5845 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
5846 5846
5847 5847 status = myri10ge_read_mac_addr(mgp);
5848 5848 if (status) {
5849 5849 goto abort_with_mapped;
5850 5850 }
5851 5851
5852 5852 status = myri10ge_select_firmware(mgp);
5853 5853 if (status != 0) {
5854 5854 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
5855 5855 goto abort_with_mapped;
5856 5856 }
5857 5857
5858 5858 status = myri10ge_probe_slices(mgp);
5859 5859 if (status != 0) {
5860 5860 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
5861 5861 goto abort_with_dummy_rdma;
5862 5862 }
5863 5863
5864 5864 status = myri10ge_alloc_slices(mgp);
5865 5865 if (status != 0) {
5866 5866 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
5867 5867 goto abort_with_dummy_rdma;
5868 5868 }
5869 5869
5870 5870 /* add the interrupt handler */
5871 5871 status = myri10ge_add_intrs(mgp, 1);
5872 5872 if (status != 0) {
5873 5873 cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
5874 5874 mgp->name);
5875 5875 goto abort_with_slices;
5876 5876 }
5877 5877
5878 5878 /* now that we have an iblock_cookie, init the mutexes */
5879 5879 mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
5880 5880 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);
5881 5881
5882 5882
5883 5883 status = myri10ge_nic_stat_init(mgp);
5884 5884 if (status != DDI_SUCCESS)
5885 5885 goto abort_with_interrupts;
5886 5886 status = myri10ge_info_init(mgp);
5887 5887 if (status != DDI_SUCCESS)
5888 5888 goto abort_with_stats;
5889 5889
5890 5890 /*
5891 5891 * Initialize GLD state
5892 5892 */
5893 5893
5894 5894 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
5895 5895 macp->m_driver = mgp;
5896 5896 macp->m_dip = dip;
5897 5897 macp->m_src_addr = mgp->mac_addr;
5898 5898 macp->m_callbacks = &myri10ge_m_callbacks;
5899 5899 macp->m_min_sdu = 0;
5900 5900 macp->m_max_sdu = myri10ge_mtu -
5901 5901 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
5902 5902 #ifdef SOLARIS_S11
5903 5903 macp->m_margin = VLAN_TAGSZ;
5904 5904 #endif
5905 5905 macp->m_v12n = MAC_VIRT_LEVEL1;
5906 5906 status = mac_register(macp, &mgp->mh);
5907 5907 if (status != 0) {
5908 5908 cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
5909 5909 mgp->name, status);
5910 5910 goto abort_with_info;
5911 5911 }
5912 5912 myri10ge_ndd_init(mgp);
5913 5913 if (myri10ge_verbose)
5914 5914 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
5915 5915 mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
5916 5916 mutex_enter(&myri10ge_param_lock);
5917 5917 mgp->next = mgp_list;
5918 5918 mgp_list = mgp;
5919 5919 mutex_exit(&myri10ge_param_lock);
5920 5920 kmem_free(macp, sizeof (*macp) * 8);
5921 5921 mac_free(omacp);
5922 5922 return (DDI_SUCCESS);
5923 5923
5924 5924 abort_with_info:
5925 5925 myri10ge_info_destroy(mgp);
5926 5926
5927 5927 abort_with_stats:
5928 5928 myri10ge_nic_stat_destroy(mgp);
5929 5929
5930 5930 abort_with_interrupts:
5931 5931 mutex_destroy(&mgp->cmd_lock);
5932 5932 mutex_destroy(&mgp->intrlock);
5933 5933 myri10ge_rem_intrs(mgp, 1);
5934 5934
5935 5935 abort_with_slices:
5936 5936 myri10ge_free_slices(mgp);
5937 5937
5938 5938 abort_with_dummy_rdma:
5939 5939 myri10ge_dummy_rdma(mgp, 0);
5940 5940
5941 5941 abort_with_mapped:
5942 5942 ddi_regs_map_free(&mgp->io_handle);
5943 5943
5944 5944 myri10ge_dma_free(&mgp->cmd_dma);
5945 5945
5946 5946 abort_with_mgp:
5947 5947 kmem_free(mgp, sizeof (*mgp));
5948 5948
5949 5949 abort_with_macinfo:
5950 5950 kmem_free(macp, sizeof (*macp) * 8);
5951 5951 mac_free(omacp);
5952 5952
5953 5953 abort_with_cfg_hdl:
5954 5954 pci_config_teardown(&handle);
5955 5955 return (DDI_FAILURE);
5956 5956
5957 5957 }
5958 5958
5959 5959
/*
 * detach(9E) entry point.  DDI_SUSPEND is delegated to myri10ge_suspend();
 * DDI_DETACH tears the instance down in reverse order of attach, but
 * refuses to proceed while loaned rx buffers or external references
 * remain outstanding.
 */
static int
myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct myri10ge_priv *mgp, *tmp;
	int status, i, jbufs_alloced;

	if (cmd == DDI_SUSPEND) {
		status = myri10ge_suspend(dip);
		return (status);
	}

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	/* Get the driver private (struct myri10ge_priv) structure */
	mgp = ddi_get_driver_private(dip);

	/* fail the detach while any jumbo rx buffers are still loaned out */
	mutex_enter(&mgp->intrlock);
	jbufs_alloced = 0;
	for (i = 0; i < mgp->num_slices; i++) {
		myri10ge_remove_jbufs(&mgp->ss[i]);
		jbufs_alloced += mgp->ss[i].jpool.num_alloc;
	}
	mutex_exit(&mgp->intrlock);
	if (jbufs_alloced != 0) {
		cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
		    mgp->name, jbufs_alloced);
		return (DDI_FAILURE);
	}

	/* likewise fail while external references are held */
	mutex_enter(&myri10ge_param_lock);
	if (mgp->refcnt != 0) {
		mutex_exit(&myri10ge_param_lock);
		cmn_err(CE_NOTE, "%s: %d external refs remain\n",
		    mgp->name, mgp->refcnt);
		return (DDI_FAILURE);
	}
	mutex_exit(&myri10ge_param_lock);

	status = mac_unregister(mgp->mh);
	if (status != DDI_SUCCESS)
		return (status);

	myri10ge_ndd_fini(mgp);
	myri10ge_dummy_rdma(mgp, 0);
	myri10ge_nic_stat_destroy(mgp);
	myri10ge_info_destroy(mgp);

	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);

	myri10ge_rem_intrs(mgp, 1);

	myri10ge_free_slices(mgp);
	ddi_regs_map_free(&mgp->io_handle);
	myri10ge_dma_free(&mgp->cmd_dma);
	pci_config_teardown(&mgp->cfg_hdl);

	/* unlink this instance from the global list, then free it */
	mutex_enter(&myri10ge_param_lock);
	if (mgp_list == mgp) {
		mgp_list = mgp->next;
	} else {
		tmp = mgp_list;
		while (tmp->next != mgp && tmp->next != NULL)
			tmp = tmp->next;
		if (tmp->next != NULL)
			tmp->next = tmp->next->next;
	}
	kmem_free(mgp, sizeof (*mgp));
	mutex_exit(&myri10ge_param_lock);
	return (DDI_SUCCESS);
}
6032 6032
6033 6033 /*
6034 6034 * Helper for quiesce entry point: Interrupt threads are not being
6035 6035 * scheduled, so we must poll for the confirmation DMA to arrive in
6036 6036 * the firmware stats block for slice 0. We're essentially running
6037 6037 * the guts of the interrupt handler, and just cherry picking the
6038 6038 * confirmation that the NIC is queuesced (stats->link_down)
6039 6039 */
6040 6040
6041 6041 static int
6042 6042 myri10ge_poll_down(struct myri10ge_priv *mgp)
6043 6043 {
6044 6044 struct myri10ge_slice_state *ss = mgp->ss;
6045 6045 mcp_irq_data_t *stats = ss->fw_stats;
6046 6046 int valid;
6047 6047 int found_down = 0;
6048 6048
6049 6049
6050 6050 /* check for a pending IRQ */
6051 6051
6052 6052 if (! *((volatile uint8_t *)& stats->valid))
6053 6053 return (0);
6054 6054 valid = stats->valid;
6055 6055
6056 6056 /*
6057 6057 * Make sure to tell the NIC to lower a legacy IRQ, else
6058 6058 * it may have corrupt state after restarting
6059 6059 */
6060 6060
6061 6061 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
6062 6062 /* lower legacy IRQ */
6063 6063 *mgp->irq_deassert = 0;
6064 6064 mb();
6065 6065 /* wait for irq conf DMA */
6066 6066 while (*((volatile uint8_t *)& stats->valid))
6067 6067 ;
6068 6068 }
6069 6069 if (stats->stats_updated && stats->link_down)
6070 6070 found_down = 1;
6071 6071
6072 6072 if (valid & 0x1)
6073 6073 *ss->irq_claim = BE_32(3);
6074 6074 *(ss->irq_claim + 1) = BE_32(3);
6075 6075
6076 6076 return (found_down);
6077 6077 }
6078 6078
/*
 * quiesce(9E) entry point: bring the NIC to a stopped state without
 * using interrupts, by sending a down command and polling for the
 * firmware's link-down confirmation via myri10ge_poll_down().
 */
static int
myri10ge_quiesce(dev_info_t *dip)
{
	struct myri10ge_priv *mgp;
	myri10ge_cmd_t cmd;
	int status, down, i;

	mgp = ddi_get_driver_private(dip);
	if (mgp == NULL)
		return (DDI_FAILURE);

	/* if device was unplumbed, it is guaranteed to be quiescent */
	if (mgp->running == MYRI10GE_ETH_STOPPED)
		return (DDI_SUCCESS);

	/* send a down CMD to quiesce NIC */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
		return (DDI_FAILURE);
	}

	/* poll up to 20 x 100ms (~2s) for the link-down confirmation */
	for (i = 0; i < 20; i++) {
		down = myri10ge_poll_down(mgp);
		if (down)
			break;
		delay(drv_usectohz(100000));
		mb();
	}
	if (down)
		return (DDI_SUCCESS);
	return (DDI_FAILURE);
}
6112 6112
6113 6113 /*
6114 6114 * Distinguish between allocb'ed blocks, and gesballoc'ed attached
6115 6115 * storage.
6116 6116 */
6117 6117 static void
6118 6118 myri10ge_find_lastfree(void)
6119 6119 {
6120 6120 mblk_t *mp = allocb(1024, 0);
6121 6121 dblk_t *dbp;
6122 6122
6123 6123 if (mp == NULL) {
6124 6124 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
6125 6125 return;
6126 6126 }
6127 6127 dbp = mp->b_datap;
6128 6128 myri10ge_db_lastfree = (void *)dbp->db_lastfree;
6129 6129 }
6130 6130
6131 6131 int
6132 6132 _init(void)
6133 6133 {
6134 6134 int i;
6135 6135
6136 6136 if (myri10ge_verbose)
6137 6137 cmn_err(CE_NOTE,
6138 6138 "Myricom 10G driver (10GbE) version %s loading\n",
6139 6139 MYRI10GE_VERSION_STR);
6140 6140 myri10ge_find_lastfree();
6141 6141 mac_init_ops(&myri10ge_ops, "myri10ge");
6142 6142 mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
6143 6143 if ((i = mod_install(&modlinkage)) != 0) {
6144 6144 cmn_err(CE_WARN, "mod_install returned %d\n", i);
6145 6145 mac_fini_ops(&myri10ge_ops);
6146 6146 mutex_destroy(&myri10ge_param_lock);
6147 6147 }
6148 6148 return (i);
6149 6149 }
6150 6150
6151 6151 int
6152 6152 _fini(void)
6153 6153 {
6154 6154 int i;
6155 6155 i = mod_remove(&modlinkage);
6156 6156 if (i != 0) {
6157 6157 return (i);
6158 6158 }
6159 6159 mac_fini_ops(&myri10ge_ops);
6160 6160 mutex_destroy(&myri10ge_param_lock);
6161 6161 return (0);
6162 6162 }
6163 6163
/* _info(9E): report module information via the modlinkage */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
6169 6169
6170 6170
6171 6171 /*
6172 6172 * This file uses MyriGE driver indentation.
6173 6173 *
6174 6174 * Local Variables:
6175 6175 * c-file-style:"sun"
6176 6176 * tab-width:8
6177 6177 * End:
6178 6178 */
↓ open down ↓ |
728 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX