/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, Joyent, Inc.
 */

#ifndef lint
static const char __idstring[] =
	"@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
#endif

#define	MXGEFW_NDIS
#include "myri10ge_var.h"
#include "rss_eth_z8e.h"
#include "rss_ethp_z8e.h"
#include "mcp_gen_header.h"

#define	MYRI10GE_MAX_ETHER_MTU 9014
#define	MYRI10GE_MAX_GLD_MTU	9000
#define	MYRI10GE_MIN_GLD_MTU	1500

#define	MYRI10GE_ETH_STOPPED 0
#define	MYRI10GE_ETH_STOPPING 1
#define	MYRI10GE_ETH_STARTING 2
#define	MYRI10GE_ETH_RUNNING 3
#define	MYRI10GE_ETH_OPEN_FAILED 4
#define	MYRI10GE_ETH_SUSPENDED_RUNNING 5

static int myri10ge_small_bytes = 510;
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
int myri10ge_tx_handles_initial = 128;

static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;

static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);


static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};


static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

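/*
 * Note on the tunables above: they are plain module globals, so on
 * illumos/Solaris they can be overridden at boot time from
 * /etc/system before the module loads.  A hedged example (variable
 * names as defined above; the values are purely illustrative, not
 * recommendations):
 *
 *	set myri10ge:myri10ge_intr_coal_delay = 50
 *	set myri10ge:myri10ge_flow_control = 0
 */
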
static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/*
 * The Myri10GE NIC has the following constraints on receive buffers:
 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 */

static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

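/*
 * To make constraint (2) above concrete (an illustrative example,
 * not from the hardware docs): a 1520-byte buffer placed at physical
 * offset 0xf80 would span 0xf80-0x156f, crossing the 4KB boundary at
 * 0x1000 while being only 128-byte aligned.  The non-sparc std
 * attributes encode the fix directly: dma_attr_seg = 0xfff tells the
 * DDI that no allocation may cross a 4KB boundary, so two 1520-byte
 * buffers can still share one page without violating the rule.
 */
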
static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0, 			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

#if defined sparc64 || defined __sparcv9
#define	WC 0
#else
#define	WC 1
#endif

struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,		/* version */
	DDI_NEVERSWAP_ACC,		/* endian flags */
#if WC
	DDI_MERGING_OK_ACC		/* data order */
#else
	DDI_STRICTORDER_ACC
#endif
};

static void myri10ge_watchdog(void *arg);

#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MAX_GLD_MTU
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MIN_GLD_MTU
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;


caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void*) dip, len, (void*) attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void*) accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void*) dma);
	}
	return (NULL);

}

void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}

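/*
 * Typical usage of the helper above, sketched for reference (the
 * attribute/flag combination is illustrative; `mgp' is assumed to be
 * an attached instance):
 *
 *	struct myri10ge_dma_stuff dma;
 *	caddr_t va;
 *
 *	va = myri10ge_dma_alloc(mgp->dip, 4096,
 *	    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
 *	    DDI_DMA_CONSISTENT, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
 *	    &dma, 1, DDI_DMA_DONTWAIT);
 *	if (va == NULL)
 *		return (ENOMEM);
 *	...
 *	myri10ge_dma_free(&dma);
 *
 * Note that dma.low/dma.high come back pre-swapped to network byte
 * order, ready to be handed to the firmware.
 */
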
static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	register volatile uint32_t *to32;
	size_t i;

	to32 = (volatile uint32_t *) to;
	for (i = (size / 4); i; i--) {
		*to32 = *from32;
		to32++;
		from32++;
	}
}

#if defined(_LP64)
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	register volatile uint64_t *to64;
	size_t i;

	to64 = (volatile uint64_t *) to;
	for (i = (size / 8); i; i--) {
		*to64 = *from64;
		to64++;
		from64++;
	}
}
#endif

/*
 * This routine copies memory from the host to the NIC.
 * The "size" argument must always be a multiple of
 * the size of long (4 or 8 bytes), and to/from must also
 * be naturally aligned.
 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if !defined(_LP64)
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#else
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#endif
}


/*
 * Due to various bugs in Solaris (especially bug 6186772 where the
 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 * than two elements), and the design bug where hardware checksums are
 * ignored on mblk chains with more than 2 elements, we need to
 * allocate a private pool of physically contiguous receive buffers.
 */

static void
myri10ge_jpool_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	bzero(jpool, sizeof (*jpool));
	mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
	    ss->mgp->icookie);
	jpool->head = NULL;
}

static void
myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	if (jpool->head != NULL) {
		cmn_err(CE_WARN,
		    "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
		    ss->mgp->name);
	}
	mutex_destroy(&jpool->mtx);
}

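/*
 * A note on the buffer-recycling fast path used by the jpool: freed
 * jumbos are pushed onto per-CPU stacks with a compare-and-swap loop
 * (see myri10ge_jfree_rtn() below), and myri10ge_pull_jpool() later
 * drains each stack with a single atomic_swap_ulong().  A minimal
 * sketch of the push side, assuming a `headp' word of uintptr_t:
 *
 *	uintptr_t old, new = (uintptr_t)j;
 *	do {
 *		old = *headp;
 *		j->next = (void *)old;
 *	} while (atomic_cas_ulong(headp, old, new) != old);
 *
 * Because the only "pop" is a swap of the entire list under the
 * jpool mutex, the classic ABA hazard of lockless stacks is avoided.
 */
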
/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
{
	src->addr_low |= BE_32(1);
	myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low &= ~(BE_32(1));
	dst->addr_low = src->addr_low;
	mb();
}

static void
myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *jtail, *j, *jfree;
	volatile uintptr_t *putp;
	uintptr_t put;
	int i;

	/* find tail */
	jtail = NULL;
	if (jpool->head != NULL) {
		j = jpool->head;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}

	/*
	 * iterate over all per-CPU caches, and add contents into
	 * jpool
	 */
	for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
		/* take per-CPU free list */
		putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
		if (*putp == NULL)
			continue;
		put = atomic_swap_ulong(putp, 0);
		jfree = (struct myri10ge_jpool_entry *)put;

		/* append to pool */
		if (jtail == NULL) {
			jpool->head = jfree;
		} else {
			jtail->next = jfree;
		}
		j = jfree;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}
}

/*
 * Transfers buffers from the free pool to the nic
 * Must be called holding the jpool mutex.
 */

static inline void
myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i, idx, limit;

	rx = &ss->rx_big;
	limit = ss->j_rx_cnt + (rx->mask + 1);

	for (i = rx->cnt; i != limit; i++) {
		idx = i & (rx->mask);
		j = jpool->head;
		if (j == NULL) {
			myri10ge_pull_jpool(ss);
			j = jpool->head;
			if (j == NULL) {
				break;
			}
		}
		jpool->head = j->next;
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;
		/* copy 4 descriptors (32-bytes) to the mcp at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
			    &rx->shadow[idx - 7]);
		}
	}
	rx->cnt = i;
}

/*
 * Transfer buffers from the nic to the free pool.
 * Should be called holding the jpool mutex
 */

static inline void
myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i;

	mutex_enter(&jpool->mtx);
	rx = &ss->rx_big;

	for (i = 0; i < rx->mask + 1; i++) {
		j = rx->info[i].j;
		rx->info[i].j = NULL;
		if (j == NULL)
			continue;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);

}

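/*
 * How the pieces above fit together on the receive path (a rough
 * sketch; the actual receive code appears later in this file): each
 * jumbo is loaned upstream zero-copy by wrapping it in an mblk whose
 * free routine returns it to the pool, roughly:
 *
 *	mblk_t *mp = desballoc((unsigned char *)j->buf,
 *	    myri10ge_mtu, 0, &j->free_func);
 *
 * When the stack eventually calls freemsg(), STREAMS invokes
 * j->free_func.free_func (myri10ge_jfree_rtn below) with
 * j->free_func.free_arg, and the buffer lands back on a per-CPU
 * freelist instead of being freed to the kernel.
 */
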
/*
 * Free routine which is called when the mblk allocated via
 * esballoc() is freed.  Here we return the jumbo buffer
 * to the free pool, and possibly pass some jumbo buffers
 * to the nic
 */

static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}

static void
myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
{
	(void) ddi_dma_unbind_handle(j->dma_handle);
	ddi_dma_mem_free(&j->acc_handle);
	ddi_dma_free_handle(&j->dma_handle);
	kmem_free(j, sizeof (*j));
}


/*
 * Allocates one physically contiguous descriptor
 * and adds it to the jumbo buffer pool.
 */

static int
myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_entry *j;
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	ddi_dma_attr_t *rx_dma_attr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	if (myri10ge_mtu < 2048)
		rx_dma_attr = &myri10ge_rx_std_dma_attr;
	else
		rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;

again:
	j = (struct myri10ge_jpool_entry *)
	    kmem_alloc(sizeof (*j), KM_SLEEP);
	err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_j;

	err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &j->buf, &real_length, &j->acc_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_handle;

	err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
	    real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &cookie, &count);
	if (err != DDI_SUCCESS)
		goto abort_with_mem;

	/*
	 * Make certain std MTU buffers do not cross a 4KB boundary:
	 *
	 * Setting dma_attr_align=4096 will do this, but the system
	 * will only allocate 1 RX buffer per 4KB page, rather than 2.
	 * Setting dma_attr_granular=4096 *seems* to work around this,
	 * but I'm paranoid about future systems no longer honoring
	 * this, so fall back to the safe, but memory wasting way if a
	 * buffer crosses a 4KB boundary.
	 */

	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		uint32_t start, end;

		start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
		end = start + myri10ge_mtu;
		if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
			printf("std buffer crossed a 4KB boundary!\n");
			myri10ge_remove_jbuf(j);
			rx_dma_attr->dma_attr_align = 4096;
			rx_dma_attr->dma_attr_seg = UINT64_MAX;
			goto again;
		}
	}

	j->dma.low =
	    htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	j->dma.high =
	    htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	j->ss = ss;


	j->free_func.free_func = myri10ge_jfree_rtn;
	j->free_func.free_arg = (char *)j;
	mutex_enter(&jpool->mtx);
	j->next = jpool->head;
	jpool->head = j;
	jpool->num_alloc++;
	mutex_exit(&jpool->mtx);
	return (0);

abort_with_mem:
	ddi_dma_mem_free(&j->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&j->dma_handle);

abort_with_j:
	kmem_free(j, sizeof (*j));

	/*
	 * If an allocation failed, perhaps it failed because it could
	 * not satisfy the granularity requirement.  Disable that, and
	 * try again.
	 */
	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		cmn_err(CE_NOTE,
		    "!alloc failed, reverting to gran=1\n");
		rx_dma_attr->dma_attr_align = 4096;
		rx_dma_attr->dma_attr_seg = UINT64_MAX;
		goto again;
	}
	return (err);
}

static int
myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
{
	int i;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	j = jpool->head;
	i = 0;
	while (j != NULL) {
		i++;
		j = j->next;
	}
	mutex_exit(&jpool->mtx);
	return (i);
}

static int
myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	int allocated = 0;
	int err;
	int needed;

	/*
	 * if total is set, the caller wants "num" jbufs in the pool;
	 * otherwise the caller wants "num" additional jbufs
	 * added to the pool
	 */
	if (total && jpool->num_alloc) {
		allocated = myri10ge_jfree_cnt(jpool);
		needed = num - allocated;
	} else {
		needed = num;
	}

	while (needed > 0) {
		needed--;
		err = myri10ge_add_jbuf(ss);
		if (err == 0) {
			allocated++;
		}
	}
	return (allocated);
}

static void
myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	myri10ge_pull_jpool(ss);
	while (jpool->head != NULL) {
		jpool->num_alloc--;
		j = jpool->head;
		jpool->head = j->next;
		myri10ge_remove_jbuf(j);
	}
	mutex_exit(&jpool->mtx);
}

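/*
 * Worked example for the carve-up below, using the default tunables:
 * each small slot is myri10ge_small_bytes + MXGEFW_PAD = 510 + 2 =
 * 512 bytes, and the stock non-jumbo MTU is ETHERMAX + MXGEFW_PAD +
 * VLAN_TAGSZ = 1520 bytes, so each jumbo yields 1520/512 = 2 small
 * buffers and a 512-entry small ring consumes roughly 256 jumbos.
 * The ss->jbufs_for_smalls computation in myri10ge_setup_slice()
 * applies exactly this ratio, plus one buffer of slack.
 */
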
static void
myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;
	unsigned int alloc_size;

	dma_low = dma_high = len = 0;
	alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
	ptr = NULL;
	for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < alloc_size) {
			mutex_enter(&jpool->mtx);
			/* remove jumbo from freelist */
			j = jpool->head;
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}

/*
 * Return the jumbo bufs we carved up for small to the jumbo pool
 */

static void
myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;

	mutex_enter(&jpool->mtx);
	while (ss->small_jpool != NULL) {
		j = ss->small_jpool;
		ss->small_jpool = j->next;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);
	ss->jbufs_for_smalls = 0;
}

static int
myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	struct myri10ge_tx_dma_handle *handle;
	int err;

	handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
	err = ddi_dma_alloc_handle(mgp->dip,
	    &myri10ge_tx_dma_attr,
	    DDI_DMA_SLEEP, NULL,
	    &handle->h);
	if (err) {
		static int limit = 0;
		if (limit == 0)
			cmn_err(CE_WARN, "%s: Failed to alloc tx dma handle\n",
			    mgp->name);
		limit++;
		kmem_free(handle, sizeof (*handle));
		return (err);
	}
	mutex_enter(&tx->handle_lock);
	MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
	handle->next = tx->free_tx_handles;
	tx->free_tx_handles = handle;
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);
}

static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}

static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}

static void
myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle *handle)
{
	struct myri10ge_tx_dma_handle_head list;

	if (handle == NULL)
		return;
	list.head = handle;
	list.tail = handle;
	while (handle != NULL) {
		list.tail = handle;
		handle = handle->next;
	}
	myri10ge_free_tx_handles(tx, &list);
}

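/*
 * Usage pattern for the handle pool, sketched (the transmit path
 * later in this file follows this shape): grab handles up front,
 * thread them onto a caller-owned singly linked list, and return the
 * whole list in O(1) when the frame completes:
 *
 *	struct myri10ge_tx_dma_handle *handles = NULL;
 *
 *	if (myri10ge_alloc_tx_handles(ss, nsegs, &handles) !=
 *	    DDI_SUCCESS)
 *		return (ENOMEM);
 *	... bind mblk fragments using handles->h ...
 *	myri10ge_free_tx_handle_slist(&ss->tx, handles);
 */
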
static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
    struct myri10ge_tx_dma_handle **ret)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	int err, i;

	mutex_enter(&tx->handle_lock);
	for (i = 0; i < count; i++) {
		handle = tx->free_tx_handles;
		while (handle == NULL) {
			mutex_exit(&tx->handle_lock);
			err = myri10ge_add_tx_handle(ss);
			if (err != DDI_SUCCESS) {
				goto abort_with_handles;
			}
			mutex_enter(&tx->handle_lock);
			handle = tx->free_tx_handles;
		}
		tx->free_tx_handles = handle->next;
		handle->next = *ret;
		*ret = handle;
	}
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);

abort_with_handles:
	myri10ge_free_tx_handle_slist(tx, *ret);
	return (err);
}


/*
 * Frees DMA resources associated with the send ring
 */
static void
myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	size_t bytes;
	int idx;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	for (idx = 0; idx < ss->tx.mask + 1; idx++) {
		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		tx->cp[idx].va = NULL;
		myri10ge_dma_free(&tx->cp[idx].dma);
	}
	bytes = sizeof (*tx->cp) * (tx->mask + 1);
	kmem_free(tx->cp, bytes);
	tx->cp = NULL;
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
	myri10ge_remove_tx_handles(ss);
}

/*
 * Allocates DMA handles associated with the send ring
 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}


	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	while (h > 0) {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */

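/*
 * Parsing example (hedged; the serial number and MAC shown are made
 * up): given the EEPROM blob "SN=123456\0MAC=0:60:dd:47:a0:1\0...",
 * the loop below leaves mgp->sn_str pointing at "123456" and fills
 * mgp->mac_addr[] with { 0x00, 0x60, 0xdd, 0x47, 0xa0, 0x01 }.  Note
 * that the parser accepts single-hex-digit octets (the
 * *(ptr+1) == ':' branch), which is why "0:60:..." works.
 */
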
static int
myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
#define	MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
#define	myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') :	\
	(((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') :		\
	(((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))

	char *ptr, *limit;
	int i, hv, lv;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			if (myri10ge_verbose)
				printf("%s: mac address = %s\n", mgp->name,
				    ptr);
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;

				if (*(ptr+1) == ':') {
					hv = 0;
					lv = myri10ge_digit(*ptr); ptr++;
				} else {
					hv = myri10ge_digit(*ptr); ptr++;
					lv = myri10ge_digit(*ptr); ptr++;
				}
				mgp->mac_addr[i] = (hv << 4) | lv;
				ptr++;
			}
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->sn_str = (char *)ptr;
		}
		if (memcmp((const void *)ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->pc_str = (char *)ptr;
		}
		MYRI10GE_NEXT_STRING(ptr);
	}

	return (0);

abort:
	cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
	return (ENXIO);
}


/*
 * Determine the register set containing the PCI resource we
 * want to map: the memory-mappable part of the interface. We do
 * this by scanning the DDI "reg" property of the interface,
 * which is an array of mx_ddi_reg_set structures.
 */
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

#define	REGISTER_NUMBER(ip)	(ip[0] >> 0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >> 8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip)	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip)	(ip[4])

#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3

	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
					"I/O Space",
					"32-bit Memory Space",
					"64-bit Memory Space"
	};
#endif

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

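	/*
	 * Each 5-int "reg" entry encodes the PCI address in the bit
	 * fields decoded by the macros above.  For example (a made-up
	 * value), phys_hi 0x02021810 decodes as: address space 2
	 * (32-bit memory), bus 2, device 3, function 0, register
	 * offset 0x10.
	 */
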
	/* Scan for the register number. */
	rs = &data[0];
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number = %d.\n", BUS_NUMBER(rs));
		printf("  Address space = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf(" pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf(" pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}


static int
myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
{
	void *inflate_buffer;
	int rv, status;
	size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
	size_t destlen;
	mcp_gen_header_t *hdr;
	unsigned hdr_offset, i;


	*limit = 0; /* -Wuninitialized */
	status = 0;

	inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
	if (!inflate_buffer) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate buffer to inflate mcp\n",
		    mgp->name);
		return (ENOMEM);
	}

	destlen = sram_size;
	rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
	    mgp->eth_z8e_length);

	if (rv != Z_OK) {
		cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
		    mgp->name, z_strerror(rv));
		status = ENXIO;
		goto abort;
	}

	*limit = (uint32_t)destlen;

	hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
	    MCP_HEADER_PTR_OFFSET));
	hdr = (void *)((char *)inflate_buffer + hdr_offset);
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
		    ntohl(hdr->mcp_type));
		status = EIO;
		goto abort;
	}

	/* save firmware version for kstat */
	(void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
	if (myri10ge_verbose)
		printf("%s: firmware id: %s\n", mgp->name, hdr->version);

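	/*
	 * The copy loop below pushes the image 256 bytes at a time
	 * and reads one word back from SRAM between memory barriers.
	 * The readback is not a data check; it simply forces any
	 * posted PCI writes to reach the NIC before the next burst,
	 * a common flush idiom on write-combining mappings.
	 */
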
	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
		    (char *)inflate_buffer + i,
		    min(256U, (unsigned)(*limit - i)));
		mb();
		(void) *(int *)(void *)mgp->sram;
		mb();
	}

abort:
	kmem_free(inflate_buffer, sram_size);

	return (status);

}


int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	response->result = 0xffffffff;
	mb();

	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}

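/*
 * Calling convention for myri10ge_send_cmd(), sketched: inputs go in
 * data0..data2 (host order; the routine byte-swaps), and any result
 * comes back in data0.  For example, fetching the small RX ring
 * offset for slice 0 looks roughly like:
 *
 *	myri10ge_cmd_t cmd;
 *	int status;
 *
 *	cmd.data0 = 0;			// slice number
 *	status = myri10ge_send_cmd(mgp,
 *	    MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
 *	if (status == 0) {
 *		uint32_t offset = cmd.data0; // SRAM offset from firmware
 *		...
 *	}
 *
 * ENOSYS means the firmware predates the command, which callers use
 * to fall back to older interfaces.
 */
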
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high);	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low);	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */


	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void*) confirm,
		    *confirm);
	}
}

static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);		/* where to copy to */
	buf[6] = htonl(0);		/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *) confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}

static int
myri10ge_m_unicst(void *arg, const uint8_t *addr)
{
	struct myri10ge_priv *mgp = arg;
	myri10ge_cmd_t cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
	    | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
	if (status == 0 && (addr != mgp->mac_addr))
		(void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));

	return (status);
}

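/*
 * For reference, the packing above puts a MAC of aa:bb:cc:dd:ee:ff
 * into the command as data0 = 0xaabbccdd and data1 = 0x0000eeff,
 * which is the layout MXGEFW_SET_MAC_ADDRESS expects.
 */
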
static int
myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	myri10ge_cmd_t cmd;
	int status;

	if (pause)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
		    &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
		    &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
		    mgp->name);
		return (ENXIO);
	}
	mgp->pause = pause;
	return (0);
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
{
	myri10ge_cmd_t cmd;
	int status;

	if (promisc)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
		    mgp->name);
	}
}

static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return are the number of transfers completed.
	 * The lower 16 bits are the time in 0.5us ticks that the
	 * transfers took to complete
	 */

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);


abort:
	myri10ge_dma_free(&dmabench_dma);
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}

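/*
 * Unit check for the bandwidth math above: with N transfers of len
 * bytes completing in T half-microsecond ticks, the rate is
 * N * len / (T * 0.5us) bytes per microsecond, which simplifies to
 * (N * len * 2) / T MB/s, exactly the expression stored in
 * mgp->read_dma and mgp->write_dma.  The read/write test moves data
 * twice per transfer, hence its extra factor of 2.
 */
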
static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	}

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}

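/*
 * Background for the routine below: the standard Toeplitz RSS hash
 * is defined bit-serially over a secret key, which is slow to
 * evaluate per packet.  Instead we precompute 12 lookup tables of
 * 256 entries, one table per hashed input byte (4 bytes each of the
 * destination and source IPv4 addresses, 2 bytes each of the
 * destination and source TCP ports).  Entry i of table b XORs
 * together the 32-bit key windows for whichever of the 8 bit
 * positions of byte b are set in i, so the per-packet hash reduces
 * to at most 12 table lookups XORed together (see
 * myri10ge_toeplitz_send_hash() below).
 */
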
static int
myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int i, b, s, t, j;
	int status;
	uint32_t k[8];
	uint32_t tmp;
	uint8_t *key;

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
	    &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to get rss key\n",
		    mgp->name);
		return (EIO);
	}
	myri10ge_pio_copy32(mgp->rss_key,
	    (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
	    sizeof (mgp->rss_key));

	mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
	    KM_SLEEP);
	key = (uint8_t *)mgp->rss_key;
	t = 0;
	for (b = 0; b < 12; b++) {
		for (s = 0; s < 8; s++) {
			/* Bits: b*8+s, ..., b*8+s+31 */
			k[s] = 0;
			for (j = 0; j < 32; j++) {
				int bit = b*8+s+j;
				bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
				k[s] |= bit << (31 - j);
			}
		}

		for (i = 0; i <= 0xff; i++) {
			tmp = 0;
			if (i & (1 << 7)) { tmp ^= k[0]; }
			if (i & (1 << 6)) { tmp ^= k[1]; }
			if (i & (1 << 5)) { tmp ^= k[2]; }
			if (i & (1 << 4)) { tmp ^= k[3]; }
			if (i & (1 << 3)) { tmp ^= k[4]; }
			if (i & (1 << 2)) { tmp ^= k[5]; }
			if (i & (1 << 1)) { tmp ^= k[6]; }
			if (i & (1 << 0)) { tmp ^= k[7]; }
			mgp->toeplitz_hash_table[t++] = tmp;
		}
	}
	return (0);
}

static inline struct myri10ge_slice_state *
myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t saddr, daddr;
	uint32_t hash, slice;
	uint32_t *table = mgp->toeplitz_hash_table;
	uint16_t src, dst;

	/*
	 * Note hashing order is reversed from how it is done
	 * in the NIC, so as to generate the same hash value
	 * for the connection to try to keep connections CPU local
	 */

	/* hash on IPv4 src/dst address */
	saddr = ntohl(ip->ip_src.s_addr);
	daddr = ntohl(ip->ip_dst.s_addr);
	hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
	hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
	hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
	hash ^= table[(256 * 3) + ((daddr) & 0xff)];
	hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
	hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
	hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
	hash ^= table[(256 * 7) + ((saddr) & 0xff)];
	/* hash on TCP port, if required */
	if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
	    ip->ip_p == IPPROTO_TCP) {
		hdr = (struct tcphdr *)(void *)
		    (((uint8_t *)ip) + (ip->ip_hl << 2));
		src = ntohs(hdr->th_sport);
		dst = ntohs(hdr->th_dport);

		hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
		hash ^= table[(256 * 9) + ((dst) & 0xff)];
		hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
		hash ^= table[(256 * 11) + ((src) & 0xff)];
	}
	slice = (mgp->num_slices - 1) & hash;
	return (&mgp->ss[slice]);

}

static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t slice, hash_val;


	if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
		return (&mgp->ss[0]);
	}
	hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));

	/*
	 * Use the second byte of the *destination* port for
	 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
	 */
	hash_val = ntohs(hdr->th_dport) & 0xff;
	if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
		hash_val += ntohs(hdr->th_sport) & 0xff;

	slice = (mgp->num_slices - 1) & hash_val;
	return (&mgp->ss[slice]);
}

static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	if ((MBLKL(mp)) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}

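/*
 * Both hash routines above reduce the hash with
 * "slice = hash & (num_slices - 1)", which only distributes
 * correctly when the slice count is a power of two; the slice-probe
 * code elsewhere in the driver is assumed to arrange this.  The
 * masking also means only the low-order hash bits matter for
 * distribution.
 */
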
static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}


	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
	ss->rx_small.cnt = ss->rx_small.mask + 1;

	mutex_exit(&ss->jpool.mtx);

	status = myri10ge_prepare_tx_ring(ss);

	if (status != 0)
		goto abort_with_small_jbufs;

	cmd.data0 = ntohl(ss->fw_stats_dma.low);
	cmd.data1 = ntohl(ss->fw_stats_dma.high);
	cmd.data2 = sizeof (mcp_irq_data_t);
	cmd.data2 |= (slice << 16);
	bzero(ss->fw_stats, sizeof (*ss->fw_stats));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
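	/*
	 * Older firmware returns ENOSYS (MXGEFW_CMD_UNKNOWN) for the
	 * V2 stats command; in that case fall back to the obsolete
	 * variant, which DMAs only the tail of mcp_irq_data_t
	 * starting at send_done_count.
	 */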
status = myri10ge_send_cmd(mgp, 1982 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd); 1983 } 1984 if (status) { 1985 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name); 1986 goto abort_with_tx; 1987 } 1988 1989 return (0); 1990 1991 abort_with_tx: 1992 myri10ge_unprepare_tx_ring(ss); 1993 1994 abort_with_small_jbufs: 1995 myri10ge_release_small_jbufs(ss); 1996 1997 abort_with_jumbos: 1998 if (allocated != 0) { 1999 mutex_enter(&ss->jpool.mtx); 2000 ss->jpool.low_water = 0; 2001 mutex_exit(&ss->jpool.mtx); 2002 myri10ge_unstock_jumbos(ss); 2003 myri10ge_remove_jbufs(ss); 2004 } 2005 2006 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2007 kmem_free(ss->rx_big.info, bytes); 2008 2009 abort_with_rx_small_info: 2010 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2011 kmem_free(ss->rx_small.info, bytes); 2012 2013 abort_with_tx_info: 2014 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2015 kmem_free(ss->tx.info, bytes); 2016 2017 abort_with_rx_big_shadow: 2018 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2019 kmem_free(ss->rx_big.shadow, bytes); 2020 2021 abort_with_rx_small_shadow: 2022 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2023 kmem_free(ss->rx_small.shadow, bytes); 2024 abort: 2025 return (status); 2026 2027 } 2028 2029 static void 2030 myri10ge_teardown_slice(struct myri10ge_slice_state *ss) 2031 { 2032 int tx_ring_entries, rx_ring_entries; 2033 size_t bytes; 2034 2035 /* ignore slices that have not been fully setup */ 2036 if (ss->tx.cp == NULL) 2037 return; 2038 /* Free the TX copy buffers */ 2039 myri10ge_unprepare_tx_ring(ss); 2040 2041 /* stop passing returned buffers to firmware */ 2042 2043 mutex_enter(&ss->jpool.mtx); 2044 ss->jpool.low_water = 0; 2045 mutex_exit(&ss->jpool.mtx); 2046 myri10ge_release_small_jbufs(ss); 2047 2048 /* Release the free jumbo frame pool */ 2049 myri10ge_unstock_jumbos(ss); 2050 myri10ge_remove_jbufs(ss); 2051 2052 rx_ring_entries = ss->rx_big.mask + 1; 2053 tx_ring_entries = ss->tx.mask + 1; 2054 2055 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2056 kmem_free(ss->rx_big.info, bytes); 2057 2058 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2059 kmem_free(ss->rx_small.info, bytes); 2060 2061 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2062 kmem_free(ss->tx.info, bytes); 2063 2064 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2065 kmem_free(ss->rx_big.shadow, bytes); 2066 2067 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2068 kmem_free(ss->rx_small.shadow, bytes); 2069 2070 } 2071 static int 2072 myri10ge_start_locked(struct myri10ge_priv *mgp) 2073 { 2074 myri10ge_cmd_t cmd; 2075 int status, big_pow2, i; 2076 volatile uint8_t *itable; 2077 2078 status = DDI_SUCCESS; 2079 /* Allocate DMA resources and receive buffers */ 2080 2081 status = myri10ge_reset(mgp); 2082 if (status != 0) { 2083 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 2084 return (DDI_FAILURE); 2085 } 2086 2087 if (mgp->num_slices > 1) { 2088 cmd.data0 = mgp->num_slices; 2089 cmd.data1 = 1; /* use MSI-X */ 2090 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2091 &cmd); 2092 if (status != 0) { 2093 cmn_err(CE_WARN, 2094 "%s: failed to set number of slices\n", 2095 mgp->name); 2096 goto abort_with_nothing; 2097 } 2098 /* setup the indirection table */ 2099 cmd.data0 = mgp->num_slices; 2100 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2101 &cmd); 2102 2103 status |= myri10ge_send_cmd(mgp, 2104 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 2105 if (status != 0) { 2106 
cmn_err(CE_WARN, 2107 "%s: failed to setup rss tables\n", mgp->name); 2108 } 2109 2110 /* just enable an identity mapping */ 2111 itable = mgp->sram + cmd.data0; 2112 for (i = 0; i < mgp->num_slices; i++) 2113 itable[i] = (uint8_t)i; 2114 2115 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) { 2116 status = myri10ge_init_toeplitz(mgp); 2117 if (status != 0) { 2118 cmn_err(CE_WARN, "%s: failed to setup " 2119 "toeplitz tx hash table", mgp->name); 2120 goto abort_with_nothing; 2121 } 2122 } 2123 cmd.data0 = 1; 2124 cmd.data1 = myri10ge_rss_hash; 2125 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2126 &cmd); 2127 if (status != 0) { 2128 cmn_err(CE_WARN, 2129 "%s: failed to enable slices\n", mgp->name); 2130 goto abort_with_toeplitz; 2131 } 2132 } 2133 2134 for (i = 0; i < mgp->num_slices; i++) { 2135 status = myri10ge_setup_slice(&mgp->ss[i]); 2136 if (status != 0) 2137 goto abort_with_slices; 2138 } 2139 2140 /* 2141 * Tell the MCP how many buffers he has, and to 2142 * bring the ethernet interface up 2143 * 2144 * Firmware needs the big buff size as a power of 2. Lie and 2145 * tell him the buffer is larger, because we only use 1 2146 * buffer/pkt, and the mtu will prevent overruns 2147 */ 2148 big_pow2 = myri10ge_mtu + MXGEFW_PAD; 2149 while (!ISP2(big_pow2)) 2150 big_pow2++; 2151 2152 /* now give firmware buffers sizes, and MTU */ 2153 cmd.data0 = myri10ge_mtu; 2154 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd); 2155 cmd.data0 = myri10ge_small_bytes; 2156 status |= 2157 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 2158 cmd.data0 = big_pow2; 2159 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2160 if (status) { 2161 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name); 2162 goto abort_with_slices; 2163 } 2164 2165 2166 cmd.data0 = 1; 2167 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd); 2168 if (status) { 2169 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n", 2170 mgp->name, status); 2171 } else { 2172 mgp->features |= MYRI10GE_TSO; 2173 } 2174 2175 mgp->link_state = -1; 2176 mgp->rdma_tags_available = 15; 2177 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd); 2178 if (status) { 2179 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name); 2180 goto abort_with_slices; 2181 } 2182 mgp->running = MYRI10GE_ETH_RUNNING; 2183 return (DDI_SUCCESS); 2184 2185 abort_with_slices: 2186 for (i = 0; i < mgp->num_slices; i++) 2187 myri10ge_teardown_slice(&mgp->ss[i]); 2188 2189 mgp->running = MYRI10GE_ETH_STOPPED; 2190 2191 abort_with_toeplitz: 2192 if (mgp->toeplitz_hash_table != NULL) { 2193 kmem_free(mgp->toeplitz_hash_table, 2194 sizeof (uint32_t) * 12 * 256); 2195 mgp->toeplitz_hash_table = NULL; 2196 } 2197 2198 abort_with_nothing: 2199 return (DDI_FAILURE); 2200 } 2201 2202 static void 2203 myri10ge_stop_locked(struct myri10ge_priv *mgp) 2204 { 2205 int status, old_down_cnt; 2206 myri10ge_cmd_t cmd; 2207 int wait_time = 10; 2208 int i, polling; 2209 2210 old_down_cnt = mgp->down_cnt; 2211 mb(); 2212 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2213 if (status) { 2214 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 2215 } 2216 2217 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2218 delay(1 * drv_usectohz(1000000)); 2219 wait_time--; 2220 if (wait_time == 0) 2221 break; 2222 } 2223 again: 2224 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2225 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name); 2226 for (i = 0; i < 
mgp->num_slices; i++) {
2227 /*
2228 * take and release the rx lock to ensure
2229 * that no interrupt thread is blocked
2230 * elsewhere in the stack, preventing
2231 * completion
2232 */
2233
2234 mutex_enter(&mgp->ss[i].rx_lock);
2235 printf("%s: slice %d rx irq idle\n",
2236 mgp->name, i);
2237 mutex_exit(&mgp->ss[i].rx_lock);
2238
2239 /* verify that the poll handler is inactive */
2240 mutex_enter(&mgp->ss[i].poll_lock);
2241 polling = mgp->ss[i].rx_polling;
2242 mutex_exit(&mgp->ss[i].poll_lock);
2243 if (polling) {
2244 printf("%s: slice %d is polling\n",
2245 mgp->name, i);
2246 delay(1 * drv_usectohz(1000000));
2247 goto again;
2248 }
2249 }
2250 delay(1 * drv_usectohz(1000000));
2251 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2252 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2253 }
2254 }
2255
2256 for (i = 0; i < mgp->num_slices; i++)
2257 myri10ge_teardown_slice(&mgp->ss[i]);
2258
2259 if (mgp->toeplitz_hash_table != NULL) {
2260 kmem_free(mgp->toeplitz_hash_table,
2261 sizeof (uint32_t) * 12 * 256);
2262 mgp->toeplitz_hash_table = NULL;
2263 }
2264 mgp->running = MYRI10GE_ETH_STOPPED;
2265 }
2266
2267 static int
2268 myri10ge_m_start(void *arg)
2269 {
2270 struct myri10ge_priv *mgp = arg;
2271 int status;
2272
2273 mutex_enter(&mgp->intrlock);
2274
2275 if (mgp->running != MYRI10GE_ETH_STOPPED) {
2276 mutex_exit(&mgp->intrlock);
2277 return (DDI_FAILURE);
2278 }
2279 status = myri10ge_start_locked(mgp);
2280 mutex_exit(&mgp->intrlock);
2281
2282 if (status != DDI_SUCCESS)
2283 return (status);
2284
2285 /* start the watchdog timer */
2286 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2287 mgp->timer_ticks);
2288 return (DDI_SUCCESS);
2289
2290 }
2291
2292 static void
2293 myri10ge_m_stop(void *arg)
2294 {
2295 struct myri10ge_priv *mgp = arg;
2296
2297 mutex_enter(&mgp->intrlock);
2298 /* if the device is not running, give up */
2299 if (mgp->running != MYRI10GE_ETH_RUNNING) {
2300 mutex_exit(&mgp->intrlock);
2301 return;
2302 }
2303
2304 mgp->running = MYRI10GE_ETH_STOPPING;
2305 mutex_exit(&mgp->intrlock);
2306 (void) untimeout(mgp->timer_id);
2307 mutex_enter(&mgp->intrlock);
2308 myri10ge_stop_locked(mgp);
2309 mutex_exit(&mgp->intrlock);
2310
2311 }
2312
2313 static inline void
2314 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
2315 {
2316 struct ether_header *eh;
2317 struct ip *ip;
2318 struct ip6_hdr *ip6;
2319 uint32_t start, stuff, end, partial, hdrlen;
2320
2321
2322 csum = ntohs((uint16_t)csum);
2323 eh = (struct ether_header *)(void *)mp->b_rptr;
2324 hdrlen = sizeof (*eh);
2325 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2326 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2327 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2328 s->brdcstrcv++;
2329 else
2330 s->multircv++;
2331 }
2332
2333 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
2334 /*
2335 * fix checksum by subtracting 4 bytes after what the
2336 * firmware thought was the end of the ether hdr
2337 */
2338 partial = *(uint32_t *)
2339 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
2340 csum += ~partial;
2341 csum += (csum < ~partial);
2342 csum = (csum >> 16) + (csum & 0xFFFF);
2343 csum = (csum >> 16) + (csum & 0xFFFF);
2344 hdrlen += VLAN_TAGSZ;
2345 }
2346
2347 if (eh->ether_type == BE_16(ETHERTYPE_IP)) {
2348 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
2349 start = ip->ip_hl << 2;
2350
2351 if (ip->ip_p == IPPROTO_TCP)
2352 stuff = start + offsetof(struct tcphdr, th_sum);
2353 else if (ip->ip_p == IPPROTO_UDP)
2354 stuff = start +
offsetof(struct udphdr, uh_sum); 2355 else 2356 return; 2357 end = ntohs(ip->ip_len); 2358 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) { 2359 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen); 2360 start = sizeof (*ip6); 2361 if (ip6->ip6_nxt == IPPROTO_TCP) { 2362 stuff = start + offsetof(struct tcphdr, th_sum); 2363 } else if (ip6->ip6_nxt == IPPROTO_UDP) 2364 stuff = start + offsetof(struct udphdr, uh_sum); 2365 else 2366 return; 2367 end = start + ntohs(ip6->ip6_plen); 2368 /* 2369 * IPv6 headers do not contain a checksum, and hence 2370 * do not checksum to zero, so they don't "fall out" 2371 * of the partial checksum calculation like IPv4 2372 * headers do. We need to fix the partial checksum by 2373 * subtracting the checksum of the IPv6 header. 2374 */ 2375 2376 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6)); 2377 csum += ~partial; 2378 csum += (csum < ~partial); 2379 csum = (csum >> 16) + (csum & 0xFFFF); 2380 csum = (csum >> 16) + (csum & 0xFFFF); 2381 } else { 2382 return; 2383 } 2384 2385 if (MBLKL(mp) > hdrlen + end) { 2386 /* padded frame, so hw csum may be invalid */ 2387 return; 2388 } 2389 2390 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); 2391 } 2392 2393 static mblk_t * 2394 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len, 2395 uint32_t csum) 2396 { 2397 mblk_t *mp; 2398 myri10ge_rx_ring_t *rx; 2399 int idx; 2400 2401 rx = &ss->rx_small; 2402 idx = rx->cnt & rx->mask; 2403 ss->rx_small.cnt++; 2404 2405 /* allocate a new buffer to pass up the stack */ 2406 mp = allocb(len + MXGEFW_PAD, 0); 2407 if (mp == NULL) { 2408 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf); 2409 goto abort; 2410 } 2411 bcopy(ss->rx_small.info[idx].ptr, 2412 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2413 mp->b_wptr += len + MXGEFW_PAD; 2414 mp->b_rptr += MXGEFW_PAD; 2415 2416 ss->rx_stats.ibytes += len; 2417 ss->rx_stats.ipackets += 1; 2418 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2419 2420 abort: 2421 if ((idx & 7) == 7) { 2422 myri10ge_submit_8rx(&rx->lanai[idx - 7], 2423 &rx->shadow[idx - 7]); 2424 } 2425 2426 return (mp); 2427 } 2428 2429 2430 static mblk_t * 2431 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len, 2432 uint32_t csum) 2433 { 2434 struct myri10ge_jpool_stuff *jpool; 2435 struct myri10ge_jpool_entry *j; 2436 mblk_t *mp; 2437 int idx, num_owned_by_mcp; 2438 2439 jpool = &ss->jpool; 2440 idx = ss->j_rx_cnt & ss->rx_big.mask; 2441 j = ss->rx_big.info[idx].j; 2442 2443 if (j == NULL) { 2444 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n", 2445 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt); 2446 return (NULL); 2447 } 2448 2449 2450 ss->rx_big.info[idx].j = NULL; 2451 ss->j_rx_cnt++; 2452 2453 2454 /* 2455 * Check to see if we are low on rx buffers. 2456 * Note that we must leave at least 8 free so there are 2457 * enough to free in a single 64-byte write. 
2458 */
2459 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2460 if (num_owned_by_mcp < jpool->low_water) {
2461 mutex_enter(&jpool->mtx);
2462 myri10ge_restock_jumbos(ss);
2463 mutex_exit(&jpool->mtx);
2464 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2465 /* if we are still low, then we have to copy */
2466 if (num_owned_by_mcp < 16) {
2467 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
2468 /* allocate a new buffer to pass up the stack */
2469 mp = allocb(len + MXGEFW_PAD, 0);
2470 if (mp == NULL) {
2471 goto abort;
2472 }
2473 bcopy(j->buf,
2474 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2475 myri10ge_jfree_rtn(j);
2476 /* push buffer back to NIC */
2477 mutex_enter(&jpool->mtx);
2478 myri10ge_restock_jumbos(ss);
2479 mutex_exit(&jpool->mtx);
2480 goto set_len;
2481 }
2482 }
2483
2484 /* loan our buffer to the stack */
2485 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
2486 if (mp == NULL) {
2487 goto abort;
2488 }
2489
2490 set_len:
2491 mp->b_rptr += MXGEFW_PAD;
2492 mp->b_wptr = ((unsigned char *) mp->b_rptr + len);
2493
2494 ss->rx_stats.ibytes += len;
2495 ss->rx_stats.ipackets += 1;
2496 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2497
2498 return (mp);
2499
2500 abort:
2501 myri10ge_jfree_rtn(j);
2502 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
2503 return (NULL);
2504 }
2505
2506 /*
2507 * Free all transmit buffers up until the specified index
2508 */
2509 static inline void
2510 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
2511 {
2512 myri10ge_tx_ring_t *tx;
2513 struct myri10ge_tx_dma_handle_head handles;
2514 int idx;
2515 int limit = 0;
2516
2517 tx = &ss->tx;
2518 handles.head = NULL;
2519 handles.tail = NULL;
2520 while (tx->pkt_done != (int)mcp_index) {
2521 idx = tx->done & tx->mask;
2522
2523 /*
2524 * mblk & DMA handle attached only to first slot
2525 * per buffer in the packet
2526 */
2527
2528 if (tx->info[idx].m) {
2529 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
2530 tx->info[idx].handle->next = handles.head;
2531 handles.head = tx->info[idx].handle;
2532 if (handles.tail == NULL)
2533 handles.tail = tx->info[idx].handle;
2534 freeb(tx->info[idx].m);
2535 tx->info[idx].m = 0;
2536 tx->info[idx].handle = 0;
2537 }
2538 if (tx->info[idx].ostat.opackets != 0) {
2539 tx->stats.multixmt += tx->info[idx].ostat.multixmt;
2540 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
2541 tx->stats.obytes += tx->info[idx].ostat.obytes;
2542 tx->stats.opackets += tx->info[idx].ostat.opackets;
2543 tx->info[idx].stat.un.all = 0;
2544 tx->pkt_done++;
2545 }
2546
2547 tx->done++;
2548 /*
2549 * if we stalled the queue, wake it, but wait until
2550 * we have at least 1/2 our slots free.
2551 */ 2552 if ((tx->req - tx->done) < (tx->mask >> 1) && 2553 tx->stall != tx->sched) { 2554 mutex_enter(&ss->tx.lock); 2555 tx->sched = tx->stall; 2556 mutex_exit(&ss->tx.lock); 2557 mac_tx_ring_update(ss->mgp->mh, tx->rh); 2558 } 2559 2560 /* limit potential for livelock */ 2561 if (unlikely(++limit > 2 * tx->mask)) 2562 break; 2563 } 2564 if (tx->req == tx->done && tx->stop != NULL) { 2565 /* 2566 * Nic has sent all pending requests, allow him 2567 * to stop polling this queue 2568 */ 2569 mutex_enter(&tx->lock); 2570 if (tx->req == tx->done && tx->active) { 2571 *(int *)(void *)tx->stop = 1; 2572 tx->active = 0; 2573 mb(); 2574 } 2575 mutex_exit(&tx->lock); 2576 } 2577 if (handles.head != NULL) 2578 myri10ge_free_tx_handles(tx, &handles); 2579 } 2580 2581 static void 2582 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl) 2583 { 2584 mbl->head = NULL; 2585 mbl->tail = &mbl->head; 2586 mbl->cnt = 0; 2587 } 2588 2589 /*ARGSUSED*/ 2590 void 2591 myri10ge_mbl_append(struct myri10ge_slice_state *ss, 2592 struct myri10ge_mblk_list *mbl, mblk_t *mp) 2593 { 2594 *(mbl->tail) = mp; 2595 mbl->tail = &mp->b_next; 2596 mp->b_next = NULL; 2597 mbl->cnt++; 2598 } 2599 2600 2601 static inline void 2602 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, 2603 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop) 2604 { 2605 myri10ge_rx_done_t *rx_done = &ss->rx_done; 2606 struct myri10ge_priv *mgp = ss->mgp; 2607 mblk_t *mp; 2608 struct lro_entry *lro; 2609 uint16_t length; 2610 uint16_t checksum; 2611 2612 2613 while (rx_done->entry[rx_done->idx].length != 0) { 2614 if (unlikely (*stop)) { 2615 break; 2616 } 2617 length = ntohs(rx_done->entry[rx_done->idx].length); 2618 length &= (~MXGEFW_RSS_HASH_MASK); 2619 2620 /* limit potential for livelock */ 2621 limit -= length; 2622 if (unlikely(limit < 0)) 2623 break; 2624 2625 rx_done->entry[rx_done->idx].length = 0; 2626 checksum = ntohs(rx_done->entry[rx_done->idx].checksum); 2627 if (length <= myri10ge_small_bytes) 2628 mp = myri10ge_rx_done_small(ss, length, checksum); 2629 else 2630 mp = myri10ge_rx_done_big(ss, length, checksum); 2631 if (mp != NULL) { 2632 if (!myri10ge_lro || 2633 0 != myri10ge_lro_rx(ss, mp, checksum, mbl)) 2634 myri10ge_mbl_append(ss, mbl, mp); 2635 } 2636 rx_done->cnt++; 2637 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1); 2638 } 2639 while (ss->lro_active != NULL) { 2640 lro = ss->lro_active; 2641 ss->lro_active = lro->next; 2642 myri10ge_lro_flush(ss, lro, mbl); 2643 } 2644 } 2645 2646 static void 2647 myri10ge_intr_rx(struct myri10ge_slice_state *ss) 2648 { 2649 uint64_t gen; 2650 struct myri10ge_mblk_list mbl; 2651 2652 myri10ge_mbl_init(&mbl); 2653 if (mutex_tryenter(&ss->rx_lock) == 0) 2654 return; 2655 gen = ss->rx_gen_num; 2656 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL, 2657 &ss->rx_polling); 2658 if (mbl.head != NULL) 2659 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen); 2660 mutex_exit(&ss->rx_lock); 2661 2662 } 2663 2664 static mblk_t * 2665 myri10ge_poll_rx(void *arg, int bytes) 2666 { 2667 struct myri10ge_slice_state *ss = arg; 2668 struct myri10ge_mblk_list mbl; 2669 boolean_t dummy = B_FALSE; 2670 2671 if (bytes == 0) 2672 return (NULL); 2673 2674 myri10ge_mbl_init(&mbl); 2675 mutex_enter(&ss->rx_lock); 2676 if (ss->rx_polling) 2677 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy); 2678 else 2679 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss - 2680 ss->mgp->ss), ss->rx_token, ss->rx_polling); 2681 mutex_exit(&ss->rx_lock); 2682 return (mbl.head); 2683 } 2684 2685 
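/*
 * editor's note on the rx token protocol shared by the interrupt
 * handler below and myri10ge_poll_rx() above (a descriptive sketch,
 * assuming the *ss->irq_claim = BE_32(3) write is what re-arms the
 * slice's rx interrupt):
 *
 *	mac layer event		driver action (under ss->poll_lock)
 *	---------------		-----------------------------------
 *	mi_disable		rx_polling = B_TRUE
 *	mri_poll		drain the rx_done ring, never re-arm
 *	irq while polling	rx_token = 1 (defer the claim)
 *	mi_enable		rx_polling = B_FALSE; if rx_token was
 *				set, write irq_claim and clear the token
 *
 * The token ensures exactly one irq_claim write for a claim deferred
 * during polling, regardless of whether the interrupt or the enable
 * callback runs last.
 */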
/*ARGSUSED*/ 2686 static uint_t 2687 myri10ge_intr(caddr_t arg0, caddr_t arg1) 2688 { 2689 struct myri10ge_slice_state *ss = 2690 (struct myri10ge_slice_state *)(void *)arg0; 2691 struct myri10ge_priv *mgp = ss->mgp; 2692 mcp_irq_data_t *stats = ss->fw_stats; 2693 myri10ge_tx_ring_t *tx = &ss->tx; 2694 uint32_t send_done_count; 2695 uint8_t valid; 2696 2697 2698 /* make sure the DMA has finished */ 2699 if (!stats->valid) { 2700 return (DDI_INTR_UNCLAIMED); 2701 } 2702 valid = stats->valid; 2703 2704 /* low bit indicates receives are present */ 2705 if (valid & 1) 2706 myri10ge_intr_rx(ss); 2707 2708 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 2709 /* lower legacy IRQ */ 2710 *mgp->irq_deassert = 0; 2711 if (!myri10ge_deassert_wait) 2712 /* don't wait for conf. that irq is low */ 2713 stats->valid = 0; 2714 mb(); 2715 } else { 2716 /* no need to wait for conf. that irq is low */ 2717 stats->valid = 0; 2718 } 2719 2720 do { 2721 /* check for transmit completes and receives */ 2722 send_done_count = ntohl(stats->send_done_count); 2723 if (send_done_count != tx->pkt_done) 2724 myri10ge_tx_done(ss, (int)send_done_count); 2725 } while (*((volatile uint8_t *) &stats->valid)); 2726 2727 if (stats->stats_updated) { 2728 if (mgp->link_state != stats->link_up || stats->link_down) { 2729 mgp->link_state = stats->link_up; 2730 if (stats->link_down) { 2731 mgp->down_cnt += stats->link_down; 2732 mgp->link_state = 0; 2733 } 2734 if (mgp->link_state) { 2735 if (myri10ge_verbose) 2736 printf("%s: link up\n", mgp->name); 2737 mac_link_update(mgp->mh, LINK_STATE_UP); 2738 } else { 2739 if (myri10ge_verbose) 2740 printf("%s: link down\n", mgp->name); 2741 mac_link_update(mgp->mh, LINK_STATE_DOWN); 2742 } 2743 MYRI10GE_NIC_STAT_INC(link_changes); 2744 } 2745 if (mgp->rdma_tags_available != 2746 ntohl(ss->fw_stats->rdma_tags_available)) { 2747 mgp->rdma_tags_available = 2748 ntohl(ss->fw_stats->rdma_tags_available); 2749 cmn_err(CE_NOTE, "%s: RDMA timed out! " 2750 "%d tags left\n", mgp->name, 2751 mgp->rdma_tags_available); 2752 } 2753 } 2754 2755 mb(); 2756 /* check to see if we have rx token to pass back */ 2757 if (valid & 0x1) { 2758 mutex_enter(&ss->poll_lock); 2759 if (ss->rx_polling) { 2760 ss->rx_token = 1; 2761 } else { 2762 *ss->irq_claim = BE_32(3); 2763 ss->rx_token = 0; 2764 } 2765 mutex_exit(&ss->poll_lock); 2766 } 2767 *(ss->irq_claim + 1) = BE_32(3); 2768 return (DDI_INTR_CLAIMED); 2769 } 2770 2771 /* 2772 * Add or remove a multicast address. This is called with our 2773 * macinfo's lock held by GLD, so we do not need to worry about 2774 * our own locking here. 
2775 */ 2776 static int 2777 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr) 2778 { 2779 myri10ge_cmd_t cmd; 2780 struct myri10ge_priv *mgp = arg; 2781 int status, join_leave; 2782 2783 if (add) 2784 join_leave = MXGEFW_JOIN_MULTICAST_GROUP; 2785 else 2786 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP; 2787 (void) memcpy(&cmd.data0, multicastaddr, 4); 2788 (void) memcpy(&cmd.data1, multicastaddr + 4, 2); 2789 cmd.data0 = htonl(cmd.data0); 2790 cmd.data1 = htonl(cmd.data1); 2791 status = myri10ge_send_cmd(mgp, join_leave, &cmd); 2792 if (status == 0) 2793 return (0); 2794 2795 cmn_err(CE_WARN, "%s: failed to set multicast address\n", 2796 mgp->name); 2797 return (status); 2798 } 2799 2800 2801 static int 2802 myri10ge_m_promisc(void *arg, boolean_t on) 2803 { 2804 struct myri10ge_priv *mgp = arg; 2805 2806 myri10ge_change_promisc(mgp, on); 2807 return (0); 2808 } 2809 2810 /* 2811 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2812 * backwards one at a time and handle ring wraps 2813 */ 2814 2815 static inline void 2816 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx, 2817 mcp_kreq_ether_send_t *src, int cnt) 2818 { 2819 int idx, starting_slot; 2820 starting_slot = tx->req; 2821 while (cnt > 1) { 2822 cnt--; 2823 idx = (starting_slot + cnt) & tx->mask; 2824 myri10ge_pio_copy(&tx->lanai[idx], 2825 &src[cnt], sizeof (*src)); 2826 mb(); 2827 } 2828 } 2829 2830 /* 2831 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2832 * at most 32 bytes at a time, so as to avoid involving the software 2833 * pio handler in the nic. We re-write the first segment's flags 2834 * to mark them valid only after writing the entire chain 2835 */ 2836 2837 static inline void 2838 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 2839 int cnt) 2840 { 2841 int idx, i; 2842 uint32_t *src_ints, *dst_ints; 2843 mcp_kreq_ether_send_t *srcp, *dstp, *dst; 2844 uint8_t last_flags; 2845 2846 idx = tx->req & tx->mask; 2847 2848 last_flags = src->flags; 2849 src->flags = 0; 2850 mb(); 2851 dst = dstp = &tx->lanai[idx]; 2852 srcp = src; 2853 2854 if ((idx + cnt) < tx->mask) { 2855 for (i = 0; i < (cnt - 1); i += 2) { 2856 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src)); 2857 mb(); /* force write every 32 bytes */ 2858 srcp += 2; 2859 dstp += 2; 2860 } 2861 } else { 2862 /* 2863 * submit all but the first request, and ensure 2864 * that it is submitted below 2865 */ 2866 myri10ge_submit_req_backwards(tx, src, cnt); 2867 i = 0; 2868 } 2869 if (i < cnt) { 2870 /* submit the first request */ 2871 myri10ge_pio_copy(dstp, srcp, sizeof (*src)); 2872 mb(); /* barrier before setting valid flag */ 2873 } 2874 2875 /* re-write the last 32-bits with the valid flags */ 2876 src->flags |= last_flags; 2877 src_ints = (uint32_t *)src; 2878 src_ints += 3; 2879 dst_ints = (uint32_t *)dst; 2880 dst_ints += 3; 2881 *dst_ints = *src_ints; 2882 tx->req += cnt; 2883 mb(); 2884 /* notify NIC to poll this tx ring */ 2885 if (!tx->active && tx->go != NULL) { 2886 *(int *)(void *)tx->go = 1; 2887 tx->active = 1; 2888 tx->activate++; 2889 mb(); 2890 } 2891 } 2892 2893 /* ARGSUSED */ 2894 static inline void 2895 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 2896 { 2897 uint32_t lso_flag; 2898 mac_lso_get(mp, mss, &lso_flag); 2899 (*flags) |= lso_flag; 2900 } 2901 2902 2903 /* like pullupmsg, except preserve hcksum/LSO attributes */ 2904 static int 2905 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) 2906 { 2907 uint32_t start, stuff, 
tx_offload_flags, mss; 2908 int ok; 2909 2910 mss = 0; 2911 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 2912 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 2913 2914 ok = pullupmsg(mp, -1); 2915 if (!ok) { 2916 printf("pullupmsg failed"); 2917 return (DDI_FAILURE); 2918 } 2919 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); 2920 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); 2921 if (tx_offload_flags & HW_LSO) 2922 DB_LSOMSS(mp) = (uint16_t)mss; 2923 lso_info_set(mp, mss, tx_offload_flags); 2924 return (DDI_SUCCESS); 2925 } 2926 2927 static inline void 2928 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh, 2929 int opackets, int obytes) 2930 { 2931 s->un.all = 0; 2932 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2933 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2934 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2935 s->un.s.brdcstxmt = 1; 2936 else 2937 s->un.s.multixmt = 1; 2938 } 2939 s->un.s.opackets = (uint16_t)opackets; 2940 s->un.s.obytes = obytes; 2941 } 2942 2943 static int 2944 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 2945 mcp_kreq_ether_send_t *req) 2946 { 2947 myri10ge_tx_ring_t *tx = &ss->tx; 2948 caddr_t ptr; 2949 struct myri10ge_tx_copybuf *cp; 2950 mblk_t *bp; 2951 int idx, mblen, avail; 2952 uint16_t len; 2953 2954 mutex_enter(&tx->lock); 2955 avail = tx->mask - (tx->req - tx->done); 2956 if (avail <= 1) { 2957 mutex_exit(&tx->lock); 2958 return (EBUSY); 2959 } 2960 idx = tx->req & tx->mask; 2961 cp = &tx->cp[idx]; 2962 ptr = cp->va; 2963 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) { 2964 mblen = MBLKL(bp); 2965 bcopy(bp->b_rptr, ptr, mblen); 2966 ptr += mblen; 2967 len += mblen; 2968 } 2969 /* ensure runts are padded to 60 bytes */ 2970 if (len < 60) { 2971 bzero(ptr, 64 - len); 2972 len = 60; 2973 } 2974 req->addr_low = cp->dma.low; 2975 req->addr_high = cp->dma.high; 2976 req->length = htons(len); 2977 req->pad = 0; 2978 req->rdma_count = 1; 2979 myri10ge_tx_stat(&tx->info[idx].stat, 2980 (struct ether_header *)(void *)cp->va, 1, len); 2981 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV); 2982 myri10ge_submit_req(&ss->tx, req, 1); 2983 mutex_exit(&tx->lock); 2984 freemsg(mp); 2985 return (DDI_SUCCESS); 2986 } 2987 2988 2989 static void 2990 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list, 2991 struct myri10ge_tx_buffer_state *tx_info, 2992 int count) 2993 { 2994 int i, idx; 2995 2996 idx = 0; /* gcc -Wuninitialized */ 2997 /* store unmapping and bp info for tx irq handler */ 2998 for (i = 0; i < count; i++) { 2999 idx = (tx->req + i) & tx->mask; 3000 tx->info[idx].m = tx_info[i].m; 3001 tx->info[idx].handle = tx_info[i].handle; 3002 } 3003 tx->info[idx].stat.un.all = tx_info[0].stat.un.all; 3004 3005 /* submit the frame to the nic */ 3006 myri10ge_submit_req(tx, req_list, count); 3007 3008 3009 } 3010 3011 3012 3013 static void 3014 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf) 3015 { 3016 mblk_t *bp; 3017 int seglen; 3018 uint_t count; 3019 3020 bp = mp; 3021 3022 while (off > 0) { 3023 seglen = MBLKL(bp); 3024 if (off < seglen) 3025 break; 3026 off -= seglen; 3027 bp = bp->b_cont; 3028 } 3029 while (len > 0) { 3030 seglen = MBLKL(bp); 3031 count = min(seglen - off, len); 3032 bcopy(bp->b_rptr + off, buf, count); 3033 len -= count; 3034 buf += count; 3035 off = 0; 3036 bp = bp->b_cont; 3037 } 3038 } 3039 3040 static int 3041 myri10ge_ether_parse_header(mblk_t *mp) 3042 { 3043 struct ether_header eh_copy; 
3044 struct ether_header *eh; 3045 int eth_hdr_len, seglen; 3046 3047 seglen = MBLKL(mp); 3048 eth_hdr_len = sizeof (*eh); 3049 if (seglen < eth_hdr_len) { 3050 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy); 3051 eh = &eh_copy; 3052 } else { 3053 eh = (struct ether_header *)(void *)mp->b_rptr; 3054 } 3055 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 3056 eth_hdr_len += 4; 3057 } 3058 3059 return (eth_hdr_len); 3060 } 3061 3062 static int 3063 myri10ge_lso_parse_header(mblk_t *mp, int off) 3064 { 3065 char buf[128]; 3066 int seglen, sum_off; 3067 struct ip *ip; 3068 struct tcphdr *tcp; 3069 3070 seglen = MBLKL(mp); 3071 if (seglen < off + sizeof (*ip)) { 3072 myri10ge_copydata(mp, off, sizeof (*ip), buf); 3073 ip = (struct ip *)(void *)buf; 3074 } else { 3075 ip = (struct ip *)(void *)(mp->b_rptr + off); 3076 } 3077 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) { 3078 myri10ge_copydata(mp, off, 3079 (ip->ip_hl << 2) + sizeof (*tcp), buf); 3080 ip = (struct ip *)(void *)buf; 3081 } 3082 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2)); 3083 3084 /* 3085 * NIC expects ip_sum to be zero. Recent changes to 3086 * OpenSolaris leave the correct ip checksum there, rather 3087 * than the required zero, so we need to zero it. Otherwise, 3088 * the NIC will produce bad checksums when sending LSO packets. 3089 */ 3090 if (ip->ip_sum != 0) { 3091 if (((char *)ip) != buf) { 3092 /* ip points into mblk, so just zero it */ 3093 ip->ip_sum = 0; 3094 } else { 3095 /* 3096 * ip points into a copy, so walk the chain 3097 * to find the ip_csum, then zero it 3098 */ 3099 sum_off = off + _PTRDIFF(&ip->ip_sum, buf); 3100 while (sum_off > (int)(MBLKL(mp) - 1)) { 3101 sum_off -= MBLKL(mp); 3102 mp = mp->b_cont; 3103 } 3104 mp->b_rptr[sum_off] = 0; 3105 sum_off++; 3106 while (sum_off > MBLKL(mp) - 1) { 3107 sum_off -= MBLKL(mp); 3108 mp = mp->b_cont; 3109 } 3110 mp->b_rptr[sum_off] = 0; 3111 } 3112 } 3113 return (off + ((ip->ip_hl + tcp->th_off) << 2)); 3114 } 3115 3116 static int 3117 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 3118 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size, 3119 uint16_t mss, uint8_t cksum_offset) 3120 { 3121 myri10ge_tx_ring_t *tx = &ss->tx; 3122 struct myri10ge_priv *mgp = ss->mgp; 3123 mblk_t *bp; 3124 mcp_kreq_ether_send_t *req; 3125 struct myri10ge_tx_copybuf *cp; 3126 caddr_t rptr, ptr; 3127 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp; 3128 int resid, avail, idx, hdr_size_tmp, tx_boundary; 3129 int rdma_count; 3130 uint32_t seglen, len, boundary, low, high_swapped; 3131 uint16_t pseudo_hdr_offset = htons(mss); 3132 uint8_t flags; 3133 3134 tx_boundary = mgp->tx_boundary; 3135 hdr_size_tmp = hdr_size; 3136 resid = tx_boundary; 3137 count = 1; 3138 mutex_enter(&tx->lock); 3139 3140 /* check to see if the slots are really there */ 3141 avail = tx->mask - (tx->req - tx->done); 3142 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) { 3143 atomic_inc_32(&tx->stall); 3144 mutex_exit(&tx->lock); 3145 return (EBUSY); 3146 } 3147 3148 /* copy */ 3149 cum_len = -hdr_size; 3150 count = 0; 3151 req = req_list; 3152 idx = tx->mask & tx->req; 3153 cp = &tx->cp[idx]; 3154 low = ntohl(cp->dma.low); 3155 ptr = cp->va; 3156 cp->len = 0; 3157 if (mss) { 3158 int payload = pkt_size - hdr_size; 3159 uint16_t opackets = (payload / mss) + ((payload % mss) != 0); 3160 tx->info[idx].ostat.opackets = opackets; 3161 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size 3162 + pkt_size; 3163 } 3164 hdr_size_tmp = hdr_size; 3165 
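/*
 * editor's note: the loop below copies the frame into the per-slot
 * copy buffers so that no TSO segment crosses a tx_boundary: the
 * header is copied first, payload follows in mss-sized units, and the
 * copy hops to the next slot's buffer as soon as fewer than mss bytes
 * remain in the current one. As an illustration (hypothetical numbers,
 * not from the source): with mss = 1460 and tx_boundary = 4096, a
 * buffer accepts two full segments (2920 bytes) before resid drops
 * below mss and the copy advances to the next slot.
 */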
mss_resid = mss; 3166 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3167 tx_req = tx->req; 3168 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3169 mblen = MBLKL(bp); 3170 rptr = (caddr_t)bp->b_rptr; 3171 len = min(hdr_size_tmp, mblen); 3172 if (len) { 3173 bcopy(rptr, ptr, len); 3174 rptr += len; 3175 ptr += len; 3176 resid -= len; 3177 mblen -= len; 3178 hdr_size_tmp -= len; 3179 cp->len += len; 3180 if (hdr_size_tmp) 3181 continue; 3182 if (resid < mss) { 3183 tx_req++; 3184 idx = tx->mask & tx_req; 3185 cp = &tx->cp[idx]; 3186 low = ntohl(cp->dma.low); 3187 ptr = cp->va; 3188 resid = tx_boundary; 3189 } 3190 } 3191 while (mblen) { 3192 len = min(mss_resid, mblen); 3193 bcopy(rptr, ptr, len); 3194 mss_resid -= len; 3195 resid -= len; 3196 mblen -= len; 3197 rptr += len; 3198 ptr += len; 3199 cp->len += len; 3200 if (mss_resid == 0) { 3201 mss_resid = mss; 3202 if (resid < mss) { 3203 tx_req++; 3204 idx = tx->mask & tx_req; 3205 cp = &tx->cp[idx]; 3206 cp->len = 0; 3207 low = ntohl(cp->dma.low); 3208 ptr = cp->va; 3209 resid = tx_boundary; 3210 } 3211 } 3212 } 3213 } 3214 3215 req = req_list; 3216 pkt_size_tmp = pkt_size; 3217 count = 0; 3218 rdma_count = 0; 3219 tx_req = tx->req; 3220 while (pkt_size_tmp) { 3221 idx = tx->mask & tx_req; 3222 cp = &tx->cp[idx]; 3223 high_swapped = cp->dma.high; 3224 low = ntohl(cp->dma.low); 3225 len = cp->len; 3226 if (len == 0) { 3227 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n", 3228 pkt_size_tmp, pkt_size); 3229 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3230 mblen = MBLKL(bp); 3231 printf("mblen:%d\n", mblen); 3232 } 3233 pkt_size_tmp = pkt_size; 3234 tx_req = tx->req; 3235 while (pkt_size_tmp > 0) { 3236 idx = tx->mask & tx_req; 3237 cp = &tx->cp[idx]; 3238 printf("cp->len = %d\n", cp->len); 3239 pkt_size_tmp -= cp->len; 3240 tx_req++; 3241 } 3242 printf("dropped\n"); 3243 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3244 goto done; 3245 } 3246 pkt_size_tmp -= len; 3247 while (len) { 3248 while (len) { 3249 uint8_t flags_next; 3250 int cum_len_next; 3251 3252 boundary = (low + mgp->tx_boundary) & 3253 ~(mgp->tx_boundary - 1); 3254 seglen = boundary - low; 3255 if (seglen > len) 3256 seglen = len; 3257 3258 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3259 cum_len_next = cum_len + seglen; 3260 (req-rdma_count)->rdma_count = rdma_count + 1; 3261 if (likely(cum_len >= 0)) { 3262 /* payload */ 3263 int next_is_first, chop; 3264 3265 chop = (cum_len_next > mss); 3266 cum_len_next = cum_len_next % mss; 3267 next_is_first = (cum_len_next == 0); 3268 flags |= chop * 3269 MXGEFW_FLAGS_TSO_CHOP; 3270 flags_next |= next_is_first * 3271 MXGEFW_FLAGS_FIRST; 3272 rdma_count |= -(chop | next_is_first); 3273 rdma_count += chop & !next_is_first; 3274 } else if (likely(cum_len_next >= 0)) { 3275 /* header ends */ 3276 int small; 3277 3278 rdma_count = -1; 3279 cum_len_next = 0; 3280 seglen = -cum_len; 3281 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 3282 flags_next = MXGEFW_FLAGS_TSO_PLD | 3283 MXGEFW_FLAGS_FIRST | 3284 (small * MXGEFW_FLAGS_SMALL); 3285 } 3286 req->addr_high = high_swapped; 3287 req->addr_low = htonl(low); 3288 req->pseudo_hdr_offset = pseudo_hdr_offset; 3289 req->pad = 0; /* complete solid 16-byte block */ 3290 req->rdma_count = 1; 3291 req->cksum_offset = cksum_offset; 3292 req->length = htons(seglen); 3293 req->flags = flags | ((cum_len & 1) * 3294 MXGEFW_FLAGS_ALIGN_ODD); 3295 if (cksum_offset > seglen) 3296 cksum_offset -= seglen; 3297 else 3298 cksum_offset = 0; 3299 low += seglen; 3300 len -= seglen; 3301 cum_len = cum_len_next; 3302 req++; 
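/*
 * editor's note: req now points at the next, not-yet-filled
 * descriptor; its flags are cleared here and filled in on the next
 * pass, and the final TSO_LAST bit is patched in by the backwards
 * walk that follows the loop.
 */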
3303 req->flags = 0;
3304 flags = flags_next;
3305 count++;
3306 rdma_count++;
3307 }
3308 }
3309 tx_req++;
3310 }
3311 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3312 do {
3313 req--;
3314 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3315 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3316 MXGEFW_FLAGS_FIRST)));
3317
3318 myri10ge_submit_req(tx, req_list, count);
3319 done:
3320 mutex_exit(&tx->lock);
3321 freemsg(mp);
3322 return (DDI_SUCCESS);
3323 }
3324
3325 /*
3326 * Try to send the chain of buffers described by the mp. We must not
3327 * encapsulate more than eth->tx.req - eth->tx.done, or
3328 * MXGEFW_MAX_SEND_DESC, whichever is less.
3329 */
3330
3331 static int
3332 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3333 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3334 {
3335 struct myri10ge_priv *mgp = ss->mgp;
3336 myri10ge_tx_ring_t *tx = &ss->tx;
3337 mcp_kreq_ether_send_t *req;
3338 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3339 mblk_t *bp;
3340 ddi_dma_cookie_t cookie;
3341 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3342 rdma_count, cum_len, lso_hdr_size;
3343 uint32_t start, stuff, tx_offload_flags;
3344 uint32_t seglen, len, mss, boundary, low, high_swapped;
3345 uint_t ncookies;
3346 uint16_t pseudo_hdr_offset;
3347 uint8_t flags, cksum_offset, odd_flag;
3348 int pkt_size;
3349 int lso_copy = myri10ge_lso_copy;
3350 try_pullup = 1;
3351
3352 again:
3353 /* Setup checksum offloading, if needed */
3354 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3355 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3356 if (tx_offload_flags & HW_LSO) {
3357 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3358 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3359 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3360 freemsg(mp);
3361 return (DDI_SUCCESS);
3362 }
3363 } else {
3364 max_segs = MXGEFW_MAX_SEND_DESC;
3365 mss = 0;
3366 }
3367 req = req_list;
3368 cksum_offset = 0;
3369 pseudo_hdr_offset = 0;
3370
3371 /* leave an extra slot to keep the ring from wrapping */
3372 avail = tx->mask - (tx->req - tx->done);
3373
3374 /*
3375 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3376 * message will need to be pulled up in order to fit.
3377 * Otherwise, we are low on transmit descriptors, and it is
3378 * probably better to stall and try again rather than pull up
3379 * a message to fit.
3380 */
3381
3382 if (avail < max_segs) {
3383 err = EBUSY;
3384 atomic_inc_32(&tx->stall_early);
3385 goto stall;
3386 }
3387
3388 /* find out how long the frame is and how many segments it is */
3389 count = 0;
3390 odd_flag = 0;
3391 pkt_size = 0;
3392 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3393 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3394 dblk_t *dbp;
3395 mblen = MBLKL(bp);
3396 if (mblen == 0) {
3397 /*
3398 * we can't simply skip over 0-length mblks
3399 * because the hardware can't deal with them,
3400 * and we could leak them.
*/
3402 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3403 err = EIO;
3404 goto pullup;
3405 }
3406 /*
3407 * There's no advantage to copying most gesballoc
3408 * attached blocks, so disable lso copy in that case
3409 */
3410 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3411 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3412 lso_copy = 0;
3413 }
3414 }
3415 pkt_size += mblen;
3416 count++;
3417 }
3418
3419 /* Try to pull up excessively long chains */
3420 if (count >= max_segs) {
3421 err = myri10ge_pullup(ss, mp);
3422 if (likely(err == DDI_SUCCESS)) {
3423 count = 1;
3424 } else {
3425 if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
3426 /*
3427 * just let the h/w send it, it will be
3428 * inefficient, but it's better than dropping
3429 */
3430 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3431 } else {
3432 /* drop it */
3433 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3434 freemsg(mp);
3435 return (0);
3436 }
3437 }
3438 }
3439
3440 cum_len = 0;
3441 maclen = myri10ge_ether_parse_header(mp);
3442
3443 if (tx_offload_flags & HCK_PARTIALCKSUM) {
3444
3445 cksum_offset = start + maclen;
3446 pseudo_hdr_offset = htons(stuff + maclen);
3447 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3448 flags |= MXGEFW_FLAGS_CKSUM;
3449 }
3450
3451 lso_hdr_size = 0; /* -Wuninitialized */
3452 if (mss) { /* LSO */
3453 /* this removes any CKSUM flag from before */
3454 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3455 /*
3456 * parse the headers and set cum_len to a negative
3457 * value to reflect the offset of the TCP payload
3458 */
3459 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen);
3460 cum_len = -lso_hdr_size;
3461 if ((mss < mgp->tx_boundary) && lso_copy) {
3462 err = myri10ge_tx_tso_copy(ss, mp, req_list,
3463 lso_hdr_size, pkt_size, mss, cksum_offset);
3464 return (err);
3465 }
3466
3467 /*
3468 * for TSO, pseudo_hdr_offset holds mss. The firmware
3469 * figures out where to put the checksum by parsing
3470 * the header.
3471 */ 3472 3473 pseudo_hdr_offset = htons(mss); 3474 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) { 3475 flags |= MXGEFW_FLAGS_SMALL; 3476 if (pkt_size < myri10ge_tx_copylen) { 3477 req->cksum_offset = cksum_offset; 3478 req->pseudo_hdr_offset = pseudo_hdr_offset; 3479 req->flags = flags; 3480 err = myri10ge_tx_copy(ss, mp, req); 3481 return (err); 3482 } 3483 cum_len = 0; 3484 } 3485 3486 /* pull one DMA handle for each bp from our freelist */ 3487 handles = NULL; 3488 err = myri10ge_alloc_tx_handles(ss, count, &handles); 3489 if (err != DDI_SUCCESS) { 3490 err = DDI_FAILURE; 3491 goto stall; 3492 } 3493 count = 0; 3494 rdma_count = 0; 3495 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3496 mblen = MBLKL(bp); 3497 dma_handle = handles; 3498 handles = handles->next; 3499 3500 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL, 3501 (caddr_t)bp->b_rptr, mblen, 3502 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL, 3503 &cookie, &ncookies); 3504 if (unlikely(rv != DDI_DMA_MAPPED)) { 3505 err = EIO; 3506 try_pullup = 0; 3507 dma_handle->next = handles; 3508 handles = dma_handle; 3509 goto abort_with_handles; 3510 } 3511 3512 /* reserve the slot */ 3513 tx_info[count].m = bp; 3514 tx_info[count].handle = dma_handle; 3515 3516 for (; ; ) { 3517 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 3518 high_swapped = 3519 htonl(MYRI10GE_HIGHPART_TO_U32( 3520 cookie.dmac_laddress)); 3521 len = (uint32_t)cookie.dmac_size; 3522 while (len) { 3523 uint8_t flags_next; 3524 int cum_len_next; 3525 3526 boundary = (low + mgp->tx_boundary) & 3527 ~(mgp->tx_boundary - 1); 3528 seglen = boundary - low; 3529 if (seglen > len) 3530 seglen = len; 3531 3532 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3533 cum_len_next = cum_len + seglen; 3534 if (mss) { 3535 (req-rdma_count)->rdma_count = 3536 rdma_count + 1; 3537 if (likely(cum_len >= 0)) { 3538 /* payload */ 3539 int next_is_first, chop; 3540 3541 chop = (cum_len_next > mss); 3542 cum_len_next = 3543 cum_len_next % mss; 3544 next_is_first = 3545 (cum_len_next == 0); 3546 flags |= chop * 3547 MXGEFW_FLAGS_TSO_CHOP; 3548 flags_next |= next_is_first * 3549 MXGEFW_FLAGS_FIRST; 3550 rdma_count |= 3551 -(chop | next_is_first); 3552 rdma_count += 3553 chop & !next_is_first; 3554 } else if (likely(cum_len_next >= 0)) { 3555 /* header ends */ 3556 int small; 3557 3558 rdma_count = -1; 3559 cum_len_next = 0; 3560 seglen = -cum_len; 3561 small = (mss <= 3562 MXGEFW_SEND_SMALL_SIZE); 3563 flags_next = 3564 MXGEFW_FLAGS_TSO_PLD 3565 | MXGEFW_FLAGS_FIRST 3566 | (small * 3567 MXGEFW_FLAGS_SMALL); 3568 } 3569 } 3570 req->addr_high = high_swapped; 3571 req->addr_low = htonl(low); 3572 req->pseudo_hdr_offset = pseudo_hdr_offset; 3573 req->pad = 0; /* complete solid 16-byte block */ 3574 req->rdma_count = 1; 3575 req->cksum_offset = cksum_offset; 3576 req->length = htons(seglen); 3577 req->flags = flags | ((cum_len & 1) * odd_flag); 3578 if (cksum_offset > seglen) 3579 cksum_offset -= seglen; 3580 else 3581 cksum_offset = 0; 3582 low += seglen; 3583 len -= seglen; 3584 cum_len = cum_len_next; 3585 count++; 3586 rdma_count++; 3587 /* make sure all the segments will fit */ 3588 if (unlikely(count >= max_segs)) { 3589 MYRI10GE_ATOMIC_SLICE_STAT_INC( 3590 xmit_lowbuf); 3591 /* may try a pullup */ 3592 err = EBUSY; 3593 if (try_pullup) 3594 try_pullup = 2; 3595 goto abort_with_handles; 3596 } 3597 req++; 3598 req->flags = 0; 3599 flags = flags_next; 3600 tx_info[count].m = 0; 3601 } 3602 ncookies--; 3603 if (ncookies == 0) 3604 break; 3605 ddi_dma_nextcookie(dma_handle->h, 
&cookie); 3606 } 3607 } 3608 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3609 3610 if (mss) { 3611 do { 3612 req--; 3613 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3614 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3615 MXGEFW_FLAGS_FIRST))); 3616 } 3617 3618 /* calculate tx stats */ 3619 if (mss) { 3620 uint16_t opackets; 3621 int payload; 3622 3623 payload = pkt_size - lso_hdr_size; 3624 opackets = (payload / mss) + ((payload % mss) != 0); 3625 tx_info[0].stat.un.all = 0; 3626 tx_info[0].ostat.opackets = opackets; 3627 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size 3628 + pkt_size; 3629 } else { 3630 myri10ge_tx_stat(&tx_info[0].stat, 3631 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size); 3632 } 3633 mutex_enter(&tx->lock); 3634 3635 /* check to see if the slots are really there */ 3636 avail = tx->mask - (tx->req - tx->done); 3637 if (unlikely(avail <= count)) { 3638 mutex_exit(&tx->lock); 3639 err = 0; 3640 goto late_stall; 3641 } 3642 3643 myri10ge_send_locked(tx, req_list, tx_info, count); 3644 mutex_exit(&tx->lock); 3645 return (DDI_SUCCESS); 3646 3647 late_stall: 3648 try_pullup = 0; 3649 atomic_inc_32(&tx->stall_late); 3650 3651 abort_with_handles: 3652 /* unbind and free handles from previous mblks */ 3653 for (i = 0; i < count; i++) { 3654 bp = tx_info[i].m; 3655 tx_info[i].m = 0; 3656 if (bp) { 3657 dma_handle = tx_info[i].handle; 3658 (void) ddi_dma_unbind_handle(dma_handle->h); 3659 dma_handle->next = handles; 3660 handles = dma_handle; 3661 tx_info[i].handle = NULL; 3662 tx_info[i].m = NULL; 3663 } 3664 } 3665 myri10ge_free_tx_handle_slist(tx, handles); 3666 pullup: 3667 if (try_pullup) { 3668 err = myri10ge_pullup(ss, mp); 3669 if (err != DDI_SUCCESS && try_pullup == 2) { 3670 /* drop */ 3671 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3672 freemsg(mp); 3673 return (0); 3674 } 3675 try_pullup = 0; 3676 goto again; 3677 } 3678 3679 stall: 3680 if (err != 0) { 3681 if (err == EBUSY) { 3682 atomic_inc_32(&tx->stall); 3683 } else { 3684 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3685 } 3686 } 3687 return (err); 3688 } 3689 3690 static mblk_t * 3691 myri10ge_send_wrapper(void *arg, mblk_t *mp) 3692 { 3693 struct myri10ge_slice_state *ss = arg; 3694 int err = 0; 3695 mcp_kreq_ether_send_t *req_list; 3696 #if defined(__i386) 3697 /* 3698 * We need about 2.5KB of scratch space to handle transmits. 3699 * i86pc has only 8KB of kernel stack space, so we malloc the 3700 * scratch space there rather than keeping it on the stack. 
3701 */ 3702 size_t req_size, tx_info_size; 3703 struct myri10ge_tx_buffer_state *tx_info; 3704 caddr_t req_bytes; 3705 3706 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3707 + 8; 3708 req_bytes = kmem_alloc(req_size, KM_SLEEP); 3709 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1); 3710 tx_info = kmem_alloc(tx_info_size, KM_SLEEP); 3711 #else 3712 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3713 + 8]; 3714 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1]; 3715 #endif 3716 3717 /* ensure req_list entries are aligned to 8 bytes */ 3718 req_list = (struct mcp_kreq_ether_send *) 3719 (((unsigned long)req_bytes + 7UL) & ~7UL); 3720 3721 err = myri10ge_send(ss, mp, req_list, tx_info); 3722 3723 #if defined(__i386) 3724 kmem_free(tx_info, tx_info_size); 3725 kmem_free(req_bytes, req_size); 3726 #endif 3727 if (err) 3728 return (mp); 3729 else 3730 return (NULL); 3731 } 3732 3733 static int 3734 myri10ge_addmac(void *arg, const uint8_t *mac_addr) 3735 { 3736 struct myri10ge_priv *mgp = arg; 3737 int err; 3738 3739 if (mac_addr == NULL) 3740 return (EINVAL); 3741 3742 mutex_enter(&mgp->intrlock); 3743 if (mgp->macaddr_cnt) { 3744 mutex_exit(&mgp->intrlock); 3745 return (ENOSPC); 3746 } 3747 err = myri10ge_m_unicst(mgp, mac_addr); 3748 if (!err) 3749 mgp->macaddr_cnt++; 3750 3751 mutex_exit(&mgp->intrlock); 3752 if (err) 3753 return (err); 3754 3755 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr)); 3756 return (0); 3757 } 3758 3759 /*ARGSUSED*/ 3760 static int 3761 myri10ge_remmac(void *arg, const uint8_t *mac_addr) 3762 { 3763 struct myri10ge_priv *mgp = arg; 3764 3765 mutex_enter(&mgp->intrlock); 3766 mgp->macaddr_cnt--; 3767 mutex_exit(&mgp->intrlock); 3768 3769 return (0); 3770 } 3771 3772 /*ARGSUSED*/ 3773 static void 3774 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index, 3775 mac_group_info_t *infop, mac_group_handle_t gh) 3776 { 3777 struct myri10ge_priv *mgp = arg; 3778 3779 if (rtype != MAC_RING_TYPE_RX) 3780 return; 3781 3782 infop->mgi_driver = (mac_group_driver_t)mgp; 3783 infop->mgi_start = NULL; 3784 infop->mgi_stop = NULL; 3785 infop->mgi_addmac = myri10ge_addmac; 3786 infop->mgi_remmac = myri10ge_remmac; 3787 infop->mgi_count = mgp->num_slices; 3788 } 3789 3790 static int 3791 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) 3792 { 3793 struct myri10ge_slice_state *ss; 3794 3795 ss = (struct myri10ge_slice_state *)rh; 3796 mutex_enter(&ss->rx_lock); 3797 ss->rx_gen_num = mr_gen_num; 3798 mutex_exit(&ss->rx_lock); 3799 return (0); 3800 } 3801 3802 /* 3803 * Retrieve a value for one of the statistics for a particular rx ring 3804 */ 3805 int 3806 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3807 { 3808 struct myri10ge_slice_state *ss; 3809 3810 ss = (struct myri10ge_slice_state *)rh; 3811 switch (stat) { 3812 case MAC_STAT_RBYTES: 3813 *val = ss->rx_stats.ibytes; 3814 break; 3815 3816 case MAC_STAT_IPACKETS: 3817 *val = ss->rx_stats.ipackets; 3818 break; 3819 3820 default: 3821 *val = 0; 3822 return (ENOTSUP); 3823 } 3824 3825 return (0); 3826 } 3827 3828 /* 3829 * Retrieve a value for one of the statistics for a particular tx ring 3830 */ 3831 int 3832 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3833 { 3834 struct myri10ge_slice_state *ss; 3835 3836 ss = (struct myri10ge_slice_state *)rh; 3837 switch (stat) { 3838 case MAC_STAT_OBYTES: 3839 *val = ss->tx.stats.obytes; 3840 break; 3841 3842 case 
MAC_STAT_OPACKETS: 3843 *val = ss->tx.stats.opackets; 3844 break; 3845 3846 default: 3847 *val = 0; 3848 return (ENOTSUP); 3849 } 3850 3851 return (0); 3852 } 3853 3854 static int 3855 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) 3856 { 3857 struct myri10ge_slice_state *ss; 3858 3859 ss = (struct myri10ge_slice_state *)intrh; 3860 mutex_enter(&ss->poll_lock); 3861 ss->rx_polling = B_TRUE; 3862 mutex_exit(&ss->poll_lock); 3863 return (0); 3864 } 3865 3866 static int 3867 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh) 3868 { 3869 struct myri10ge_slice_state *ss; 3870 3871 ss = (struct myri10ge_slice_state *)intrh; 3872 mutex_enter(&ss->poll_lock); 3873 ss->rx_polling = B_FALSE; 3874 if (ss->rx_token) { 3875 *ss->irq_claim = BE_32(3); 3876 ss->rx_token = 0; 3877 } 3878 mutex_exit(&ss->poll_lock); 3879 return (0); 3880 } 3881 3882 /*ARGSUSED*/ 3883 static void 3884 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 3885 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 3886 { 3887 struct myri10ge_priv *mgp = arg; 3888 struct myri10ge_slice_state *ss; 3889 mac_intr_t *mintr = &infop->mri_intr; 3890 3891 ASSERT((unsigned int)ring_index < mgp->num_slices); 3892 3893 ss = &mgp->ss[ring_index]; 3894 switch (rtype) { 3895 case MAC_RING_TYPE_RX: 3896 ss->rx_rh = rh; 3897 infop->mri_driver = (mac_ring_driver_t)ss; 3898 infop->mri_start = myri10ge_ring_start; 3899 infop->mri_stop = NULL; 3900 infop->mri_poll = myri10ge_poll_rx; 3901 infop->mri_stat = myri10ge_rx_ring_stat; 3902 mintr->mi_handle = (mac_intr_handle_t)ss; 3903 mintr->mi_enable = myri10ge_rx_ring_intr_enable; 3904 mintr->mi_disable = myri10ge_rx_ring_intr_disable; 3905 break; 3906 case MAC_RING_TYPE_TX: 3907 ss->tx.rh = rh; 3908 infop->mri_driver = (mac_ring_driver_t)ss; 3909 infop->mri_start = NULL; 3910 infop->mri_stop = NULL; 3911 infop->mri_tx = myri10ge_send_wrapper; 3912 infop->mri_stat = myri10ge_tx_ring_stat; 3913 break; 3914 default: 3915 break; 3916 } 3917 } 3918 3919 static void 3920 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp) 3921 { 3922 if (mgp->ksp_stat == NULL) 3923 return; 3924 3925 kstat_delete(mgp->ksp_stat); 3926 mgp->ksp_stat = NULL; 3927 } 3928 3929 static void 3930 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss) 3931 { 3932 if (ss->ksp_stat == NULL) 3933 return; 3934 3935 kstat_delete(ss->ksp_stat); 3936 ss->ksp_stat = NULL; 3937 } 3938 3939 static void 3940 myri10ge_info_destroy(struct myri10ge_priv *mgp) 3941 { 3942 if (mgp->ksp_info == NULL) 3943 return; 3944 3945 kstat_delete(mgp->ksp_info); 3946 mgp->ksp_info = NULL; 3947 } 3948 3949 static int 3950 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw) 3951 { 3952 struct myri10ge_nic_stat *ethstat; 3953 struct myri10ge_priv *mgp; 3954 mcp_irq_data_t *fw_stats; 3955 3956 3957 if (rw == KSTAT_WRITE) 3958 return (EACCES); 3959 3960 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data; 3961 mgp = (struct myri10ge_priv *)ksp->ks_private; 3962 fw_stats = mgp->ss[0].fw_stats; 3963 3964 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma; 3965 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma; 3966 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma; 3967 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) 3968 ethstat->dma_force_physical.value.ul = 1; 3969 else 3970 ethstat->dma_force_physical.value.ul = 0; 3971 ethstat->lanes.value.ul = mgp->pcie_link_width; 3972 ethstat->dropped_bad_crc32.value.ul = 3973 ntohl(fw_stats->dropped_bad_crc32); 3974 
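/*
 * editor's note: the remaining counters, like dropped_bad_crc32 above,
 * live in the mcp_irq_data_t block that the firmware DMAs to the host
 * in network byte order, hence the ntohl() around every field.
 */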
ethstat->dropped_bad_phy.value.ul = 3975 ntohl(fw_stats->dropped_bad_phy); 3976 ethstat->dropped_link_error_or_filtered.value.ul = 3977 ntohl(fw_stats->dropped_link_error_or_filtered); 3978 ethstat->dropped_link_overflow.value.ul = 3979 ntohl(fw_stats->dropped_link_overflow); 3980 ethstat->dropped_multicast_filtered.value.ul = 3981 ntohl(fw_stats->dropped_multicast_filtered); 3982 ethstat->dropped_no_big_buffer.value.ul = 3983 ntohl(fw_stats->dropped_no_big_buffer); 3984 ethstat->dropped_no_small_buffer.value.ul = 3985 ntohl(fw_stats->dropped_no_small_buffer); 3986 ethstat->dropped_overrun.value.ul = 3987 ntohl(fw_stats->dropped_overrun); 3988 ethstat->dropped_pause.value.ul = 3989 ntohl(fw_stats->dropped_pause); 3990 ethstat->dropped_runt.value.ul = 3991 ntohl(fw_stats->dropped_runt); 3992 ethstat->link_up.value.ul = 3993 ntohl(fw_stats->link_up); 3994 ethstat->dropped_unicast_filtered.value.ul = 3995 ntohl(fw_stats->dropped_unicast_filtered); 3996 return (0); 3997 } 3998 3999 static int 4000 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw) 4001 { 4002 struct myri10ge_slice_stat *ethstat; 4003 struct myri10ge_slice_state *ss; 4004 4005 if (rw == KSTAT_WRITE) 4006 return (EACCES); 4007 4008 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data; 4009 ss = (struct myri10ge_slice_state *)ksp->ks_private; 4010 4011 ethstat->rx_big.value.ul = ss->j_rx_cnt; 4012 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt; 4013 ethstat->rx_bigbuf_pool.value.ul = 4014 ss->jpool.num_alloc - ss->jbufs_for_smalls; 4015 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls; 4016 ethstat->rx_small.value.ul = ss->rx_small.cnt - 4017 (ss->rx_small.mask + 1); 4018 ethstat->tx_done.value.ul = ss->tx.done; 4019 ethstat->tx_req.value.ul = ss->tx.req; 4020 ethstat->tx_activate.value.ul = ss->tx.activate; 4021 ethstat->xmit_sched.value.ul = ss->tx.sched; 4022 ethstat->xmit_stall.value.ul = ss->tx.stall; 4023 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early; 4024 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late; 4025 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err); 4026 return (0); 4027 } 4028 4029 static int 4030 myri10ge_info_kstat_update(kstat_t *ksp, int rw) 4031 { 4032 struct myri10ge_info *info; 4033 struct myri10ge_priv *mgp; 4034 4035 4036 if (rw == KSTAT_WRITE) 4037 return (EACCES); 4038 4039 info = (struct myri10ge_info *)ksp->ks_data; 4040 mgp = (struct myri10ge_priv *)ksp->ks_private; 4041 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR); 4042 kstat_named_setstr(&info->firmware_version, mgp->fw_version); 4043 kstat_named_setstr(&info->firmware_name, mgp->fw_name); 4044 kstat_named_setstr(&info->interrupt_type, mgp->intr_type); 4045 kstat_named_setstr(&info->product_code, mgp->pc_str); 4046 kstat_named_setstr(&info->serial_number, mgp->sn_str); 4047 return (0); 4048 } 4049 4050 static struct myri10ge_info myri10ge_info_template = { 4051 { "driver_version", KSTAT_DATA_STRING }, 4052 { "firmware_version", KSTAT_DATA_STRING }, 4053 { "firmware_name", KSTAT_DATA_STRING }, 4054 { "interrupt_type", KSTAT_DATA_STRING }, 4055 { "product_code", KSTAT_DATA_STRING }, 4056 { "serial_number", KSTAT_DATA_STRING }, 4057 }; 4058 static kmutex_t myri10ge_info_template_lock; 4059 4060 4061 static int 4062 myri10ge_info_init(struct myri10ge_priv *mgp) 4063 { 4064 struct kstat *ksp; 4065 4066 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4067 "myri10ge_info", "net", KSTAT_TYPE_NAMED, 4068 sizeof (myri10ge_info_template) / 4069 sizeof (kstat_named_t), 
KSTAT_FLAG_VIRTUAL); 4070 if (ksp == NULL) { 4071 cmn_err(CE_WARN, 4072 "%s: myri10ge_info_init: kstat_create failed", mgp->name); 4073 return (DDI_FAILURE); 4074 } 4075 mgp->ksp_info = ksp; 4076 ksp->ks_update = myri10ge_info_kstat_update; 4077 ksp->ks_private = (void *) mgp; 4078 ksp->ks_data = &myri10ge_info_template; 4079 ksp->ks_lock = &myri10ge_info_template_lock; 4080 if (MYRI10GE_VERSION_STR != NULL) 4081 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1; 4082 if (mgp->fw_version != NULL) 4083 ksp->ks_data_size += strlen(mgp->fw_version) + 1; 4084 ksp->ks_data_size += strlen(mgp->fw_name) + 1; 4085 ksp->ks_data_size += strlen(mgp->intr_type) + 1; 4086 if (mgp->pc_str != NULL) 4087 ksp->ks_data_size += strlen(mgp->pc_str) + 1; 4088 if (mgp->sn_str != NULL) 4089 ksp->ks_data_size += strlen(mgp->sn_str) + 1; 4090 4091 kstat_install(ksp); 4092 return (DDI_SUCCESS); 4093 } 4094 4095 4096 static int 4097 myri10ge_nic_stat_init(struct myri10ge_priv *mgp) 4098 { 4099 struct kstat *ksp; 4100 struct myri10ge_nic_stat *ethstat; 4101 4102 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4103 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED, 4104 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4105 if (ksp == NULL) { 4106 cmn_err(CE_WARN, 4107 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4108 return (DDI_FAILURE); 4109 } 4110 mgp->ksp_stat = ksp; 4111 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data); 4112 4113 kstat_named_init(&ethstat->dma_read_bw_MBs, 4114 "dma_read_bw_MBs", KSTAT_DATA_ULONG); 4115 kstat_named_init(&ethstat->dma_write_bw_MBs, 4116 "dma_write_bw_MBs", KSTAT_DATA_ULONG); 4117 kstat_named_init(&ethstat->dma_read_write_bw_MBs, 4118 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG); 4119 kstat_named_init(&ethstat->dma_force_physical, 4120 "dma_force_physical", KSTAT_DATA_ULONG); 4121 kstat_named_init(&ethstat->lanes, 4122 "lanes", KSTAT_DATA_ULONG); 4123 kstat_named_init(&ethstat->dropped_bad_crc32, 4124 "dropped_bad_crc32", KSTAT_DATA_ULONG); 4125 kstat_named_init(&ethstat->dropped_bad_phy, 4126 "dropped_bad_phy", KSTAT_DATA_ULONG); 4127 kstat_named_init(&ethstat->dropped_link_error_or_filtered, 4128 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG); 4129 kstat_named_init(&ethstat->dropped_link_overflow, 4130 "dropped_link_overflow", KSTAT_DATA_ULONG); 4131 kstat_named_init(&ethstat->dropped_multicast_filtered, 4132 "dropped_multicast_filtered", KSTAT_DATA_ULONG); 4133 kstat_named_init(&ethstat->dropped_no_big_buffer, 4134 "dropped_no_big_buffer", KSTAT_DATA_ULONG); 4135 kstat_named_init(&ethstat->dropped_no_small_buffer, 4136 "dropped_no_small_buffer", KSTAT_DATA_ULONG); 4137 kstat_named_init(&ethstat->dropped_overrun, 4138 "dropped_overrun", KSTAT_DATA_ULONG); 4139 kstat_named_init(&ethstat->dropped_pause, 4140 "dropped_pause", KSTAT_DATA_ULONG); 4141 kstat_named_init(&ethstat->dropped_runt, 4142 "dropped_runt", KSTAT_DATA_ULONG); 4143 kstat_named_init(&ethstat->dropped_unicast_filtered, 4144 "dropped_unicast_filtered", KSTAT_DATA_ULONG); 4147 kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG); 4148 kstat_named_init(&ethstat->link_changes, "link_changes", 4149 KSTAT_DATA_ULONG); 4150 ksp->ks_update = myri10ge_nic_stat_kstat_update; 4151 ksp->ks_private = (void *) mgp; 4152 kstat_install(ksp); 4153 return (DDI_SUCCESS); 4154 } 4155 4156 static int 4157 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss) 4158 { 4159 struct myri10ge_priv *mgp = ss->mgp; 4160 struct kstat *ksp; 4161 struct myri10ge_slice_stat
*ethstat; 4162 int instance; 4163 4164 /* 4165 * fake an instance so that the same slice numbers from 4166 * different instances do not collide 4167 */ 4168 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss); 4169 ksp = kstat_create("myri10ge", instance, 4170 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED, 4171 sizeof (*ethstat) / sizeof (kstat_named_t), 0); 4172 if (ksp == NULL) { 4173 cmn_err(CE_WARN, 4174 "%s: myri10ge_stat_init: kstat_create failed", mgp->name); 4175 return (DDI_FAILURE); 4176 } 4177 ss->ksp_stat = ksp; 4178 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data); 4179 kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum", 4180 KSTAT_DATA_ULONG); 4181 kstat_named_init(&ethstat->lro_flushed, "lro_flushed", 4182 KSTAT_DATA_ULONG); 4183 kstat_named_init(&ethstat->lro_queued, "lro_queued", 4184 KSTAT_DATA_ULONG); 4185 kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware", 4186 KSTAT_DATA_ULONG); 4187 kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool", 4188 KSTAT_DATA_ULONG); 4189 kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls", 4190 KSTAT_DATA_ULONG); 4191 kstat_named_init(&ethstat->rx_copy, "rx_copy", 4192 KSTAT_DATA_ULONG); 4193 kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf", 4194 KSTAT_DATA_ULONG); 4195 kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf", 4196 KSTAT_DATA_ULONG); 4197 kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len", 4198 KSTAT_DATA_ULONG); 4199 kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup", 4200 KSTAT_DATA_ULONG); 4201 kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first", 4202 KSTAT_DATA_ULONG); 4203 kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf", 4204 KSTAT_DATA_ULONG); 4205 kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags", 4206 KSTAT_DATA_ULONG); 4207 kstat_named_init(&ethstat->xmit_sched, "xmit_sched", 4208 KSTAT_DATA_ULONG); 4209 kstat_named_init(&ethstat->xmit_stall, "xmit_stall", 4210 KSTAT_DATA_ULONG); 4211 kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early", 4212 KSTAT_DATA_ULONG); 4213 kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late", 4214 KSTAT_DATA_ULONG); 4215 kstat_named_init(&ethstat->xmit_err, "xmit_err", 4216 KSTAT_DATA_ULONG); 4217 kstat_named_init(&ethstat->tx_req, "tx_req", 4218 KSTAT_DATA_ULONG); 4219 kstat_named_init(&ethstat->tx_activate, "tx_activate", 4220 KSTAT_DATA_ULONG); 4221 kstat_named_init(&ethstat->tx_done, "tx_done", 4222 KSTAT_DATA_ULONG); 4223 kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced", 4224 KSTAT_DATA_ULONG); 4225 kstat_named_init(&ethstat->rx_big, "rx_big", 4226 KSTAT_DATA_ULONG); 4227 kstat_named_init(&ethstat->rx_small, "rx_small", 4228 KSTAT_DATA_ULONG); 4229 ksp->ks_update = myri10ge_slice_stat_kstat_update; 4230 ksp->ks_private = (void *) ss; 4231 kstat_install(ksp); 4232 return (DDI_SUCCESS); 4233 } 4234 4235 4236 4237 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4238 4239 #include <vm/hat.h> 4240 #include <sys/ddi_isa.h> 4241 void *device_arena_alloc(size_t size, int vm_flag); 4242 void device_arena_free(void *vaddr, size_t size); 4243 4244 static void 4245 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4246 { 4247 dev_info_t *parent_dip; 4248 ddi_acc_handle_t handle; 4249 unsigned long bus_number, dev_number, func_number; 4250 unsigned long cfg_pa, paddr, base, pgoffset; 4251 char *cvaddr, *ptr; 4252 uint32_t *ptr32; 4253 int retval = DDI_FAILURE; 4254 int dontcare; 4255 uint16_t read_vid, read_did, vendor_id, device_id; 4256 4257 if (!myri10ge_nvidia_ecrc_enable) 4258
return; 4259 4260 parent_dip = ddi_get_parent(mgp->dip); 4261 if (parent_dip == NULL) { 4262 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name); 4263 return; 4264 } 4265 4266 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) { 4267 cmn_err(CE_WARN, 4268 "%s: Could not access my parent's registers", mgp->name); 4269 return; 4270 } 4271 4272 vendor_id = pci_config_get16(handle, PCI_CONF_VENID); 4273 device_id = pci_config_get16(handle, PCI_CONF_DEVID); 4274 pci_config_teardown(&handle); 4275 4276 if (myri10ge_verbose) { 4277 unsigned long bus_number, dev_number, func_number; 4278 int reg_set, span; 4279 (void) myri10ge_reg_set(parent_dip, &reg_set, &span, 4280 &bus_number, &dev_number, &func_number); 4282 printf("%s: parent at %ld:%ld:%ld\n", mgp->name, 4283 bus_number, dev_number, func_number); 4284 } 4285 4286 if (vendor_id != 0x10de) 4287 return; 4288 4289 if (device_id != 0x005d /* CK804 */ && 4290 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) { 4291 return; 4292 } 4293 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare, 4294 &bus_number, &dev_number, &func_number); 4295 4296 for (cfg_pa = 0xf0000000UL; 4297 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL; 4298 cfg_pa -= 0x10000000UL) { 4299 /* find the config space address for the nvidia bridge */ 4300 paddr = (cfg_pa + bus_number * 0x00100000UL + 4301 (dev_number * 8 + func_number) * 0x00001000UL); 4302 4303 base = paddr & (~MMU_PAGEOFFSET); 4304 pgoffset = paddr & MMU_PAGEOFFSET; 4305 4306 /* map it into the kernel */ 4307 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP); 4308 if (cvaddr == NULL) { 4309 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n", 4310 mgp->name); 4311 return; } 4312 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1), 4313 i_ddi_paddr_to_pfn(base), 4314 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK); 4315 4316 ptr = cvaddr + pgoffset; 4317 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID); 4318 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID); 4319 if (vendor_id == read_vid && device_id == read_did) { 4320 ptr32 = (uint32_t *)(void *)(ptr + 0x178); 4321 if (myri10ge_verbose) 4322 printf("%s: Enabling ECRC on upstream " 4323 "Nvidia bridge (0x%x:0x%x) " 4324 "at %ld:%ld:%ld\n", mgp->name, 4325 read_vid, read_did, bus_number, 4326 dev_number, func_number); 4327 *ptr32 |= 0x40; 4328 retval = DDI_SUCCESS; 4329 } 4330 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK); 4331 device_arena_free(cvaddr, ptob(1)); 4332 } 4333 } 4334 4335 #else 4336 /*ARGSUSED*/ 4337 static void 4338 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp) 4339 { 4340 } 4341 #endif /* i386 */ 4342 4343 4344 /* 4345 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 4346 * when the PCI-E Completion packets are aligned on an 8-byte 4347 * boundary. Some PCI-E chip sets always align Completion packets; on 4348 * the ones that do not, the alignment can be enforced by enabling 4349 * ECRC generation (if supported). 4350 * 4351 * When PCI-E Completion packets are not aligned, it is actually more 4352 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 4353 * 4354 * If the driver can neither enable ECRC nor verify that it has 4355 * already been enabled, then it must use a firmware image which works 4356 * around unaligned completion packets (ethp_z8e.dat), and it should 4357 * also ensure that it never gives the device a Read-DMA which is 4358 * larger than 2KB by setting the tx.boundary to 2KB.
If ECRC is 4359 * enabled, then the driver should use the aligned (eth_z8e.dat) 4360 * firmware image, and set tx.boundary to 4KB. 4361 */ 4362 4363 4364 static int 4365 myri10ge_firmware_probe(struct myri10ge_priv *mgp) 4366 { 4367 int status; 4368 4369 mgp->tx_boundary = 4096; 4370 /* 4371 * Verify the max read request size was set to 4KB 4372 * before trying the test with 4KB. 4373 */ 4374 if (mgp->max_read_request_4k == 0) 4375 mgp->tx_boundary = 2048; 4376 /* 4377 * load the optimized firmware which assumes aligned PCIe 4378 * completions in order to see if it works on this host. 4379 */ 4380 4381 mgp->fw_name = "rss_eth_z8e"; 4382 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4383 mgp->eth_z8e_length = rss_eth_z8e_length; 4384 4385 status = myri10ge_load_firmware(mgp); 4386 if (status != 0) { 4387 return (status); 4388 } 4389 /* 4390 * Enable ECRC if possible 4391 */ 4392 myri10ge_enable_nvidia_ecrc(mgp); 4393 4394 /* 4395 * Run a DMA test which watches for unaligned completions and 4396 * aborts on the first one seen. 4397 */ 4398 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 4399 if (status == 0) 4400 return (0); /* keep the aligned firmware */ 4401 4402 if (status != E2BIG) 4403 cmn_err(CE_WARN, "%s: DMA test failed: %d\n", 4404 mgp->name, status); 4405 if (status == ENOSYS) 4406 cmn_err(CE_WARN, "%s: Falling back to ethp! " 4407 "Please install up-to-date firmware\n", mgp->name); 4408 return (status); 4409 } 4410 4411 static int 4412 myri10ge_select_firmware(struct myri10ge_priv *mgp) 4413 { 4414 int aligned; 4415 4416 aligned = 0; 4417 4418 if (myri10ge_force_firmware == 1) { 4419 if (myri10ge_verbose) 4420 printf("%s: Assuming aligned completions (forced)\n", 4421 mgp->name); 4422 aligned = 1; 4423 goto done; 4424 } 4425 4426 if (myri10ge_force_firmware == 2) { 4427 if (myri10ge_verbose) 4428 printf("%s: Assuming unaligned completions (forced)\n", 4429 mgp->name); 4430 aligned = 0; 4431 goto done; 4432 } 4433 4434 /* If the link width is less than 8, we may use the aligned firmware */ 4435 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) { 4436 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n", 4437 mgp->name, mgp->pcie_link_width); 4438 aligned = 1; 4439 goto done; 4440 } 4441 4442 if (0 == myri10ge_firmware_probe(mgp)) 4443 return (0); /* keep optimized firmware */ 4444 4445 done: 4446 if (aligned) { 4447 mgp->fw_name = "rss_eth_z8e"; 4448 mgp->eth_z8e = (unsigned char *)rss_eth_z8e; 4449 mgp->eth_z8e_length = rss_eth_z8e_length; 4450 mgp->tx_boundary = 4096; 4451 } else { 4452 mgp->fw_name = "rss_ethp_z8e"; 4453 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e; 4454 mgp->eth_z8e_length = rss_ethp_z8e_length; 4455 mgp->tx_boundary = 2048; 4456 } 4457 4458 return (myri10ge_load_firmware(mgp)); 4459 } 4460 4461 static int 4462 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler) 4463 { 4464 dev_info_t *devinfo = mgp->dip; 4465 int count, avail, actual, intr_types; 4466 int x, y, rc, inum = 0; 4467 4468 4469 rc = ddi_intr_get_supported_types(devinfo, &intr_types); 4470 if (rc != DDI_SUCCESS) { 4471 cmn_err(CE_WARN, 4472 "!%s: ddi_intr_get_supported_types() failure, rc = %d\n", mgp->name, 4473 rc); 4474 return (DDI_FAILURE); 4475 } 4476 4477 if (!myri10ge_use_msi) 4478 intr_types &= ~DDI_INTR_TYPE_MSI; 4479 if (!myri10ge_use_msix) 4480 intr_types &= ~DDI_INTR_TYPE_MSIX; 4481 4482 if (intr_types & DDI_INTR_TYPE_MSIX) { 4483 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX; 4484 mgp->intr_type = "MSI-X"; 4485 } else if (intr_types & DDI_INTR_TYPE_MSI) { 4486 mgp->ddi_intr_type =
DDI_INTR_TYPE_MSI; 4487 mgp->intr_type = "MSI"; 4488 } else { 4489 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED; 4490 mgp->intr_type = "Legacy"; 4491 } 4492 /* Get number of interrupts */ 4493 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count); 4494 if ((rc != DDI_SUCCESS) || (count == 0)) { 4495 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, " 4496 "count: %d", mgp->name, rc, count); 4497 4498 return (DDI_FAILURE); 4499 } 4500 4501 /* Get number of available interrupts */ 4502 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail); 4503 if ((rc != DDI_SUCCESS) || (avail == 0)) { 4504 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, " 4505 "rc: %d, avail: %d\n", mgp->name, rc, avail); 4506 return (DDI_FAILURE); 4507 } 4508 if (avail < count) { 4509 cmn_err(CE_NOTE, 4510 "!%s: nintrs() returned %d, navail returned %d", 4511 mgp->name, count, avail); 4512 count = avail; 4513 } 4514 4515 if (count < mgp->num_slices) 4516 return (DDI_FAILURE); 4517 4518 if (count > mgp->num_slices) 4519 count = mgp->num_slices; 4520 4521 /* Allocate memory for MSI interrupts */ 4522 mgp->intr_size = count * sizeof (ddi_intr_handle_t); 4523 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP); 4524 4525 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum, 4526 count, &actual, DDI_INTR_ALLOC_NORMAL); 4527 4528 if ((rc != DDI_SUCCESS) || (actual == 0)) { 4529 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d", 4530 mgp->name, rc); 4531 4532 kmem_free(mgp->htable, mgp->intr_size); 4533 mgp->htable = NULL; 4534 return (DDI_FAILURE); 4535 } 4536 4537 if ((actual < count) && myri10ge_verbose) { 4538 cmn_err(CE_NOTE, "%s: got %d/%d slices", 4539 mgp->name, actual, count); 4540 } 4541 4542 mgp->intr_cnt = actual; 4543 4544 /* 4545 * Get priority for first irq, assume remaining are all the same 4546 */ 4547 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri) 4548 != DDI_SUCCESS) { 4549 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name); 4550 4551 /* Free already allocated intr */ 4552 for (y = 0; y < actual; y++) { 4553 (void) ddi_intr_free(mgp->htable[y]); 4554 } 4555 4556 kmem_free(mgp->htable, mgp->intr_size); 4557 mgp->htable = NULL; 4558 return (DDI_FAILURE); 4559 } 4560 4561 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri; 4562 4563 if (!add_handler) 4564 return (DDI_SUCCESS); 4565 4566 /* Call ddi_intr_add_handler() */ 4567 for (x = 0; x < actual; x++) { 4568 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr, 4569 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) { 4570 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed", 4571 mgp->name); 4572 4573 /* Free already allocated intr */ 4574 for (y = 0; y < actual; y++) { 4575 (void) ddi_intr_free(mgp->htable[y]); 4576 } 4577 4578 kmem_free(mgp->htable, mgp->intr_size); 4579 mgp->htable = NULL; 4580 return (DDI_FAILURE); 4581 } 4582 } 4583 4584 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap); 4585 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4586 /* Call ddi_intr_block_enable() for MSI */ 4587 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt); 4588 } else { 4589 /* Call ddi_intr_enable() for MSI non block enable */ 4590 for (x = 0; x < mgp->intr_cnt; x++) { 4591 (void) ddi_intr_enable(mgp->htable[x]); 4592 } 4593 } 4594 4595 return (DDI_SUCCESS); 4596 } 4597 4598 static void 4599 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed) 4600 { 4601 int x, err; 4602 4603 /* Disable all interrupts */ 4604 if (handler_installed) { 4605 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) { 4606 /* Call 
ddi_intr_block_disable() */ 4607 (void) ddi_intr_block_disable(mgp->htable, 4608 mgp->intr_cnt); 4609 } else { 4610 for (x = 0; x < mgp->intr_cnt; x++) { 4611 (void) ddi_intr_disable(mgp->htable[x]); 4612 } 4613 } 4614 } 4615 4616 for (x = 0; x < mgp->intr_cnt; x++) { 4617 if (handler_installed) { 4618 /* Call ddi_intr_remove_handler() */ 4619 err = ddi_intr_remove_handler(mgp->htable[x]); 4620 if (err != DDI_SUCCESS) { 4621 cmn_err(CE_WARN, 4622 "%s: ddi_intr_remove_handler for " 4623 "vec %d returned %d\n", mgp->name, 4624 x, err); 4625 } 4626 } 4627 err = ddi_intr_free(mgp->htable[x]); 4628 if (err != DDI_SUCCESS) { 4629 cmn_err(CE_WARN, 4630 "%s: ddi_intr_free for vec %d returned %d\n", 4631 mgp->name, x, err); 4632 } 4633 } 4634 kmem_free(mgp->htable, mgp->intr_size); 4635 mgp->htable = NULL; 4636 } 4637 4638 static void 4639 myri10ge_test_physical(dev_info_t *dip) 4640 { 4641 ddi_dma_handle_t handle; 4642 struct myri10ge_dma_stuff dma; 4643 void *addr; 4644 int err; 4645 4646 /* test #1, sufficient for older sparc systems */ 4647 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 4648 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr, 4649 DDI_DMA_DONTWAIT, NULL, &handle); 4650 if (err == DDI_DMA_BADATTR) 4651 goto fail; 4652 ddi_dma_free_handle(&handle); 4653 4654 /* test #2, required on Olympis where the bind is what fails */ 4655 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr, 4656 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, 4657 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT); 4658 if (addr == NULL) 4659 goto fail; 4660 myri10ge_dma_free(&dma); 4661 return; 4662 4663 fail: 4664 if (myri10ge_verbose) 4665 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, " 4666 "using IOMMU\n", ddi_get_instance(dip)); 4667 4668 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL; 4669 } 4670 4671 static void 4672 myri10ge_get_props(dev_info_t *dip) 4673 { 4674 4675 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4676 "myri10ge_flow_control", myri10ge_flow_control); 4677 4678 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4679 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay); 4680 4681 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 4682 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4683 "myri10ge_nvidia_ecrc_enable", 1); 4684 #endif 4685 4686 4687 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4688 "myri10ge_use_msi", myri10ge_use_msi); 4689 4690 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4691 "myri10ge_deassert_wait", myri10ge_deassert_wait); 4692 4693 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4694 "myri10ge_verbose", myri10ge_verbose); 4695 4696 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4697 "myri10ge_tx_copylen", myri10ge_tx_copylen); 4698 4699 if (myri10ge_tx_copylen < 60) { 4700 cmn_err(CE_WARN, 4701 "myri10ge_tx_copylen must be >= 60 bytes\n"); 4702 myri10ge_tx_copylen = 60; 4703 } 4704 4705 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4706 "myri10ge_mtu_override", myri10ge_mtu_override); 4707 4708 if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU && 4709 myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU) 4710 myri10ge_mtu = myri10ge_mtu_override + 4711 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ; 4712 else if (myri10ge_mtu_override != 0) { 4713 cmn_err(CE_WARN, 4714 "myri10ge_mtu_override must be between 1500 and " 4715 "9000 bytes\n"); 4716 } 4717 4718
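	/*
	 * All of the tunables in this function are read from the driver's
	 * .conf file via ddi_prop_get_int(), falling back to the
	 * compiled-in default when the property is absent.  A hypothetical
	 * myri10ge.conf fragment (example values only, not shipped
	 * defaults) might look like:
	 *
	 *	myri10ge_intr_coal_delay=25;
	 *	myri10ge_mtu_override=9000;
	 *	myri10ge_bigbufs_initial=256;
	 */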
myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4719 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial); 4720 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4721 "myri10ge_bigbufs_max", myri10ge_bigbufs_max); 4722 4723 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4724 "myri10ge_watchdog_reset", myri10ge_watchdog_reset); 4725 4726 if (myri10ge_bigbufs_initial < 128) { 4727 cmn_err(CE_WARN, 4728 "myri10ge_bigbufs_initial must be at least 128\n"); 4729 myri10ge_bigbufs_initial = 128; 4730 } 4731 if (myri10ge_bigbufs_max < 128) { 4732 cmn_err(CE_WARN, 4733 "myri10ge_bigbufs_max must be at least 128\n"); 4734 myri10ge_bigbufs_max = 128; 4735 } 4736 4737 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) { 4738 cmn_err(CE_WARN, 4739 "myri10ge_bigbufs_max must be >= " 4740 "myri10ge_bigbufs_initial\n"); 4741 myri10ge_bigbufs_max = myri10ge_bigbufs_initial; 4742 } 4743 4744 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4745 "myri10ge_force_firmware", myri10ge_force_firmware); 4746 4747 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4748 "myri10ge_max_slices", myri10ge_max_slices); 4749 4750 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4751 "myri10ge_use_msix", myri10ge_use_msix); 4752 4753 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4754 "myri10ge_rss_hash", myri10ge_rss_hash); 4755 4756 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX || 4757 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) { 4758 cmn_err(CE_WARN, "myri10ge: Illegal rss hash type %d\n", 4759 myri10ge_rss_hash); 4760 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4761 } 4762 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4763 "myri10ge_lro", myri10ge_lro); 4764 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4765 "myri10ge_lro_cnt", myri10ge_lro_cnt); 4766 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4767 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr); 4768 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4769 "myri10ge_tx_hash", myri10ge_tx_hash); 4770 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4771 "myri10ge_use_lso", myri10ge_use_lso); 4772 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4773 "myri10ge_lso_copy", myri10ge_lso_copy); 4774 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4775 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial); 4776 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 4777 "myri10ge_small_bytes", myri10ge_small_bytes); 4778 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 - 1)) { 4779 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d) is invalid\n", 4780 myri10ge_small_bytes); 4781 cmn_err(CE_WARN, "small_bytes + MXGEFW_PAD must be 128-byte aligned\n"); 4782 myri10ge_small_bytes += 128; 4783 myri10ge_small_bytes &= ~(128 - 1); 4784 myri10ge_small_bytes -= MXGEFW_PAD; 4785 cmn_err(CE_WARN, "rounded up to %d\n", 4786 myri10ge_small_bytes); 4787 4788 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4789 } 4790 } 4791 4792 #ifndef PCI_EXP_LNKSTA 4793 #define PCI_EXP_LNKSTA 18 4794 #endif 4795 4796 static int 4797 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid) 4798 { 4799 uint16_t status; 4800 uint8_t ptr; 4801 4802 /* check to see if we have capabilities */ 4803 status = pci_config_get16(handle, PCI_CONF_STAT); 4804 if (!(status & PCI_STAT_CAP)) { 4805 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n"); 4806 return (ENXIO); 4807 } 4808 4809 ptr =
pci_config_get8(handle, PCI_CONF_CAP_PTR); 4810 4811 /* Walk the capabilities list, looking for the requested capability */ 4812 while (ptr != PCI_CAP_NEXT_PTR_NULL) { 4813 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid) 4814 break; 4815 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR); 4816 } 4817 if (ptr < 64) { 4818 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr); 4819 return (ENXIO); 4820 } 4821 *capptr = ptr; 4822 return (0); 4823 } 4824 4825 static int 4826 myri10ge_set_max_readreq(ddi_acc_handle_t handle) 4827 { 4828 int err; 4829 uint16_t val; 4830 uint8_t ptr; 4831 4832 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4833 if (err != 0) { 4834 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4835 return (ENXIO); 4836 } 4837 4838 /* set max read req to 4096 */ 4839 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4840 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) | 4841 PCIE_DEVCTL_MAX_READ_REQ_4096; 4842 pci_config_put16(handle, ptr + PCIE_DEVCTL, val); 4843 val = pci_config_get16(handle, ptr + PCIE_DEVCTL); 4844 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) != 4845 PCIE_DEVCTL_MAX_READ_REQ_4096) { 4846 cmn_err(CE_WARN, "could not set max read req (%x)\n", val); 4847 return (EINVAL); 4848 } 4849 return (0); 4850 } 4851 4852 static int 4853 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link) 4854 { 4855 int err; 4856 uint16_t val; 4857 uint8_t ptr; 4858 4859 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E); 4860 if (err != 0) { 4861 cmn_err(CE_WARN, "could not find PCIe cap\n"); 4862 return (ENXIO); 4863 } 4864 4865 /* read link width */ 4866 val = pci_config_get16(handle, ptr + PCIE_LINKSTS); 4867 val &= PCIE_LINKSTS_NEG_WIDTH_MASK; 4868 *link = (val >> 4); 4869 return (0); 4870 } 4871 4872 static int 4873 myri10ge_reset_nic(struct myri10ge_priv *mgp) 4874 { 4875 ddi_acc_handle_t handle = mgp->cfg_hdl; 4876 uint32_t reboot; 4877 uint16_t cmd; 4878 int err; 4879 4880 cmd = pci_config_get16(handle, PCI_CONF_COMM); 4881 if ((cmd & PCI_COMM_ME) == 0) { 4882 /* 4883 * Bus master DMA disabled?
Check to see if the card 4884 * rebooted due to a parity error. For now, just report 4885 * it. 4886 */ 4887 4888 /* enter read32 mode */ 4889 pci_config_put8(handle, mgp->vso + 0x10, 0x3); 4890 /* read REBOOT_STATUS (0xfffffff0) */ 4891 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0); 4892 reboot = pci_config_get32(handle, mgp->vso + 0x14); 4893 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot); 4894 return (0); 4895 } 4896 if (!myri10ge_watchdog_reset) { 4897 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name); 4898 return (1); 4899 } 4900 4901 myri10ge_stop_locked(mgp); 4902 err = myri10ge_start_locked(mgp); 4903 if (err == DDI_FAILURE) { 4904 return (0); 4905 } 4906 mac_tx_update(mgp->mh); 4907 return (1); 4908 } 4909 4910 static inline int 4911 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx) 4912 { 4913 if (tx->sched != tx->stall && 4914 tx->done == tx->watchdog_done && 4915 tx->watchdog_req != tx->watchdog_done) 4916 return (1); 4917 return (0); 4918 } 4919 4920 static void 4921 myri10ge_watchdog(void *arg) 4922 { 4923 struct myri10ge_priv *mgp; 4924 struct myri10ge_slice_state *ss; 4925 myri10ge_tx_ring_t *tx; 4926 int nic_ok = 1; 4927 int slices_stalled, rx_pause, i; 4928 int add_rx; 4929 4930 mgp = arg; 4931 mutex_enter(&mgp->intrlock); 4932 if (mgp->running != MYRI10GE_ETH_RUNNING) { 4933 cmn_err(CE_WARN, 4934 "%s not running, not rearming watchdog (%d)\n", 4935 mgp->name, mgp->running); 4936 mutex_exit(&mgp->intrlock); 4937 return; 4938 } 4939 4940 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause); 4941 4942 /* 4943 * make sure the NIC is stalled before we reset it, so as to 4944 * ensure we don't rip the transmit data structures out from 4945 * under a pending transmit 4946 */ 4947 4948 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) { 4949 tx = &mgp->ss[i].tx; 4950 slices_stalled = myri10ge_ring_stalled(tx); 4951 if (slices_stalled) 4952 break; 4953 } 4954 4955 if (slices_stalled) { 4956 if (mgp->watchdog_rx_pause == rx_pause) { 4957 cmn_err(CE_WARN, 4958 "%s slice %d stalled: (%d, %d, %d, %d, %d, %d, %d)\n", 4959 mgp->name, i, tx->sched, tx->stall, 4960 tx->done, tx->watchdog_done, tx->req, tx->pkt_done, 4961 (int)ntohl(mgp->ss[i].fw_stats->send_done_count)); 4962 nic_ok = myri10ge_reset_nic(mgp); 4963 } else { 4964 cmn_err(CE_WARN, 4965 "%s Flow controlled, check link partner\n", 4966 mgp->name); 4967 } 4968 } 4969 4970 if (!nic_ok) { 4971 cmn_err(CE_WARN, 4972 "%s NIC dead, not rearming watchdog\n", mgp->name); 4973 mutex_exit(&mgp->intrlock); 4974 return; 4975 } 4976 for (i = 0; i < mgp->num_slices; i++) { 4977 ss = &mgp->ss[i]; 4978 tx = &ss->tx; 4979 tx->watchdog_done = tx->done; 4980 tx->watchdog_req = tx->req; 4981 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) { 4982 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy); 4983 add_rx = 4984 min(ss->jpool.num_alloc, 4985 myri10ge_bigbufs_max - 4986 (ss->jpool.num_alloc - 4987 ss->jbufs_for_smalls)); 4988 if (add_rx != 0) { 4989 (void) myri10ge_add_jbufs(ss, add_rx, 0); 4990 /* now feed them to the firmware */ 4991 mutex_enter(&ss->jpool.mtx); 4992 myri10ge_restock_jumbos(ss); 4993 mutex_exit(&ss->jpool.mtx); 4994 } 4995 } 4996 } 4997 mgp->watchdog_rx_pause = rx_pause; 4998 4999 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 5000 mgp->timer_ticks); 5001 mutex_exit(&mgp->intrlock); 5002 } 5003 5004 /*ARGSUSED*/ 5005 static int 5006 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5007 5008 { 5009 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5010
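	/*
	 * ND_GET handler: reports the current interrupt coalescing delay.
	 * The tunables registered in myri10ge_ndd_init() below are reached
	 * through the ND_GET/ND_SET ioctls handled by myri10ge_m_ioctl();
	 * e.g., a hypothetical invocation might be:
	 *	ndd -get /dev/myri10ge0 myri10ge_intr_coal_delay
	 */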
(void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay); 5011 return (0); 5012 } 5013 5014 /*ARGSUSED*/ 5015 static int 5016 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value, 5017 caddr_t cp, cred_t *credp) 5018 5019 { 5020 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5021 char *end; 5022 size_t new_value; 5023 5024 new_value = mi_strtol(value, &end, 10); 5025 if (end == value) 5026 return (EINVAL); 5027 5028 mutex_enter(&myri10ge_param_lock); 5029 mgp->intr_coal_delay = (int)new_value; 5030 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay); 5031 mutex_exit(&myri10ge_param_lock); 5032 return (0); 5033 } 5034 5035 /*ARGSUSED*/ 5036 static int 5037 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5038 5039 { 5040 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5041 (void) mi_mpprintf(mp, "%d", mgp->pause); 5042 return (0); 5043 } 5044 5045 /*ARGSUSED*/ 5046 static int 5047 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value, 5048 caddr_t cp, cred_t *credp) 5049 5050 { 5051 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp; 5052 char *end; 5053 size_t new_value; 5054 int err = 0; 5055 5056 new_value = mi_strtol(value, &end, 10); 5057 if (end == value) 5058 return (EINVAL); 5059 if (new_value != 0) 5060 new_value = 1; 5061 5062 mutex_enter(&myri10ge_param_lock); 5063 if (new_value != mgp->pause) 5064 err = myri10ge_change_pause(mgp, new_value); 5065 mutex_exit(&myri10ge_param_lock); 5066 return (err); 5067 } 5068 5069 /*ARGSUSED*/ 5070 static int 5071 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp) 5072 5073 { 5074 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp); 5075 return (0); 5076 } 5077 5078 /*ARGSUSED*/ 5079 static int 5080 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value, 5081 caddr_t cp, cred_t *credp) 5082 5083 { 5084 char *end; 5085 size_t new_value; 5086 5087 new_value = mi_strtol(value, &end, 10); 5088 if (end == value) 5089 return (EINVAL); 5090 *(int *)(void *)cp = new_value; 5091 5092 return (0); 5093 } 5094 5095 static void 5096 myri10ge_ndd_init(struct myri10ge_priv *mgp) 5097 { 5098 mgp->nd_head = NULL; 5099 5100 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay", 5101 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp); 5102 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control", 5103 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp); 5104 (void) nd_load(&mgp->nd_head, "myri10ge_verbose", 5105 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose); 5106 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait", 5107 myri10ge_get_int, myri10ge_set_int, 5108 (caddr_t)&myri10ge_deassert_wait); 5109 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max", 5110 myri10ge_get_int, myri10ge_set_int, 5111 (caddr_t)&myri10ge_bigbufs_max); 5112 (void) nd_load(&mgp->nd_head, "myri10ge_lro", 5113 myri10ge_get_int, myri10ge_set_int, 5114 (caddr_t)&myri10ge_lro); 5115 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr", 5116 myri10ge_get_int, myri10ge_set_int, 5117 (caddr_t)&myri10ge_lro_max_aggr); 5118 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash", 5119 myri10ge_get_int, myri10ge_set_int, 5120 (caddr_t)&myri10ge_tx_hash); 5121 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy", 5122 myri10ge_get_int, myri10ge_set_int, 5123 (caddr_t)&myri10ge_lso_copy); 5124 } 5125 5126 static void 5127 myri10ge_ndd_fini(struct myri10ge_priv *mgp) 5128 { 5129 nd_free(&mgp->nd_head); 5130 } 5131 5132 static void 5133 myri10ge_m_ioctl(void *arg, queue_t *wq, 
mblk_t *mp) 5134 { 5135 struct iocblk *iocp; 5136 struct myri10ge_priv *mgp = arg; 5137 int cmd, ok, err; 5138 5139 iocp = (struct iocblk *)(void *)mp->b_rptr; 5140 cmd = iocp->ioc_cmd; 5141 5142 ok = 0; 5143 err = 0; 5144 5145 switch (cmd) { 5146 case ND_GET: 5147 case ND_SET: 5148 ok = nd_getset(wq, mgp->nd_head, mp); 5149 break; 5150 default: 5151 break; 5152 } 5153 if (!ok) 5154 err = EINVAL; 5155 else 5156 err = iocp->ioc_error; 5157 5158 if (!err) 5159 miocack(wq, mp, iocp->ioc_count, err); 5160 else 5161 miocnak(wq, mp, 0, err); 5162 } 5163 5164 static struct myri10ge_priv *mgp_list; 5165 5166 struct myri10ge_priv * 5167 myri10ge_get_instance(uint_t unit) 5168 { 5169 struct myri10ge_priv *mgp; 5170 5171 mutex_enter(&myri10ge_param_lock); 5172 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) { 5173 if (unit == ddi_get_instance(mgp->dip)) { 5174 mgp->refcnt++; 5175 break; 5176 } 5177 } 5178 mutex_exit(&myri10ge_param_lock); 5179 return (mgp); 5180 } 5181 5182 void 5183 myri10ge_put_instance(struct myri10ge_priv *mgp) 5184 { 5185 mutex_enter(&myri10ge_param_lock); 5186 mgp->refcnt--; 5187 mutex_exit(&myri10ge_param_lock); 5188 } 5189 5190 static boolean_t 5191 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) 5192 { 5193 struct myri10ge_priv *mgp = arg; 5194 uint32_t *cap_hcksum; 5195 mac_capab_lso_t *cap_lso; 5196 mac_capab_rings_t *cap_rings; 5197 5198 switch (cap) { 5199 case MAC_CAPAB_HCKSUM: 5200 cap_hcksum = cap_data; 5201 *cap_hcksum = HCKSUM_INET_PARTIAL; 5202 break; 5203 case MAC_CAPAB_RINGS: 5204 cap_rings = cap_data; 5205 switch (cap_rings->mr_type) { 5206 case MAC_RING_TYPE_RX: 5207 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5208 cap_rings->mr_rnum = mgp->num_slices; 5209 cap_rings->mr_gnum = 1; 5210 cap_rings->mr_rget = myri10ge_fill_ring; 5211 cap_rings->mr_gget = myri10ge_fill_group; 5212 break; 5213 case MAC_RING_TYPE_TX: 5214 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 5215 cap_rings->mr_rnum = mgp->num_slices; 5216 cap_rings->mr_gnum = 0; 5217 cap_rings->mr_rget = myri10ge_fill_ring; 5218 cap_rings->mr_gget = NULL; 5219 break; 5220 default: 5221 return (B_FALSE); 5222 } 5223 break; 5224 case MAC_CAPAB_LSO: 5225 cap_lso = cap_data; 5226 if (!myri10ge_use_lso) 5227 return (B_FALSE); 5228 if (!(mgp->features & MYRI10GE_TSO)) 5229 return (B_FALSE); 5230 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; 5231 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1; 5232 break; 5233 5234 default: 5235 return (B_FALSE); 5236 } 5237 return (B_TRUE); 5238 } 5239 5240 5241 static int 5242 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val) 5243 { 5244 struct myri10ge_priv *mgp = arg; 5245 struct myri10ge_rx_ring_stats *rstat; 5246 struct myri10ge_tx_ring_stats *tstat; 5247 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats; 5248 struct myri10ge_slice_state *ss; 5249 uint64_t tmp = 0; 5250 int i; 5251 5252 switch (stat) { 5253 case MAC_STAT_IFSPEED: 5254 *val = 10ull * 1000ull * 1000000ull; 5255 break; 5256 5257 case MAC_STAT_MULTIRCV: 5258 for (i = 0; i < mgp->num_slices; i++) { 5259 rstat = &mgp->ss[i].rx_stats; 5260 tmp += rstat->multircv; 5261 } 5262 *val = tmp; 5263 break; 5264 5265 case MAC_STAT_BRDCSTRCV: 5266 for (i = 0; i < mgp->num_slices; i++) { 5267 rstat = &mgp->ss[i].rx_stats; 5268 tmp += rstat->brdcstrcv; 5269 } 5270 *val = tmp; 5271 break; 5272 5273 case MAC_STAT_MULTIXMT: 5274 for (i = 0; i < mgp->num_slices; i++) { 5275 tstat = &mgp->ss[i].tx.stats; 5276 tmp += tstat->multixmt; 5277 } 5278 *val = tmp; 5279 break; 5280 5281 case 
MAC_STAT_BRDCSTXMT: 5282 for (i = 0; i < mgp->num_slices; i++) { 5283 tstat = &mgp->ss[i].tx.stats; 5284 tmp += tstat->brdcstxmt; 5285 } 5286 *val = tmp; 5287 break; 5288 5289 case MAC_STAT_NORCVBUF: 5290 tmp = ntohl(fw_stats->dropped_no_big_buffer); 5291 tmp += ntohl(fw_stats->dropped_no_small_buffer); 5292 tmp += ntohl(fw_stats->dropped_link_overflow); 5293 for (i = 0; i < mgp->num_slices; i++) { 5294 ss = &mgp->ss[i]; 5295 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf); 5296 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf); 5297 } 5298 *val = tmp; 5299 break; 5300 5301 case MAC_STAT_IERRORS: 5302 tmp += ntohl(fw_stats->dropped_bad_crc32); 5303 tmp += ntohl(fw_stats->dropped_bad_phy); 5304 tmp += ntohl(fw_stats->dropped_runt); 5305 tmp += ntohl(fw_stats->dropped_overrun); 5306 *val = tmp; 5307 break; 5308 5309 case MAC_STAT_OERRORS: 5310 for (i = 0; i < mgp->num_slices; i++) { 5311 ss = &mgp->ss[i]; 5312 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags); 5313 tmp += MYRI10GE_SLICE_STAT(xmit_err); 5314 } 5315 *val = tmp; 5316 break; 5317 5318 case MAC_STAT_RBYTES: 5319 for (i = 0; i < mgp->num_slices; i++) { 5320 rstat = &mgp->ss[i].rx_stats; 5321 tmp += rstat->ibytes; 5322 } 5323 *val = tmp; 5324 break; 5325 5326 case MAC_STAT_IPACKETS: 5327 for (i = 0; i < mgp->num_slices; i++) { 5328 rstat = &mgp->ss[i].rx_stats; 5329 tmp += rstat->ipackets; 5330 } 5331 *val = tmp; 5332 break; 5333 5334 case MAC_STAT_OBYTES: 5335 for (i = 0; i < mgp->num_slices; i++) { 5336 tstat = &mgp->ss[i].tx.stats; 5337 tmp += tstat->obytes; 5338 } 5339 *val = tmp; 5340 break; 5341 5342 case MAC_STAT_OPACKETS: 5343 for (i = 0; i < mgp->num_slices; i++) { 5344 tstat = &mgp->ss[i].tx.stats; 5345 tmp += tstat->opackets; 5346 } 5347 *val = tmp; 5348 break; 5349 5350 case ETHER_STAT_TOOLONG_ERRORS: 5351 *val = ntohl(fw_stats->dropped_overrun); 5352 break; 5353 5354 #ifdef SOLARIS_S11 5355 case ETHER_STAT_TOOSHORT_ERRORS: 5356 *val = ntohl(fw_stats->dropped_runt); 5357 break; 5358 #endif 5359 5360 case ETHER_STAT_LINK_PAUSE: 5361 *val = mgp->pause; 5362 break; 5363 5364 case ETHER_STAT_LINK_AUTONEG: 5365 *val = 1; 5366 break; 5367 5368 case ETHER_STAT_LINK_DUPLEX: 5369 *val = LINK_DUPLEX_FULL; 5370 break; 5371 5372 default: 5373 return (ENOTSUP); 5374 } 5375 5376 return (0); 5377 } 5378 5379 /* ARGSUSED */ 5380 static void 5381 myri10ge_m_propinfo(void *arg, const char *pr_name, 5382 mac_prop_id_t pr_num, mac_prop_info_handle_t prh) 5383 { 5384 switch (pr_num) { 5385 case MAC_PROP_MTU: 5386 mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU); 5387 mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU, 5388 MYRI10GE_MAX_GLD_MTU); 5389 break; 5390 default: 5391 break; 5392 } 5393 } 5394 5395 /*ARGSUSED*/ 5396 static int 5397 myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 5398 uint_t pr_valsize, const void *pr_val) 5399 { 5400 int err = 0; 5401 struct myri10ge_priv *mgp = arg; 5402 5403 switch (pr_num) { 5404 case MAC_PROP_MTU: { 5405 uint32_t mtu; 5406 if (pr_valsize < sizeof (mtu)) { 5407 err = EINVAL; 5408 break; 5409 } 5410 bcopy(pr_val, &mtu, sizeof (mtu)); 5411 if (mtu > MYRI10GE_MAX_GLD_MTU || 5412 mtu < MYRI10GE_MIN_GLD_MTU) { 5413 err = EINVAL; 5414 break; 5415 } 5416 5417 mutex_enter(&mgp->intrlock); 5418 if (mgp->running != MYRI10GE_ETH_STOPPED) { 5419 err = EBUSY; 5420 mutex_exit(&mgp->intrlock); 5421 break; 5422 } 5423 5424 myri10ge_mtu = mtu + sizeof (struct ether_header) + 5425 MXGEFW_PAD + VLAN_TAGSZ; 5426 mutex_exit(&mgp->intrlock); 5427 break; 5428 } 5429 default: 5430 err = 
ENOTSUP; 5431 break; 5432 } 5433 5434 return (err); 5435 } 5436 5437 static mac_callbacks_t myri10ge_m_callbacks = { 5438 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO), 5439 myri10ge_m_stat, 5440 myri10ge_m_start, 5441 myri10ge_m_stop, 5442 myri10ge_m_promisc, 5443 myri10ge_m_multicst, 5444 NULL, 5445 NULL, 5446 NULL, 5447 myri10ge_m_ioctl, 5448 myri10ge_m_getcapab, 5449 NULL, 5450 NULL, 5451 myri10ge_m_setprop, 5452 NULL, 5453 myri10ge_m_propinfo 5454 }; 5455 5456 5457 static int 5458 myri10ge_probe_slices(struct myri10ge_priv *mgp) 5459 { 5460 myri10ge_cmd_t cmd; 5461 int status; 5462 5463 mgp->num_slices = 1; 5464 5465 /* hit the board with a reset to ensure it is alive */ 5466 (void) memset(&cmd, 0, sizeof (cmd)); 5467 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd); 5468 if (status != 0) { 5469 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 5470 return (ENXIO); 5471 } 5472 5473 if (myri10ge_use_msix == 0) 5474 return (0); 5475 5476 /* tell it the size of the interrupt queues */ 5477 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot); 5478 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 5479 if (status != 0) { 5480 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n", 5481 mgp->name); 5482 return (ENXIO); 5483 } 5484 5485 /* ask for the maximum number of slices it supports */ 5486 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 5487 &cmd); 5488 if (status != 0) 5489 return (0); 5490 5491 mgp->num_slices = cmd.data0; 5492 5493 /* 5494 * if the admin did not specify a limit to how many 5495 * slices we should use, cap it automatically to the 5496 * number of CPUs currently online 5497 */ 5498 if (myri10ge_max_slices == -1) 5499 myri10ge_max_slices = ncpus; 5500 5501 if (mgp->num_slices > myri10ge_max_slices) 5502 mgp->num_slices = myri10ge_max_slices; 5503 5504 5505 /* 5506 * Now try to allocate as many MSI-X vectors as we have 5507 * slices. We give up on MSI-X if we can only get a single 5508 * vector.
5509 */ 5510 while (mgp->num_slices > 1) { 5511 /* make sure it is a power of two */ 5512 while (!ISP2(mgp->num_slices)) 5513 mgp->num_slices--; 5514 if (mgp->num_slices == 1) 5515 return (0); 5516 5517 status = myri10ge_add_intrs(mgp, 0); 5518 if (status == 0) { 5519 myri10ge_rem_intrs(mgp, 0); 5520 if (mgp->intr_cnt == mgp->num_slices) { 5521 if (myri10ge_verbose) 5522 printf("Got %d slices!\n", 5523 mgp->num_slices); 5524 return (0); 5525 } 5526 mgp->num_slices = mgp->intr_cnt; 5527 } else { 5528 mgp->num_slices = mgp->num_slices / 2; 5529 } 5530 } 5531 5532 if (myri10ge_verbose) 5533 printf("Got %d slices\n", mgp->num_slices); 5534 return (0); 5535 } 5536 5537 static void 5538 myri10ge_lro_free(struct myri10ge_slice_state *ss) 5539 { 5540 struct lro_entry *lro; 5541 5542 while (ss->lro_free != NULL) { 5543 lro = ss->lro_free; 5544 ss->lro_free = lro->next; 5545 kmem_free(lro, sizeof (*lro)); 5546 } 5547 } 5548 5549 static void 5550 myri10ge_lro_alloc(struct myri10ge_slice_state *ss) 5551 { 5552 struct lro_entry *lro; 5553 int idx; 5554 5555 ss->lro_free = NULL; 5556 ss->lro_active = NULL; 5557 5558 for (idx = 0; idx < myri10ge_lro_cnt; idx++) { 5559 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP); 5560 if (lro == NULL) 5561 continue; 5562 lro->next = ss->lro_free; 5563 ss->lro_free = lro; 5564 } 5565 } 5566 5567 static void 5568 myri10ge_free_slices(struct myri10ge_priv *mgp) 5569 { 5570 struct myri10ge_slice_state *ss; 5571 size_t bytes; 5572 int i; 5573 5574 if (mgp->ss == NULL) 5575 return; 5576 5577 for (i = 0; i < mgp->num_slices; i++) { 5578 ss = &mgp->ss[i]; 5579 if (ss->rx_done.entry == NULL) 5580 continue; 5581 myri10ge_dma_free(&ss->rx_done.dma); 5582 ss->rx_done.entry = NULL; 5583 if (ss->fw_stats == NULL) 5584 continue; 5585 myri10ge_dma_free(&ss->fw_stats_dma); 5586 ss->fw_stats = NULL; 5587 mutex_destroy(&ss->rx_lock); 5588 mutex_destroy(&ss->tx.lock); 5589 mutex_destroy(&ss->tx.handle_lock); 5590 mutex_destroy(&ss->poll_lock); 5591 myri10ge_jpool_fini(ss); 5592 myri10ge_slice_stat_destroy(ss); 5593 myri10ge_lro_free(ss); 5594 } 5595 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5596 kmem_free(mgp->ss, bytes); 5597 mgp->ss = NULL; 5598 } 5599 5600 5601 static int 5602 myri10ge_alloc_slices(struct myri10ge_priv *mgp) 5603 { 5604 struct myri10ge_slice_state *ss; 5605 size_t bytes; 5606 int i; 5607 5608 bytes = sizeof (*mgp->ss) * mgp->num_slices; 5609 mgp->ss = kmem_zalloc(bytes, KM_SLEEP); 5610 if (mgp->ss == NULL) 5611 return (ENOMEM); 5612 for (i = 0; i < mgp->num_slices; i++) { 5613 ss = &mgp->ss[i]; 5614 5615 ss->mgp = mgp; 5616 5617 /* allocate the per-slice firmware stats */ 5618 bytes = sizeof (*ss->fw_stats); 5619 ss->fw_stats = (mcp_irq_data_t *)(void *) 5620 myri10ge_dma_alloc(mgp->dip, bytes, 5621 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5622 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5623 &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT); 5624 if (ss->fw_stats == NULL) 5625 goto abort; 5626 (void) memset(ss->fw_stats, 0, bytes); 5627 5628 /* allocate rx done ring */ 5629 bytes = mgp->max_intr_slots * 5630 sizeof (*ss->rx_done.entry); 5631 ss->rx_done.entry = (mcp_slot_t *)(void *) 5632 myri10ge_dma_alloc(mgp->dip, bytes, 5633 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5634 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT, 5635 &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT); 5636 if (ss->rx_done.entry == NULL) { 5637 goto abort; 5638 } 5639 (void) memset(ss->rx_done.entry, 0, bytes); 5640 mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie); 5641 
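		/*
		 * rx_lock above is created with the interrupt block cookie
		 * because it may be acquired in interrupt context; the
		 * locks below appear to be taken only in base context, so
		 * no cookie is passed (an inference from this code, not a
		 * documented invariant).
		 */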
mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL); 5642 mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL); 5643 mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL); 5644 myri10ge_jpool_init(ss); 5645 (void) myri10ge_slice_stat_init(ss); 5646 myri10ge_lro_alloc(ss); 5647 } 5648 5649 return (0); 5650 5651 abort: 5652 myri10ge_free_slices(mgp); 5653 return (ENOMEM); 5654 } 5655 5656 static int 5657 myri10ge_save_msi_state(struct myri10ge_priv *mgp, 5658 ddi_acc_handle_t handle) 5659 { 5660 uint8_t ptr; 5661 int err; 5662 5663 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5664 if (err != 0) { 5665 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5666 mgp->name); 5667 return (DDI_FAILURE); 5668 } 5669 mgp->pci_saved_state.msi_ctrl = 5670 pci_config_get16(handle, ptr + PCI_MSI_CTRL); 5671 mgp->pci_saved_state.msi_addr_low = 5672 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET); 5673 mgp->pci_saved_state.msi_addr_high = 5674 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4); 5675 mgp->pci_saved_state.msi_data_32 = 5676 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA); 5677 mgp->pci_saved_state.msi_data_64 = 5678 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA); 5679 return (DDI_SUCCESS); 5680 } 5681 5682 static int 5683 myri10ge_restore_msi_state(struct myri10ge_priv *mgp, 5684 ddi_acc_handle_t handle) 5685 { 5686 uint8_t ptr; 5687 int err; 5688 5689 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI); 5690 if (err != 0) { 5691 cmn_err(CE_WARN, "%s: could not find MSI cap\n", 5692 mgp->name); 5693 return (DDI_FAILURE); 5694 } 5695 5696 pci_config_put16(handle, ptr + PCI_MSI_CTRL, 5697 mgp->pci_saved_state.msi_ctrl); 5698 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET, 5699 mgp->pci_saved_state.msi_addr_low); 5700 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4, 5701 mgp->pci_saved_state.msi_addr_high); 5702 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA, 5703 mgp->pci_saved_state.msi_data_32); 5704 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA, 5705 mgp->pci_saved_state.msi_data_64); 5706 5707 return (DDI_SUCCESS); 5708 } 5709 5710 static int 5711 myri10ge_save_pci_state(struct myri10ge_priv *mgp) 5712 { 5713 ddi_acc_handle_t handle = mgp->cfg_hdl; 5714 int i; 5715 int err = DDI_SUCCESS; 5716 5717 5718 /* Save the non-extended PCI config space 32-bits at a time */ 5719 for (i = 0; i < 16; i++) 5720 mgp->pci_saved_state.base[i] = 5721 pci_config_get32(handle, i*4); 5722 5723 /* now save MSI interrupt state, if needed */ 5724 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5725 err = myri10ge_save_msi_state(mgp, handle); 5726 5727 return (err); 5728 } 5729 5730 static int 5731 myri10ge_restore_pci_state(struct myri10ge_priv *mgp) 5732 { 5733 ddi_acc_handle_t handle = mgp->cfg_hdl; 5734 int i; 5735 int err = DDI_SUCCESS; 5736 5737 5738 /* Restore the non-extended PCI config space 32-bits at a time */ 5739 for (i = 15; i >= 0; i--) 5740 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]); 5741 5742 /* now restore MSI interrupt state, if needed */ 5743 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI) 5744 err = myri10ge_restore_msi_state(mgp, handle); 5745 5746 if (mgp->max_read_request_4k) 5747 (void) myri10ge_set_max_readreq(handle); 5748 return (err); 5749 } 5750 5751 5752 static int 5753 myri10ge_suspend(dev_info_t *dip) 5754 { 5755 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5756 int status; 5757 5758 if (mgp == NULL) { 5759 cmn_err(CE_WARN, "null mgp in myri10ge_suspend\n"); 5760 return (DDI_FAILURE); 5761 } 5762 if
(mgp->dip != dip) { 5763 cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n"); 5764 return (DDI_FAILURE); 5765 } 5766 mutex_enter(&mgp->intrlock); 5767 if (mgp->running == MYRI10GE_ETH_RUNNING) { 5768 mgp->running = MYRI10GE_ETH_STOPPING; 5769 mutex_exit(&mgp->intrlock); 5770 (void) untimeout(mgp->timer_id); 5771 mutex_enter(&mgp->intrlock); 5772 myri10ge_stop_locked(mgp); 5773 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING; 5774 } 5775 status = myri10ge_save_pci_state(mgp); 5776 mutex_exit(&mgp->intrlock); 5777 return (status); 5778 } 5779 5780 static int 5781 myri10ge_resume(dev_info_t *dip) 5782 { 5783 struct myri10ge_priv *mgp = ddi_get_driver_private(dip); 5784 int status = DDI_SUCCESS; 5785 5786 if (mgp == NULL) { 5787 cmn_err(CE_WARN, "null mgp in myri10ge_resume\n"); 5788 return (DDI_FAILURE); 5789 } 5790 if (mgp->dip != dip) { 5791 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n"); 5792 return (DDI_FAILURE); 5793 } 5794 5795 mutex_enter(&mgp->intrlock); 5796 status = myri10ge_restore_pci_state(mgp); 5797 if (status == DDI_SUCCESS && 5798 mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) { 5799 status = myri10ge_start_locked(mgp); 5800 } 5801 mutex_exit(&mgp->intrlock); 5802 if (status != DDI_SUCCESS) 5803 return (status); 5804 5805 /* start the watchdog timer */ 5806 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 5807 mgp->timer_ticks); 5808 return (DDI_SUCCESS); 5809 } 5810 5811 static int 5812 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5813 { 5814 5815 struct myri10ge_priv *mgp; 5816 mac_register_t *macp, *omacp; 5817 ddi_acc_handle_t handle; 5818 uint32_t csr, hdr_offset; 5819 int status, span, link_width, max_read_request_4k; 5820 unsigned long bus_number, dev_number, func_number; 5821 size_t bytes; 5822 offset_t ss_offset; 5823 uint8_t vso; 5824 5825 if (cmd == DDI_RESUME) { 5826 return (myri10ge_resume(dip)); 5827 } 5828 5829 if (cmd != DDI_ATTACH) 5830 return (DDI_FAILURE); 5831 if (pci_config_setup(dip, &handle) != DDI_SUCCESS) 5832 return (DDI_FAILURE); 5833 5834 /* enable bus master and I/O space access */ 5835 csr = pci_config_get32(handle, PCI_CONF_COMM); 5836 pci_config_put32(handle, PCI_CONF_COMM, 5837 (csr |PCI_COMM_ME|PCI_COMM_MAE)); 5838 status = myri10ge_read_pcie_link_width(handle, &link_width); 5839 if (status != 0) { 5840 cmn_err(CE_WARN, "could not read link width!\n"); 5841 link_width = 0; 5842 } 5843 max_read_request_4k = !myri10ge_set_max_readreq(handle); 5844 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS); 5845 if (status != 0) 5846 goto abort_with_cfg_hdl; 5847 if ((omacp = mac_alloc(MAC_VERSION)) == NULL) 5848 goto abort_with_cfg_hdl; 5849 /* 5850 * XXXX Hack: mac_register_t grows in newer kernels.
To be 5851 * able to write newer fields, such as m_margin, without 5852 * writing outside allocated memory, we allocate our own macp 5853 * and pass that to mac_register(). 5854 */ 5855 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP); 5856 macp->m_version = omacp->m_version; 5857 5858 if ((mgp = (struct myri10ge_priv *) 5859 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) { 5860 goto abort_with_macinfo; 5861 } 5862 ddi_set_driver_private(dip, mgp); 5863 5864 /* setup device name for log messages */ 5865 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip)); 5866 5867 mutex_enter(&myri10ge_param_lock); 5868 myri10ge_get_props(dip); 5869 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 5870 mgp->pause = myri10ge_flow_control; 5871 mutex_exit(&myri10ge_param_lock); 5872 5873 mgp->max_read_request_4k = max_read_request_4k; 5874 mgp->pcie_link_width = link_width; 5875 mgp->running = MYRI10GE_ETH_STOPPED; 5876 mgp->vso = vso; 5877 mgp->dip = dip; 5878 mgp->cfg_hdl = handle; 5879 5880 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */ 5881 myri10ge_test_physical(dip); 5882 5883 /* allocate command page */ 5884 bytes = sizeof (*mgp->cmd); 5885 mgp->cmd = (mcp_cmd_response_t *) 5886 (void *)myri10ge_dma_alloc(dip, bytes, 5887 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr, 5888 DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT, 5889 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT); 5890 if (mgp->cmd == NULL) 5891 goto abort_with_mgp; 5892 5893 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number, 5894 &dev_number, &func_number); 5895 if (myri10ge_verbose) 5896 printf("%s at %ld:%ld:%ld attaching\n", mgp->name, 5897 bus_number, dev_number, func_number); 5898 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram, 5899 (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr, 5900 &mgp->io_handle); 5901 if (status != DDI_SUCCESS) { 5902 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name); 5903 printf("%s: reg_set = %d, span = %d, status = %d", 5904 mgp->name, mgp->reg_set, span, status); 5905 goto abort_with_cmd_dma; 5906 } 5907 5908 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET); 5909 hdr_offset = ntohl(hdr_offset) & 0xffffc; 5910 ss_offset = hdr_offset + 5911 offsetof(struct mcp_gen_header, string_specs); 5912 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset)); 5913 myri10ge_pio_copy32(mgp->eeprom_strings, 5914 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size), 5915 MYRI10GE_EEPROM_STRINGS_SIZE); 5916 (void) memset(mgp->eeprom_strings + 5917 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 5918 5919 status = myri10ge_read_mac_addr(mgp); 5920 if (status) { 5921 goto abort_with_mapped; 5922 } 5923 5924 status = myri10ge_select_firmware(mgp); 5925 if (status != 0) { 5926 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name); 5927 goto abort_with_mapped; 5928 } 5929 5930 status = myri10ge_probe_slices(mgp); 5931 if (status != 0) { 5932 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name); 5933 goto abort_with_dummy_rdma; 5934 } 5935 5936 status = myri10ge_alloc_slices(mgp); 5937 if (status != 0) { 5938 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name); 5939 goto abort_with_dummy_rdma; 5940 } 5941 5942 /* add the interrupt handler */ 5943 status = myri10ge_add_intrs(mgp, 1); 5944 if (status != 0) { 5945 cmn_err(CE_WARN, "%s: Failed to add interrupt\n", 5946 mgp->name); 5947 goto abort_with_slices; 5948 } 5949 5950 /* now that we have an iblock_cookie, init the mutexes */ 5951 mutex_init(&mgp->cmd_lock,
NULL, MUTEX_DRIVER, mgp->icookie); 5952 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie); 5953 5954 5955 status = myri10ge_nic_stat_init(mgp); 5956 if (status != DDI_SUCCESS) 5957 goto abort_with_interrupts; 5958 status = myri10ge_info_init(mgp); 5959 if (status != DDI_SUCCESS) 5960 goto abort_with_stats; 5961 5962 /* 5963 * Initialize GLD state 5964 */ 5965 5966 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 5967 macp->m_driver = mgp; 5968 macp->m_dip = dip; 5969 macp->m_src_addr = mgp->mac_addr; 5970 macp->m_callbacks = &myri10ge_m_callbacks; 5971 macp->m_min_sdu = 0; 5972 macp->m_max_sdu = myri10ge_mtu - 5973 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ); 5974 #ifdef SOLARIS_S11 5975 macp->m_margin = VLAN_TAGSZ; 5976 #endif 5977 macp->m_v12n = MAC_VIRT_LEVEL1; 5978 status = mac_register(macp, &mgp->mh); 5979 if (status != 0) { 5980 cmn_err(CE_WARN, "%s: mac_register failed with %d\n", 5981 mgp->name, status); 5982 goto abort_with_info; 5983 } 5984 myri10ge_ndd_init(mgp); 5985 if (myri10ge_verbose) 5986 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name, 5987 mgp->intr_type, mgp->tx_boundary, mgp->fw_name); 5988 mutex_enter(&myri10ge_param_lock); 5989 mgp->next = mgp_list; 5990 mgp_list = mgp; 5991 mutex_exit(&myri10ge_param_lock); 5992 kmem_free(macp, sizeof (*macp) * 8); 5993 mac_free(omacp); 5994 return (DDI_SUCCESS); 5995 5996 abort_with_info: 5997 myri10ge_info_destroy(mgp); 5998 5999 abort_with_stats: 6000 myri10ge_nic_stat_destroy(mgp); 6001 6002 abort_with_interrupts: 6003 mutex_destroy(&mgp->cmd_lock); 6004 mutex_destroy(&mgp->intrlock); 6005 myri10ge_rem_intrs(mgp, 1); 6006 6007 abort_with_slices: 6008 myri10ge_free_slices(mgp); 6009 6010 abort_with_dummy_rdma: 6011 myri10ge_dummy_rdma(mgp, 0); 6012 6013 abort_with_mapped: 6014 ddi_regs_map_free(&mgp->io_handle); 6015 6016 abort_with_cmd_dma: 6017 myri10ge_dma_free(&mgp->cmd_dma); 6018 abort_with_mgp: 6019 kmem_free(mgp, sizeof (*mgp)); 6020 6021 abort_with_macinfo: 6022 kmem_free(macp, sizeof (*macp) * 8); 6023 mac_free(omacp); 6024 6025 abort_with_cfg_hdl: 6026 pci_config_teardown(&handle); 6027 return (DDI_FAILURE); 6028 6029 } 6030 6031 6032 static int 6033 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 6034 { 6035 struct myri10ge_priv *mgp, *tmp; 6036 int status, i, jbufs_alloced; 6037 6038 if (cmd == DDI_SUSPEND) { 6039 status = myri10ge_suspend(dip); 6040 return (status); 6041 } 6042 6043 if (cmd != DDI_DETACH) { 6044 return (DDI_FAILURE); 6045 } 6046 /* Get the driver private (struct myri10ge_priv) structure */ 6047 mgp = ddi_get_driver_private(dip); 6048 6049 mutex_enter(&mgp->intrlock); 6050 jbufs_alloced = 0; 6051 for (i = 0; i < mgp->num_slices; i++) { 6052 myri10ge_remove_jbufs(&mgp->ss[i]); 6053 jbufs_alloced += mgp->ss[i].jpool.num_alloc; 6054 } 6055 mutex_exit(&mgp->intrlock); 6056 if (jbufs_alloced != 0) { 6057 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n", 6058 mgp->name, jbufs_alloced); 6059 return (DDI_FAILURE); 6060 } 6061 6062 mutex_enter(&myri10ge_param_lock); 6063 if (mgp->refcnt != 0) { 6064 mutex_exit(&myri10ge_param_lock); 6065 cmn_err(CE_NOTE, "%s: %d external refs remain\n", 6066 mgp->name, mgp->refcnt); 6067 return (DDI_FAILURE); 6068 } 6069 mutex_exit(&myri10ge_param_lock); 6070 6071 status = mac_unregister(mgp->mh); 6072 if (status != DDI_SUCCESS) 6073 return (status); 6074 6075 myri10ge_ndd_fini(mgp); 6076 myri10ge_dummy_rdma(mgp, 0); 6077 myri10ge_nic_stat_destroy(mgp); 6078 myri10ge_info_destroy(mgp); 6079 6080 mutex_destroy(&mgp->cmd_lock); 6081 mutex_destroy(&mgp->intrlock); 6082
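	/*
	 * The remaining teardown mirrors attach in reverse: interrupts,
	 * slices, register mapping, command DMA, then the PCI config
	 * handle.
	 */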
	myri10ge_rem_intrs(mgp, 1);

	myri10ge_free_slices(mgp);
	ddi_regs_map_free(&mgp->io_handle);
	myri10ge_dma_free(&mgp->cmd_dma);
	pci_config_teardown(&mgp->cfg_hdl);

	mutex_enter(&myri10ge_param_lock);
	if (mgp_list == mgp) {
		mgp_list = mgp->next;
	} else {
		tmp = mgp_list;
		while (tmp->next != mgp && tmp->next != NULL)
			tmp = tmp->next;
		if (tmp->next != NULL)
			tmp->next = tmp->next->next;
	}
	kmem_free(mgp, sizeof (*mgp));
	mutex_exit(&myri10ge_param_lock);
	return (DDI_SUCCESS);
}

/*
 * Helper for the quiesce(9E) entry point: interrupt threads are not
 * being scheduled, so we must poll for the confirmation DMA to arrive
 * in the firmware stats block for slice 0. We're essentially running
 * the guts of the interrupt handler, and just cherry-picking the
 * confirmation that the NIC is quiesced (stats->link_down).
 */

static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss = mgp->ss;
	mcp_irq_data_t *stats = ss->fw_stats;
	int valid;
	int found_down = 0;

	/* check for a pending IRQ */
	if (!*((volatile uint8_t *)&stats->valid))
		return (0);
	valid = stats->valid;

	/*
	 * Make sure to tell the NIC to lower a legacy IRQ, else
	 * it may have corrupt state after restarting
	 */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		mb();
		/* wait for irq conf DMA */
		while (*((volatile uint8_t *)&stats->valid))
			;
	}
	if (stats->stats_updated && stats->link_down)
		found_down = 1;

	if (valid & 0x1)
		*ss->irq_claim = BE_32(3);
	*(ss->irq_claim + 1) = BE_32(3);

	return (found_down);
}

static int
myri10ge_quiesce(dev_info_t *dip)
{
	struct myri10ge_priv *mgp;
	myri10ge_cmd_t cmd;
	int status, down, i;

	mgp = ddi_get_driver_private(dip);
	if (mgp == NULL)
		return (DDI_FAILURE);

	/* if the device was unplumbed, it is guaranteed to be quiescent */
	if (mgp->running == MYRI10GE_ETH_STOPPED)
		return (DDI_SUCCESS);

	/* send a down CMD to quiesce the NIC */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
		return (DDI_FAILURE);
	}

	/* poll for the down confirmation, 100ms at a time, for up to 2s */
	for (i = 0; i < 20; i++) {
		down = myri10ge_poll_down(mgp);
		if (down)
			break;
		delay(drv_usectohz(100000));
		mb();
	}
	if (down)
		return (DDI_SUCCESS);
	return (DDI_FAILURE);
}
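
/*
 * myri10ge_find_lastfree() below records the db_lastfree routine of an
 * ordinary allocb(9F) dblk once at module load; comparing another
 * dblk's db_lastfree against this saved pointer lets the driver tell
 * plain message blocks apart from desballoc'ed attached storage.
 */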
/*
 * Distinguish between allocb'ed blocks, and desballoc'ed attached
 * storage.
 */
static void
myri10ge_find_lastfree(void)
{
	mblk_t *mp = allocb(1024, 0);
	dblk_t *dbp;

	if (mp == NULL) {
		cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
		return;
	}
	dbp = mp->b_datap;
	myri10ge_db_lastfree = (void *)dbp->db_lastfree;
	/* the pointer is recorded; release the probe block */
	freemsg(mp);
}

int
_init(void)
{
	int i;

	if (myri10ge_verbose)
		cmn_err(CE_NOTE,
		    "Myricom 10G driver (10GbE) version %s loading\n",
		    MYRI10GE_VERSION_STR);
	myri10ge_find_lastfree();
	mac_init_ops(&myri10ge_ops, "myri10ge");
	mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
	if ((i = mod_install(&modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_install returned %d\n", i);
		mac_fini_ops(&myri10ge_ops);
		mutex_destroy(&myri10ge_param_lock);
	}
	return (i);
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i != 0) {
		return (i);
	}
	mac_fini_ops(&myri10ge_ops);
	mutex_destroy(&myri10ge_param_lock);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * This file uses MyriGE driver indentation.
 *
 * Local Variables:
 * c-file-style:"sun"
 * tab-width:8
 * End:
 */