5255 uts shouldn't open-code ISP2

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2007-2009 Myricom, Inc.  All rights reserved.
  29  * Use is subject to license terms.
  30  */
  31 
  32 #ifndef lint
  33 static const char __idstring[] =
  34         "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
  35 #endif
  36 
  37 #define MXGEFW_NDIS
  38 #include "myri10ge_var.h"
  39 #include "rss_eth_z8e.h"
  40 #include "rss_ethp_z8e.h"
  41 #include "mcp_gen_header.h"
  42 
  43 #define MYRI10GE_MAX_ETHER_MTU 9014
  44 
  45 #define MYRI10GE_ETH_STOPPED 0
  46 #define MYRI10GE_ETH_STOPPING 1
  47 #define MYRI10GE_ETH_STARTING 2
  48 #define MYRI10GE_ETH_RUNNING 3
  49 #define MYRI10GE_ETH_OPEN_FAILED 4
  50 #define MYRI10GE_ETH_SUSPENDED_RUNNING 5
  51 
  52 static int myri10ge_small_bytes = 510;
  53 static int myri10ge_intr_coal_delay = 125;
  54 static int myri10ge_flow_control = 1;
  55 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
  56 static int myri10ge_nvidia_ecrc_enable = 1;
  57 #endif
  58 static int myri10ge_mtu_override = 0;
  59 static int myri10ge_tx_copylen = 512;
  60 static int myri10ge_deassert_wait = 1;
  61 static int myri10ge_verbose = 0;
  62 static int myri10ge_watchdog_reset = 0;
  63 static int myri10ge_use_msix = 1;
  64 static int myri10ge_max_slices = -1;
  65 static int myri10ge_use_msi = 1;
  66 int myri10ge_force_firmware = 0;
  67 static boolean_t myri10ge_use_lso = B_TRUE;
  68 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
  69 static int myri10ge_tx_hash = 1;
  70 static int myri10ge_lro = 0;
  71 static int myri10ge_lro_cnt = 8;
  72 int myri10ge_lro_max_aggr = 2;
  73 static int myri10ge_lso_copy = 0;
  74 static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
  75 int myri10ge_tx_handles_initial = 128;
  76 
  77 static  kmutex_t myri10ge_param_lock;
  78 static void* myri10ge_db_lastfree;
  79 
  80 static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
  81 static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
  82 static int myri10ge_quiesce(dev_info_t *dip);
  83 
  84 DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
  85     myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);
  86 
  87 
  88 static struct modldrv modldrv = {
  89         &mod_driverops,
  90         "Myricom 10G driver (10GbE)",
  91         &myri10ge_ops,
  92 };
  93 
  94 
  95 static struct modlinkage modlinkage = {
  96         MODREV_1,
  97         {&modldrv, NULL},
  98 };
  99 
 100 unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 101 
 102 static ddi_dma_attr_t myri10ge_misc_dma_attr = {
 103         DMA_ATTR_V0,                    /* version number. */
 104         (uint64_t)0,                    /* low address */
 105         (uint64_t)0xffffffffffffffffULL, /* high address */
 106         (uint64_t)0x7ffffff,            /* address counter max */
 107         (uint64_t)4096,                 /* alignment */
 108         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 109         (uint32_t)0x1,                  /* minimum transfer size */
 110         (uint64_t)0x7fffffff,           /* maximum transfer size */
 111         (uint64_t)0x7fffffff,           /* maximum segment size */
 112         1,                              /* scatter/gather list length */
 113         1,                              /* granularity */
 114         0                               /* attribute flags */
 115 };
 116 
 117 /*
 118  * The Myri10GE NIC has the following constraints on receive buffers:
 119  * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 120  * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 121  */
 122 
 123 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
 124         DMA_ATTR_V0,                    /* version number. */
 125         (uint64_t)0,                    /* low address */
 126         (uint64_t)0xffffffffffffffffULL, /* high address */
 127         (uint64_t)0x7ffffff,            /* address counter max */
 128         (uint64_t)4096,                 /* alignment */
 129         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 130         (uint32_t)0x1,                  /* minimum transfer size */
 131         (uint64_t)0x7fffffff,           /* maximum transfer size */
 132         UINT64_MAX,                     /* maximum segment size */
 133         1,                              /* scatter/gather list length */
 134         1,                              /* granularity */
 135         0                               /* attribute flags */
 136 };
 137 
 138 static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
 139         DMA_ATTR_V0,                    /* version number. */
 140         (uint64_t)0,                    /* low address */
 141         (uint64_t)0xffffffffffffffffULL, /* high address */
 142         (uint64_t)0x7ffffff,            /* address counter max */
 143 #if defined sparc64 || defined __sparcv9
 144         (uint64_t)4096,                 /* alignment */
 145 #else
 146         (uint64_t)0x80,                 /* alignment */
 147 #endif
 148         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 149         (uint32_t)0x1,                  /* minimum transfer size */
 150         (uint64_t)0x7fffffff,           /* maximum transfer size */
 151 #if defined sparc64 || defined __sparcv9
 152         UINT64_MAX,                     /* maximum segment size */
 153 #else
 154         (uint64_t)0xfff,                /* maximum segment size */
 155 #endif
 156         1,                              /* scatter/gather list length */
 157         1,                              /* granularity */
 158         0                               /* attribute flags */
 159 };
 160 
 161 static ddi_dma_attr_t myri10ge_tx_dma_attr = {
 162         DMA_ATTR_V0,                    /* version number. */
 163         (uint64_t)0,                    /* low address */
 164         (uint64_t)0xffffffffffffffffULL, /* high address */
 165         (uint64_t)0x7ffffff,            /* address counter max */
 166         (uint64_t)1,                    /* alignment */
 167         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 168         (uint32_t)0x1,                  /* minimum transfer size */
 169         (uint64_t)0x7fffffff,           /* maximum transfer size */
 170         UINT64_MAX,                     /* maximum segment size */
 171         INT32_MAX,                      /* scatter/gather list length */
 172         1,                              /* granularity */
 173         0                       /* attribute flags */
 174 };
 175 
 176 #if defined sparc64 || defined __sparcv9
 177 #define WC 0
 178 #else
 179 #define WC 1
 180 #endif
 181 
 182 struct ddi_device_acc_attr myri10ge_dev_access_attr = {
 183         DDI_DEVICE_ATTR_V0,             /* version */
 184         DDI_NEVERSWAP_ACC,              /* endian flash */
 185 #if WC
 186         DDI_MERGING_OK_ACC              /* data order */
 187 #else
 188         DDI_STRICTORDER_ACC
 189 #endif
 190 };
 191 
 192 static void myri10ge_watchdog(void *arg);
 193 
 194 #ifdef MYRICOM_PRIV
 195 int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
 196 #else
 197 int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
 198 #endif
 199 int myri10ge_bigbufs_initial = 1024;
 200 int myri10ge_bigbufs_max = 4096;
 201 
 202 
 203 caddr_t
 204 myri10ge_dma_alloc(dev_info_t *dip, size_t len,
 205     ddi_dma_attr_t *attr, ddi_device_acc_attr_t  *accattr,
 206     uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
 207     int warn, int (*wait)(caddr_t))
 208 {
 209         caddr_t  kaddr;
 210         size_t real_length;
 211         ddi_dma_cookie_t cookie;
 212         uint_t count;
 213         int err;
 214 
 215         err = ddi_dma_alloc_handle(dip, attr, wait,
 216             NULL, &dma->handle);
 217         if (err != DDI_SUCCESS) {
 218                 if (warn)
 219                         cmn_err(CE_WARN,
 220                             "myri10ge: ddi_dma_alloc_handle failed\n");
 221                 goto abort_with_nothing;
 222         }
 223 
 224         err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
 225             wait, NULL, &kaddr, &real_length,
 226             &dma->acc_handle);
 227         if (err != DDI_SUCCESS) {
 228                 if (warn)
 229                         cmn_err(CE_WARN,
 230                             "myri10ge: ddi_dma_mem_alloc failed\n");
 231                 goto abort_with_handle;
 232         }
 233 
 234         err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
 235             bind_flags, wait, NULL, &cookie, &count);
 236 
 237         if (err != DDI_SUCCESS) {
 238                 if (warn)
 239                         cmn_err(CE_WARN,
 240                             "myri10ge: ddi_dma_addr_bind_handle failed\n");
 241                 goto abort_with_mem;
 242         }
 243 
 244         if (count != 1) {
 245                 if (warn)
 246                         cmn_err(CE_WARN,
 247                             "myri10ge: got too many dma segments ");
 248                 goto abort_with_bind;
 249         }
 250         dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
 251         dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
 252         return (kaddr);
 253 
 254 abort_with_bind:
 255         (void) ddi_dma_unbind_handle(dma->handle);
 256 
 257 abort_with_mem:
 258         ddi_dma_mem_free(&dma->acc_handle);
 259 
 260 abort_with_handle:
 261         ddi_dma_free_handle(&dma->handle);
 262 abort_with_nothing:
 263         if (warn) {
 264                 cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n  ");
 265                 cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
 266                     (void*) dip, len, (void*) attr);
 267                 cmn_err(CE_WARN,
 268                     "args: ddi_device_acc_attr=%p  alloc_flags=0x%x\n",
 269                     (void*) accattr, alloc_flags);
 270                 cmn_err(CE_WARN, "args: bind_flags=0x%x  dmastuff=%p",
 271                     bind_flags, (void*) dma);
 272         }
 273         return (NULL);
 274 
 275 }
 276 
 277 void
 278 myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
 279 {
 280         (void) ddi_dma_unbind_handle(dma->handle);
 281         ddi_dma_mem_free(&dma->acc_handle);
 282         ddi_dma_free_handle(&dma->handle);
 283 }
 284 
 285 static inline void
 286 myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
 287 {
 288         register volatile uint32_t *to32;
 289         size_t i;
 290 
 291         to32 = (volatile uint32_t *) to;
 292         for (i = (size / 4); i; i--) {
 293                 *to32 = *from32;
 294                 to32++;
 295                 from32++;
 296         }
 297 }
 298 
 299 #if defined(_LP64)
 300 static inline void
 301 myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
 302 {
 303         register volatile uint64_t *to64;
 304         size_t i;
 305 
 306         to64 = (volatile uint64_t *) to;
 307         for (i = (size / 8); i; i--) {
 308                 *to64 = *from64;
 309                 to64++;
 310                 from64++;
 311         }
 312 }
 313 #endif
 314 
 315 /*
 316  * This routine copies memory from the host to the NIC.
 317  * The "size" argument must always be a multiple of
 318  * the size of long (4 or 8 bytes), and to/from must also
 319  * be naturally aligned.
 320  */
 321 static inline void
 322 myri10ge_pio_copy(void *to, void *from, size_t size)
 323 {
 324 #if !defined(_LP64)
 325         ASSERT((size % 4) == 0);
 326         myri10ge_pio_copy32(to, (uint32_t *)from, size);
 327 #else
 328         ASSERT((size % 8) == 0);
 329         myri10ge_pio_copy64(to, (uint64_t *)from, size);
 330 #endif
 331 }
 332 
 333 
 334 /*
 335  * Due to various bugs in Solaris (especially bug 6186772 where the
 336  * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 337  * than two elements), and the design bug where hardware checksums are
 338  * ignored on mblk chains with more than 2 elements, we need to
 339  * allocate private pool of physically contiguous receive buffers.
 340  */
 341 
 342 static void
 343 myri10ge_jpool_init(struct myri10ge_slice_state *ss)
 344 {
 345         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 346 
 347         bzero(jpool, sizeof (*jpool));
 348         mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
 349             ss->mgp->icookie);
 350         jpool->head = NULL;
 351 }
 352 
 353 static void
 354 myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
 355 {
 356         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 357 
 358         if (jpool->head != NULL) {
 359                 cmn_err(CE_WARN,
 360                     "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
 361                     ss->mgp->name);
 362         }
 363         mutex_destroy(&jpool->mtx);
 364 }
 365 
 366 
 367 /*
 368  * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 369  * at most 32 bytes at a time, so as to avoid involving the software
 370  * pio handler in the nic.   We re-write the first segment's low
 371  * DMA address to mark it valid only after we write the entire chunk
 372  * in a burst
 373  */
 374 static inline void
 375 myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
 376 {
 377         src->addr_low |= BE_32(1);
 378         myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
 379         mb();
 380         myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
 381         mb();
 382         src->addr_low &= ~(BE_32(1));
 383         dst->addr_low = src->addr_low;
 384         mb();
 385 }
 386 
 387 static void
 388 myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
 389 {
 390         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 391         struct myri10ge_jpool_entry *jtail, *j, *jfree;
 392         volatile uintptr_t *putp;
 393         uintptr_t put;
 394         int i;
 395 
 396         /* find tail */
 397         jtail = NULL;
 398         if (jpool->head != NULL) {
 399                 j = jpool->head;
 400                 while (j->next != NULL)
 401                         j = j->next;
 402                 jtail = j;
 403         }
 404 
 405         /*
 406          * iterate over all per-CPU caches, and add contents into
 407          * jpool
 408          */
 409         for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
 410                 /* take per-CPU free list */
 411                 putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
 412                 if (*putp == NULL)
 413                         continue;
 414                 put = atomic_swap_ulong(putp, 0);
 415                 jfree = (struct myri10ge_jpool_entry *)put;
 416 
 417                 /* append to pool */
 418                 if (jtail == NULL) {
 419                         jpool->head = jfree;
 420                 } else {
 421                         jtail->next = jfree;
 422                 }
 423                 j = jfree;
 424                 while (j->next != NULL)
 425                         j = j->next;
 426                 jtail = j;
 427         }
 428 }
 429 
 430 /*
 431  * Transfers buffers from the free pool to the nic
 432  * Must be called holding the jpool mutex.
 433  */
 434 
 435 static inline void
 436 myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
 437 {
 438         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 439         struct myri10ge_jpool_entry *j;
 440         myri10ge_rx_ring_t *rx;
 441         int i, idx, limit;
 442 
 443         rx = &ss->rx_big;
 444         limit = ss->j_rx_cnt + (rx->mask + 1);
 445 
 446         for (i = rx->cnt; i != limit; i++) {
 447                 idx = i & (rx->mask);
 448                 j = jpool->head;
 449                 if (j == NULL) {
 450                         myri10ge_pull_jpool(ss);
 451                         j = jpool->head;
 452                         if (j == NULL) {
 453                                 break;
 454                         }
 455                 }
 456                 jpool->head = j->next;
 457                 rx->info[idx].j = j;
 458                 rx->shadow[idx].addr_low = j->dma.low;
 459                 rx->shadow[idx].addr_high = j->dma.high;
 460                 /* copy 4 descriptors (32-bytes) to the mcp at a time */
 461                 if ((idx & 7) == 7) {
 462                         myri10ge_submit_8rx(&rx->lanai[idx - 7],
 463                             &rx->shadow[idx - 7]);
 464                 }
 465         }
 466         rx->cnt = i;
 467 }
 468 
 469 /*
 470  * Transfer buffers from the nic to the free pool.
 471  * Should be called holding the jpool mutex
 472  */
 473 
 474 static inline void
 475 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
 476 {
 477         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 478         struct myri10ge_jpool_entry *j;
 479         myri10ge_rx_ring_t *rx;
 480         int i;
 481 
 482         mutex_enter(&jpool->mtx);
 483         rx = &ss->rx_big;
 484 
 485         for (i = 0; i < rx->mask + 1; i++) {
 486                 j = rx->info[i].j;
 487                 rx->info[i].j = NULL;
 488                 if (j == NULL)
 489                         continue;
 490                 j->next = jpool->head;
 491                 jpool->head = j;
 492         }
 493         mutex_exit(&jpool->mtx);
 494 
 495 }
 496 
 497 
 498 /*
 499  * Free routine which is called when the mblk allocated via
 500  * esballoc() is freed.   Here we return the jumbo buffer
 501  * to the free pool, and possibly pass some jumbo buffers
 502  * to the nic
 503  */
 504 
 505 static void
 506 myri10ge_jfree_rtn(void *arg)
 507 {
 508         struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
 509         struct myri10ge_jpool_stuff *jpool;
 510         volatile uintptr_t *putp;
 511         uintptr_t old, new;
 512 
 513         jpool = &j->ss->jpool;
 514 
 515         /* prepend buffer locklessly to per-CPU freelist */
 516         putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
 517         new = (uintptr_t)j;
 518         do {
 519                 old = *putp;
 520                 j->next = (void *)old;
 521         } while (atomic_cas_ulong(putp, old, new) != old);
 522 }
 523 
 524 static void
 525 myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
 526 {
 527         (void) ddi_dma_unbind_handle(j->dma_handle);
 528         ddi_dma_mem_free(&j->acc_handle);
 529         ddi_dma_free_handle(&j->dma_handle);
 530         kmem_free(j, sizeof (*j));
 531 }
 532 
 533 
 534 /*
 535  * Allocates one physically contiguous descriptor
 536  * and add it to the jumbo buffer pool.
 537  */
 538 
 539 static int
 540 myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
 541 {
 542         struct myri10ge_jpool_entry *j;
 543         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 544         ddi_dma_attr_t *rx_dma_attr;
 545         size_t real_length;
 546         ddi_dma_cookie_t cookie;
 547         uint_t count;
 548         int err;
 549 
 550         if (myri10ge_mtu < 2048)
 551                 rx_dma_attr = &myri10ge_rx_std_dma_attr;
 552         else
 553                 rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;
 554 
 555 again:
 556         j = (struct myri10ge_jpool_entry *)
 557             kmem_alloc(sizeof (*j), KM_SLEEP);
 558         err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
 559             DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
 560         if (err != DDI_SUCCESS)
 561                 goto abort_with_j;
 562 
 563         err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
 564             &myri10ge_dev_access_attr,  DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
 565             NULL, &j->buf, &real_length, &j->acc_handle);
 566         if (err != DDI_SUCCESS)
 567                 goto abort_with_handle;
 568 
 569         err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
 570             real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
 571             NULL, &cookie, &count);
 572         if (err != DDI_SUCCESS)
 573                 goto abort_with_mem;
 574 
 575         /*
 576          * Make certain std MTU buffers do not cross a 4KB boundary:
 577          *
 578          * Setting dma_attr_align=4096 will do this, but the system
 579          * will only allocate 1 RX buffer per 4KB page, rather than 2.
 580          * Setting dma_attr_granular=4096 *seems* to work around this,
 581          * but I'm paranoid about future systems no longer honoring
 582          * this, so fall back to the safe, but memory wasting way if a
 583          * buffer crosses a 4KB boundary.
 584          */
 585 
 586         if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
 587             rx_dma_attr->dma_attr_align != 4096) {
 588                 uint32_t start, end;
 589 
 590                 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
 591                 end = start + myri10ge_mtu;
 592                 if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
 593                         printf("std buffer crossed a 4KB boundary!\n");
 594                         myri10ge_remove_jbuf(j);
 595                         rx_dma_attr->dma_attr_align = 4096;
 596                         rx_dma_attr->dma_attr_seg = UINT64_MAX;
 597                         goto again;
 598                 }
 599         }
 600 
 601         j->dma.low =
 602             htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
 603         j->dma.high =
 604             htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
 605         j->ss = ss;
 606 
 607 
 608         j->free_func.free_func = myri10ge_jfree_rtn;
 609         j->free_func.free_arg = (char *)j;
 610         mutex_enter(&jpool->mtx);
 611         j->next = jpool->head;
 612         jpool->head = j;
 613         jpool->num_alloc++;
 614         mutex_exit(&jpool->mtx);
 615         return (0);
 616 
 617 abort_with_mem:
 618         ddi_dma_mem_free(&j->acc_handle);
 619 
 620 abort_with_handle:
 621         ddi_dma_free_handle(&j->dma_handle);
 622 
 623 abort_with_j:
 624         kmem_free(j, sizeof (*j));
 625 
 626         /*
 627          * If an allocation failed, perhaps it failed because it could
 628          * not satisfy granularity requirement.  Disable that, and
 629          * try agin.
 630          */
 631         if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
 632             rx_dma_attr->dma_attr_align != 4096) {
 633                         cmn_err(CE_NOTE,
 634                             "!alloc failed, reverting to gran=1\n");
 635                         rx_dma_attr->dma_attr_align = 4096;
 636                         rx_dma_attr->dma_attr_seg = UINT64_MAX;
 637                         goto again;
 638         }
 639         return (err);
 640 }
 641 
 642 static int
 643 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
 644 {
 645         int i;
 646         struct myri10ge_jpool_entry *j;
 647 
 648         mutex_enter(&jpool->mtx);
 649         j = jpool->head;
 650         i = 0;
 651         while (j != NULL) {
 652                 i++;
 653                 j = j->next;
 654         }
 655         mutex_exit(&jpool->mtx);
 656         return (i);
 657 }
 658 
 659 static int
 660 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
 661 {
 662         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 663         int allocated = 0;
 664         int err;
 665         int needed;
 666 
 667         /*
 668          * if total is set, user wants "num" jbufs in the pool,
 669          * otherwise the user wants to "num" additional jbufs
 670          * added to the pool
 671          */
 672         if (total && jpool->num_alloc) {
 673                 allocated = myri10ge_jfree_cnt(jpool);
 674                 needed = num - allocated;
 675         } else {
 676                 needed = num;
 677         }
 678 
 679         while (needed > 0) {
 680                 needed--;
 681                 err = myri10ge_add_jbuf(ss);
 682                 if (err == 0) {
 683                         allocated++;
 684                 }
 685         }
 686         return (allocated);
 687 }
 688 
 689 static void
 690 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
 691 {
 692         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 693         struct myri10ge_jpool_entry *j;
 694 
 695         mutex_enter(&jpool->mtx);
 696         myri10ge_pull_jpool(ss);
 697         while (jpool->head != NULL) {
 698                 jpool->num_alloc--;
 699                 j = jpool->head;
 700                 jpool->head = j->next;
 701                 myri10ge_remove_jbuf(j);
 702         }
 703         mutex_exit(&jpool->mtx);
 704 }
 705 
 706 static void
 707 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
 708 {
 709         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 710         struct myri10ge_jpool_entry *j = NULL;
 711         caddr_t ptr;
 712         uint32_t dma_low, dma_high;
 713         int idx, len;
 714         unsigned int alloc_size;
 715 
 716         dma_low = dma_high = len = 0;
 717         alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
 718         ptr = NULL;
 719         for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
 720                 /* Allocate a jumbo frame and carve it into small frames */
 721                 if (len < alloc_size) {
 722                         mutex_enter(&jpool->mtx);
 723                         /* remove jumbo from freelist */
 724                         j = jpool->head;
 725                         jpool->head = j->next;
 726                         /* place it onto small list */
 727                         j->next = ss->small_jpool;
 728                         ss->small_jpool = j;
 729                         mutex_exit(&jpool->mtx);
 730                         len = myri10ge_mtu;
 731                         dma_low = ntohl(j->dma.low);
 732                         dma_high = ntohl(j->dma.high);
 733                         ptr = j->buf;
 734                 }
 735                 ss->rx_small.info[idx].ptr = ptr;
 736                 ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
 737                 ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
 738                 len -= alloc_size;
 739                 ptr += alloc_size;
 740                 dma_low += alloc_size;
 741         }
 742 }
 743 
 744 /*
 745  * Return the jumbo bufs we carved up for small to the jumbo pool
 746  */
 747 
 748 static void
 749 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
 750 {
 751         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 752         struct myri10ge_jpool_entry *j = NULL;
 753 
 754         mutex_enter(&jpool->mtx);
 755         while (ss->small_jpool != NULL) {
 756                 j = ss->small_jpool;
 757                 ss->small_jpool = j->next;
 758                 j->next = jpool->head;
 759                 jpool->head = j;
 760         }
 761         mutex_exit(&jpool->mtx);
 762         ss->jbufs_for_smalls = 0;
 763 }
 764 
 765 static int
 766 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
 767 {
 768         myri10ge_tx_ring_t *tx = &ss->tx;
 769         struct myri10ge_priv *mgp = ss->mgp;
 770         struct myri10ge_tx_dma_handle *handle;
 771         int err;
 772 
 773         handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
 774         err = ddi_dma_alloc_handle(mgp->dip,
 775             &myri10ge_tx_dma_attr,
 776             DDI_DMA_SLEEP, NULL,
 777             &handle->h);
 778         if (err) {
 779                 static int limit = 0;
 780                 if (limit == 0)
 781                         cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n",
 782                             mgp->name);
 783                 limit++;
 784                 kmem_free(handle, sizeof (*handle));
 785                 return (err);
 786         }
 787         mutex_enter(&tx->handle_lock);
 788         MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
 789         handle->next = tx->free_tx_handles;
 790         tx->free_tx_handles = handle;
 791         mutex_exit(&tx->handle_lock);
 792         return (DDI_SUCCESS);
 793 }
 794 
 795 static void
 796 myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
 797 {
 798         myri10ge_tx_ring_t *tx = &ss->tx;
 799         struct myri10ge_tx_dma_handle *handle;
 800         mutex_enter(&tx->handle_lock);
 801 
 802         handle = tx->free_tx_handles;
 803         while (handle != NULL) {
 804                 tx->free_tx_handles = handle->next;
 805                 ddi_dma_free_handle(&handle->h);
 806                 kmem_free(handle, sizeof (*handle));
 807                 handle = tx->free_tx_handles;
 808                 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
 809         }
 810         mutex_exit(&tx->handle_lock);
 811         if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
 812                 cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
 813                     ss->mgp->name,
 814                     (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
 815         }
 816 }
 817 
 818 static void
 819 myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
 820     struct myri10ge_tx_dma_handle_head *list)
 821 {
 822         mutex_enter(&tx->handle_lock);
 823         list->tail->next = tx->free_tx_handles;
 824         tx->free_tx_handles = list->head;
 825         mutex_exit(&tx->handle_lock);
 826 }
 827 
 828 static void
 829 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
 830     struct myri10ge_tx_dma_handle *handle)
 831 {
 832         struct myri10ge_tx_dma_handle_head list;
 833 
 834         if (handle == NULL)
 835                 return;
 836         list.head = handle;
 837         list.tail = handle;
 838         while (handle != NULL) {
 839                 list.tail = handle;
 840                 handle = handle->next;
 841         }
 842         myri10ge_free_tx_handles(tx, &list);
 843 }
 844 
 845 static int
 846 myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
 847     struct myri10ge_tx_dma_handle **ret)
 848 {
 849         myri10ge_tx_ring_t *tx = &ss->tx;
 850         struct myri10ge_tx_dma_handle *handle;
 851         int err, i;
 852 
 853         mutex_enter(&tx->handle_lock);
 854         for (i = 0; i < count; i++) {
 855                 handle = tx->free_tx_handles;
 856                 while (handle == NULL) {
 857                         mutex_exit(&tx->handle_lock);
 858                         err = myri10ge_add_tx_handle(ss);
 859                         if (err != DDI_SUCCESS) {
 860                                 goto abort_with_handles;
 861                         }
 862                         mutex_enter(&tx->handle_lock);
 863                         handle = tx->free_tx_handles;
 864                 }
 865                 tx->free_tx_handles = handle->next;
 866                 handle->next = *ret;
 867                 *ret = handle;
 868         }
 869         mutex_exit(&tx->handle_lock);
 870         return (DDI_SUCCESS);
 871 
 872 abort_with_handles:
 873         myri10ge_free_tx_handle_slist(tx, *ret);
 874         return (err);
 875 }
 876 
 877 
 878 /*
 879  * Frees DMA resources associated with the send ring
 880  */
 881 static void
 882 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
 883 {
 884         myri10ge_tx_ring_t *tx;
 885         struct myri10ge_tx_dma_handle_head handles;
 886         size_t bytes;
 887         int idx;
 888 
 889         tx = &ss->tx;
 890         handles.head = NULL;
 891         handles.tail = NULL;
 892         for (idx = 0; idx < ss->tx.mask + 1; idx++) {
 893                 if (tx->info[idx].m) {
 894                         (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
 895                         handles.head = tx->info[idx].handle;
 896                         if (handles.tail == NULL)
 897                                 handles.tail = tx->info[idx].handle;
 898                         freeb(tx->info[idx].m);
 899                         tx->info[idx].m = 0;
 900                         tx->info[idx].handle = 0;
 901                 }
 902                 tx->cp[idx].va = NULL;
 903                 myri10ge_dma_free(&tx->cp[idx].dma);
 904         }
 905         bytes = sizeof (*tx->cp) * (tx->mask + 1);
 906         kmem_free(tx->cp, bytes);
 907         tx->cp = NULL;
 908         if (handles.head != NULL)
 909                 myri10ge_free_tx_handles(tx, &handles);
 910         myri10ge_remove_tx_handles(ss);
 911 }
 912 
 913 /*
 914  * Allocates DMA handles associated with the send ring
 915  */
 916 static inline int
 917 myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
 918 {
 919         struct myri10ge_tx_dma_handle *handles;
 920         int h;
 921         size_t bytes;
 922 
 923         bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
 924         ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
 925         if (ss->tx.cp == NULL) {
 926                 cmn_err(CE_WARN,
 927                     "%s: Failed to allocate tx copyblock storage\n",
 928                     ss->mgp->name);
 929                 return (DDI_FAILURE);
 930         }
 931 
 932 
 933         /* allocate the TX copyblocks */
 934         for (h = 0; h < ss->tx.mask + 1; h++) {
 935                 ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
 936                     4096, &myri10ge_rx_jumbo_dma_attr,
 937                     &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
 938                     DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
 939                     DDI_DMA_DONTWAIT);
 940                 if (ss->tx.cp[h].va == NULL) {
 941                         cmn_err(CE_WARN, "%s: Failed to allocate tx "
 942                             "copyblock %d\n", ss->mgp->name, h);
 943                         goto abort_with_copyblocks;
 944                 }
 945         }
 946         /* pre-allocate transmit handles */
 947         handles = NULL;
 948         (void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
 949             &handles);
 950         if (handles != NULL)
 951                 myri10ge_free_tx_handle_slist(&ss->tx, handles);
 952 
 953         return (DDI_SUCCESS);
 954 
 955 abort_with_copyblocks:
 956         while (h > 0)  {
 957                 h--;
 958                 myri10ge_dma_free(&ss->tx.cp[h].dma);
 959         }
 960 
 961         bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
 962         kmem_free(ss->tx.cp, bytes);
 963         ss->tx.cp = NULL;
 964         return (DDI_FAILURE);
 965 }
 966 
 967 /*
 968  * The eeprom strings on the lanaiX have the format
 969  * SN=x\0
 970  * MAC=x:x:x:x:x:x\0
 971  * PT:ddd mmm xx xx:xx:xx xx\0
 972  * PV:ddd mmm xx xx:xx:xx xx\0
 973  */
 974 static int
 975 myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
 976 {
 977 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
 978 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') :     \
 979                 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') :  \
 980                 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
 981 
 982         char *ptr, *limit;
 983         int i, hv, lv;
 984 
 985         ptr = mgp->eeprom_strings;
 986         limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
 987 
 988         while (*ptr != '\0' && ptr < limit) {
 989                 if (memcmp(ptr, "MAC=", 4) == 0) {
 990                         ptr += 4;
 991                         if (myri10ge_verbose)
 992                                 printf("%s: mac address = %s\n", mgp->name,
 993                                     ptr);
 994                         mgp->mac_addr_string = ptr;
 995                         for (i = 0; i < 6; i++) {
 996                                 if ((ptr + 2) > limit)
 997                                         goto abort;
 998 
 999                                 if (*(ptr+1) == ':') {
1000                                         hv = 0;
1001                                         lv = myri10ge_digit(*ptr); ptr++;
1002                                 } else {
1003                                         hv = myri10ge_digit(*ptr); ptr++;
1004                                         lv = myri10ge_digit(*ptr); ptr++;
1005                                 }
1006                                 mgp->mac_addr[i] = (hv << 4) | lv;
1007                                 ptr++;
1008                         }
1009                 }
1010                 if (memcmp((const void *)ptr, "SN=", 3) == 0) {
1011                         ptr += 3;
1012                         mgp->sn_str = (char *)ptr;
1013                 }
1014                 if (memcmp((const void *)ptr, "PC=", 3) == 0) {
1015                         ptr += 3;
1016                         mgp->pc_str = (char *)ptr;
1017                 }
1018                 MYRI10GE_NEXT_STRING(ptr);
1019         }
1020 
1021         return (0);
1022 
1023 abort:
1024         cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
1025         return (ENXIO);
1026 }
1027 
1028 
1029 /*
1030  * Determine the register set containing the PCI resource we
1031  * want to map: the memory-mappable part of the interface. We do
1032  * this by scanning the DDI "reg" property of the interface,
1033  * which is an array of mx_ddi_reg_set structures.
1034  */
1035 static int
1036 myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
1037     unsigned long *busno, unsigned long *devno,
1038     unsigned long *funcno)
1039 {
1040 
1041 #define REGISTER_NUMBER(ip)     (ip[0] >>  0 & 0xff)
1042 #define FUNCTION_NUMBER(ip)     (ip[0] >>  8 & 0x07)
1043 #define DEVICE_NUMBER(ip)       (ip[0] >> 11 & 0x1f)
1044 #define BUS_NUMBER(ip)          (ip[0] >> 16 & 0xff)
1045 #define ADDRESS_SPACE(ip)       (ip[0] >> 24 & 0x03)
1046 #define PCI_ADDR_HIGH(ip)       (ip[1])
1047 #define PCI_ADDR_LOW(ip)        (ip[2])
1048 #define PCI_SPAN_HIGH(ip)       (ip[3])
1049 #define PCI_SPAN_LOW(ip)        (ip[4])
1050 
1051 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
1052 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
1053 
1054         int *data, i, *rs;
1055         uint32_t nelementsp;
1056 
1057 #ifdef MYRI10GE_REGSET_VERBOSE
1058         char *address_space_name[] = { "Configuration Space",
1059                                         "I/O Space",
1060                                         "32-bit Memory Space",
1061                                         "64-bit Memory Space"
1062         };
1063 #endif
1064 
1065         if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1066             "reg", &data, &nelementsp) != DDI_SUCCESS) {
1067                 printf("Could not determine register set.\n");
1068                 return (ENXIO);
1069         }
1070 
1071 #ifdef MYRI10GE_REGSET_VERBOSE
1072         printf("There are %d register sets.\n", nelementsp / 5);
1073 #endif
1074         if (!nelementsp) {
1075                 printf("Didn't find any \"reg\" properties.\n");
1076                 ddi_prop_free(data);
1077                 return (ENODEV);
1078         }
1079 
1080         /* Scan for the register number. */
1081         rs = &data[0];
1082         *busno = BUS_NUMBER(rs);
1083         *devno = DEVICE_NUMBER(rs);
1084         *funcno = FUNCTION_NUMBER(rs);
1085 
1086 #ifdef MYRI10GE_REGSET_VERBOSE
1087         printf("*** Scanning for register number.\n");
1088 #endif
1089         for (i = 0; i < nelementsp / 5; i++) {
1090                 rs = &data[5 * i];
1091 #ifdef MYRI10GE_REGSET_VERBOSE
1092                 printf("Examining register set %d:\n", i);
1093                 printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
1094                 printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
1095                 printf("  Device number   = %d.\n", DEVICE_NUMBER(rs));
1096                 printf("  Bus number      = %d.\n", BUS_NUMBER(rs));
1097                 printf("  Address space   = %d (%s ).\n", ADDRESS_SPACE(rs),
1098                     address_space_name[ADDRESS_SPACE(rs)]);
1099                 printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
1100                     PCI_ADDR_LOW(rs));
1101                 printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
1102                     PCI_SPAN_LOW(rs));
1103 #endif
1104                 /* We are looking for a memory property. */
1105 
1106                 if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
1107                     ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
1108                         *reg_set = i;
1109 
1110 #ifdef MYRI10GE_REGSET_VERBOSE
1111                         printf("%s uses register set %d.\n",
1112                             address_space_name[ADDRESS_SPACE(rs)], *reg_set);
1113 #endif
1114 
1115                         *span = (PCI_SPAN_LOW(rs));
1116 #ifdef MYRI10GE_REGSET_VERBOSE
1117                         printf("Board span is 0x%x\n", *span);
1118 #endif
1119                         break;
1120                 }
1121         }
1122 
1123         ddi_prop_free(data);
1124 
1125         /* If no match, fail. */
1126         if (i >= nelementsp / 5) {
1127                 return (EIO);
1128         }
1129 
1130         return (0);
1131 }
1132 
1133 
1134 static int
1135 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
1136 {
1137         void *inflate_buffer;
1138         int rv, status;
1139         size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
1140         size_t destlen;
1141         mcp_gen_header_t *hdr;
1142         unsigned hdr_offset, i;
1143 
1144 
1145         *limit = 0; /* -Wuninitialized */
1146         status = 0;
1147 
1148         inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
1149         if (!inflate_buffer) {
1150                 cmn_err(CE_WARN,
1151                     "%s: Could not allocate buffer to inflate mcp\n",
1152                     mgp->name);
1153                 return (ENOMEM);
1154         }
1155 
1156         destlen = sram_size;
1157         rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
1158             mgp->eth_z8e_length);
1159 
1160         if (rv != Z_OK) {
1161                 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
1162                     mgp->name, z_strerror(rv));
1163                 status = ENXIO;
1164                 goto abort;
1165         }
1166 
1167         *limit = (uint32_t)destlen;
1168 
1169         hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
1170             MCP_HEADER_PTR_OFFSET));
1171         hdr = (void *)((char *)inflate_buffer + hdr_offset);
1172         if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
1173                 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
1174                     ntohl(hdr->mcp_type));
1175                 status = EIO;
1176                 goto abort;
1177         }
1178 
1179         /* save firmware version for kstat */
1180         (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
1181         if (myri10ge_verbose)
1182                 printf("%s: firmware id: %s\n", mgp->name, hdr->version);
1183 
1184         /* Copy the inflated firmware to NIC SRAM. */
1185         for (i = 0; i < *limit; i += 256) {
1186                 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
1187                     (char *)inflate_buffer + i,
1188                     min(256U, (unsigned)(*limit - i)));
1189                 mb();
1190                 (void) *(int *)(void *)mgp->sram;
1191                 mb();
1192         }
1193 
1194 abort:
1195         kmem_free(inflate_buffer, sram_size);
1196 
1197         return (status);
1198 
1199 }
1200 
1201 
1202 int
1203 myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
1204                 myri10ge_cmd_t *data)
1205 {
1206         mcp_cmd_t *buf;
1207         char buf_bytes[sizeof (*buf) + 8];
1208         volatile mcp_cmd_response_t *response = mgp->cmd;
1209         volatile char *cmd_addr =
1210             (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
1211         int sleep_total = 0;
1212 
1213         /* ensure buf is aligned to 8 bytes */
1214         buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1215 
1216         buf->data0 = htonl(data->data0);
1217         buf->data1 = htonl(data->data1);
1218         buf->data2 = htonl(data->data2);
1219         buf->cmd = htonl(cmd);
1220         buf->response_addr.low = mgp->cmd_dma.low;
1221         buf->response_addr.high = mgp->cmd_dma.high;
1222         mutex_enter(&mgp->cmd_lock);
1223         response->result = 0xffffffff;
1224         mb();
1225 
1226         myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));
1227 
1228         /* wait up to 20ms */
1229         for (sleep_total = 0; sleep_total < 20; sleep_total++) {
1230                 mb();
1231                 if (response->result != 0xffffffff) {
1232                         if (response->result == 0) {
1233                                 data->data0 = ntohl(response->data);
1234                                 mutex_exit(&mgp->cmd_lock);
1235                                 return (0);
1236                         } else if (ntohl(response->result)
1237                             == MXGEFW_CMD_UNKNOWN) {
1238                                 mutex_exit(&mgp->cmd_lock);
1239                                 return (ENOSYS);
1240                         } else if (ntohl(response->result)
1241                             == MXGEFW_CMD_ERROR_UNALIGNED) {
1242                                 mutex_exit(&mgp->cmd_lock);
1243                                 return (E2BIG);
1244                         } else {
1245                                 cmn_err(CE_WARN,
1246                                     "%s: command %d failed, result = %d\n",
1247                                     mgp->name, cmd, ntohl(response->result));
1248                                 mutex_exit(&mgp->cmd_lock);
1249                                 return (ENXIO);
1250                         }
1251                 }
1252                 drv_usecwait(1000);
1253         }
1254         mutex_exit(&mgp->cmd_lock);
1255         cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
1256             mgp->name, cmd, ntohl(response->result));
1257         return (EAGAIN);
1258 }
1259 
1260 /*
1261  * Enable or disable periodic RDMAs from the host to make certain
1262  * chipsets resend dropped PCIe messages
1263  */
1264 
1265 static void
1266 myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
1267 {
1268         char buf_bytes[72];
1269         volatile uint32_t *confirm;
1270         volatile char *submit;
1271         uint32_t *buf;
1272         int i;
1273 
1274         buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1275 
1276         /* clear confirmation addr */
1277         confirm = (volatile uint32_t *)mgp->cmd;
1278         *confirm = 0;
1279         mb();
1280 
1281         /*
1282          * send an rdma command to the PCIe engine, and wait for the
1283          * response in the confirmation address.  The firmware should
1284          *  write a -1 there to indicate it is alive and well
1285          */
1286 
1287         buf[0] = mgp->cmd_dma.high;          /* confirm addr MSW */
1288         buf[1] = mgp->cmd_dma.low;           /* confirm addr LSW */
1289         buf[2] = htonl(0xffffffff);             /* confirm data */
1290         buf[3] = htonl(mgp->cmd_dma.high);   /* dummy addr MSW */
1291         buf[4] = htonl(mgp->cmd_dma.low);    /* dummy addr LSW */
1292         buf[5] = htonl(enable);                 /* enable? */
1293 
1294 
1295         submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);
1296 
1297         myri10ge_pio_copy((char *)submit, buf, 64);
1298         mb();
1299         drv_usecwait(1000);
1300         mb();
1301         i = 0;
1302         while (*confirm != 0xffffffff && i < 20) {
1303                 drv_usecwait(1000);
1304                 i++;
1305         }
1306         if (*confirm != 0xffffffff) {
1307                 cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
1308                     mgp->name,
1309                     (enable ? "enable" : "disable"), (void*) confirm, *confirm);
1310         }
1311 }
1312 
1313 static int
1314 myri10ge_load_firmware(struct myri10ge_priv *mgp)
1315 {
1316         myri10ge_cmd_t cmd;
1317         volatile uint32_t *confirm;
1318         volatile char *submit;
1319         char buf_bytes[72];
1320         uint32_t *buf, size;
1321         int status, i;
1322 
1323         buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1324 
1325         status = myri10ge_load_firmware_from_zlib(mgp, &size);
1326         if (status) {
1327                 cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
1328                 return (status);
1329         }
1330 
1331         /* clear confirmation addr */
1332         confirm = (volatile uint32_t *)mgp->cmd;
1333         *confirm = 0;
1334         mb();
1335 
1336         /*
1337          * send a reload command to the bootstrap MCP, and wait for the
1338          * response in the confirmation address.  The firmware should
1339          * write a -1 there to indicate it is alive and well
1340          */
1341 
1342         buf[0] = mgp->cmd_dma.high;  /* confirm addr MSW */
1343         buf[1] = mgp->cmd_dma.low;   /* confirm addr LSW */
1344         buf[2] = htonl(0xffffffff);     /* confirm data */
1345 
1346         /*
1347          * FIX: All newest firmware should un-protect the bottom of
1348          * the sram before handoff. However, the very first interfaces
1349          * do not. Therefore the handoff copy must skip the first 8 bytes
1350          */
1351         buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
1352         buf[4] = htonl(size - 8);       /* length of code */
1353         buf[5] = htonl(8);              /* where to copy to */
1354         buf[6] = htonl(0);              /* where to jump to */
1355 
1356         submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);
1357 
1358         myri10ge_pio_copy((char *)submit, buf, 64);
1359         mb();
1360         drv_usecwait(1000);
1361         mb();
1362         i = 0;
1363         while (*confirm != 0xffffffff && i < 1000) {
1364                 drv_usecwait(1000);
1365                 i++;
1366         }
1367         if (*confirm != 0xffffffff) {
1368                 cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
1369                     mgp->name, (void *) confirm, *confirm);
1370 
1371                 return (ENXIO);
1372         }
1373         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1374         if (status != 0) {
1375                 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
1376                     mgp->name);
1377                 return (ENXIO);
1378         }
1379 
1380         mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
1381         myri10ge_dummy_rdma(mgp, 1);
1382         return (0);
1383 }
1384 
1385 static int
1386 myri10ge_m_unicst(void *arg, const uint8_t *addr)
1387 {
1388         struct myri10ge_priv *mgp = arg;
1389         myri10ge_cmd_t cmd;
1390         int status;
1391 
1392         cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1393             | (addr[2] << 8) | addr[3]);
1394 
1395         cmd.data1 = ((addr[4] << 8) | (addr[5]));
1396 
1397         status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
1398         if (status == 0 && (addr != mgp->mac_addr))
1399                 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));
1400 
1401         return (status);
1402 }
1403 
1404 static int
1405 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
1406 {
1407         myri10ge_cmd_t cmd;
1408         int status;
1409 
1410         if (pause)
1411                 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
1412                     &cmd);
1413         else
1414                 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
1415                     &cmd);
1416 
1417         if (status) {
1418                 cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
1419                     mgp->name);
1420                 return (ENXIO);
1421         }
1422         mgp->pause = pause;
1423         return (0);
1424 }
1425 
1426 static void
1427 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
1428 {
1429         myri10ge_cmd_t cmd;
1430         int status;
1431 
1432         if (promisc)
1433                 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
1434         else
1435                 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);
1436 
1437         if (status) {
1438                 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
1439                     mgp->name);
1440         }
1441 }
1442 
1443 static int
1444 myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
1445 {
1446         myri10ge_cmd_t cmd;
1447         int status;
1448         uint32_t len;
1449         void *dmabench;
1450         struct myri10ge_dma_stuff dmabench_dma;
1451         char *test = " ";
1452 
1453         /*
1454          * Run a small DMA test.
1455          * The magic multipliers to the length tell the firmware
1456          * tp do DMA read, write, or read+write tests.  The
1457          * results are returned in cmd.data0.  The upper 16
1458          * bits or the return is the number of transfers completed.
1459          * The lower 16 bits is the time in 0.5us ticks that the
1460          * transfers took to complete
1461          */
1462 
1463         len = mgp->tx_boundary;
1464 
1465         dmabench = myri10ge_dma_alloc(mgp->dip, len,
1466             &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
1467             DDI_DMA_STREAMING,  DDI_DMA_RDWR|DDI_DMA_STREAMING,
1468             &dmabench_dma, 1, DDI_DMA_DONTWAIT);
1469         mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
1470         if (dmabench == NULL) {
1471                 cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
1472                 return (ENOMEM);
1473         }
1474 
1475         cmd.data0 = ntohl(dmabench_dma.low);
1476         cmd.data1 = ntohl(dmabench_dma.high);
1477         cmd.data2 = len * 0x10000;
1478         status = myri10ge_send_cmd(mgp, test_type, &cmd);
1479         if (status != 0) {
1480                 test = "read";
1481                 goto abort;
1482         }
1483         mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1484 
1485         cmd.data0 = ntohl(dmabench_dma.low);
1486         cmd.data1 = ntohl(dmabench_dma.high);
1487         cmd.data2 = len * 0x1;
1488         status = myri10ge_send_cmd(mgp, test_type, &cmd);
1489         if (status != 0) {
1490                 test = "write";
1491                 goto abort;
1492         }
1493         mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1494 
1495         cmd.data0 = ntohl(dmabench_dma.low);
1496         cmd.data1 = ntohl(dmabench_dma.high);
1497         cmd.data2 = len * 0x10001;
1498         status = myri10ge_send_cmd(mgp, test_type, &cmd);
1499         if (status != 0) {
1500                 test = "read/write";
1501                 goto abort;
1502         }
1503         mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
1504             (cmd.data0 & 0xffff);
1505 
1506 
1507 abort:
1508         myri10ge_dma_free(&dmabench_dma);
1509         if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
1510                 cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
1511                     test);
1512         return (status);
1513 }
1514 
1515 static int
1516 myri10ge_reset(struct myri10ge_priv *mgp)
1517 {
1518         myri10ge_cmd_t cmd;
1519         struct myri10ge_nic_stat *ethstat;
1520         struct myri10ge_slice_state *ss;
1521         int i, status;
1522         size_t bytes;
1523 
1524         /* send a reset command to the card to see if it is alive */
1525         (void) memset(&cmd, 0, sizeof (cmd));
1526         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
1527         if (status != 0) {
1528                 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
1529                 return (ENXIO);
1530         }
1531 
1532         /* Now exchange information about interrupts  */
1533 
1534         bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
1535         cmd.data0 = (uint32_t)bytes;
1536         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1537 
1538         /*
1539          * Even though we already know how many slices are supported
1540          * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
1541          * has magic side effects, and must be called after a reset.
1542          * It must be called prior to calling any RSS related cmds,
1543          * including assigning an interrupt queue for anything but
1544          * slice 0.  It must also be called *after*
1545          * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1546          * the firmware to compute offsets.
1547          */
1548 
1549         if (mgp->num_slices > 1) {
1550 
1551                 /* ask the maximum number of slices it supports */
1552                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1553                     &cmd);
1554                 if (status != 0) {
1555                         cmn_err(CE_WARN,
1556                             "%s: failed to get number of slices\n",
1557                             mgp->name);
1558                         return (status);
1559                 }
1560 
1561                 /*
1562                  * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1563                  * to setting up the interrupt queue DMA
1564                  */
1565 
1566                 cmd.data0 = mgp->num_slices;
1567                 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
1568                     MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1569                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1570                     &cmd);
1571                 if (status != 0) {
1572                         cmn_err(CE_WARN,
1573                             "%s: failed to set number of slices\n",
1574                             mgp->name);
1575                         return (status);
1576                 }
1577         }
1578         for (i = 0; i < mgp->num_slices; i++) {
1579                 ss = &mgp->ss[i];
1580                 cmd.data0 = ntohl(ss->rx_done.dma.low);
1581                 cmd.data1 = ntohl(ss->rx_done.dma.high);
1582                 cmd.data2 = i;
1583                 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
1584                     &cmd);
1585         };
1586 
1587         status |= myri10ge_send_cmd(mgp,  MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1588         for (i = 0; i < mgp->num_slices; i++) {
1589                 ss = &mgp->ss[i];
1590                 ss->irq_claim = (volatile unsigned int *)
1591                     (void *)(mgp->sram + cmd.data0 + 8 * i);
1592         }
1593 
1594         if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
1595                 status |= myri10ge_send_cmd(mgp,
1596                     MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1597                 mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1598         }
1599 
1600         status |= myri10ge_send_cmd(mgp,
1601             MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1602         mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1603 
1604         if (status != 0) {
1605                 cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
1606                     mgp->name);
1607                 return (status);
1608         }
1609 
1610         *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
1611         (void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);
1612 
1613         /* reset mcp/driver shared state back to 0 */
1614 
1615         for (i = 0; i < mgp->num_slices; i++) {
1616                 ss = &mgp->ss[i];
1617                 bytes = mgp->max_intr_slots *
1618                     sizeof (*mgp->ss[0].rx_done.entry);
1619                 (void) memset(ss->rx_done.entry, 0, bytes);
1620                 ss->tx.req = 0;
1621                 ss->tx.done = 0;
1622                 ss->tx.pkt_done = 0;
1623                 ss->rx_big.cnt = 0;
1624                 ss->rx_small.cnt = 0;
1625                 ss->rx_done.idx = 0;
1626                 ss->rx_done.cnt = 0;
1627                 ss->rx_token = 0;
1628                 ss->tx.watchdog_done = 0;
1629                 ss->tx.watchdog_req = 0;
1630                 ss->tx.active = 0;
1631                 ss->tx.activate = 0;
1632         }
1633         mgp->watchdog_rx_pause = 0;
1634         if (mgp->ksp_stat != NULL) {
1635                 ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
1636                 ethstat->link_changes.value.ul = 0;
1637         }
1638         status = myri10ge_m_unicst(mgp, mgp->mac_addr);
1639         myri10ge_change_promisc(mgp, 0);
1640         (void) myri10ge_change_pause(mgp, mgp->pause);
1641         return (status);
1642 }
1643
     /*
      * Build the transmit-side Toeplitz lookup table.
      *
      * Asks the firmware for the location of the RSS key
      * (MXGEFW_CMD_GET_RSS_KEY_OFFSET), copies the key out of NIC SRAM into
      * mgp->rss_key, and precomputes mgp->toeplitz_hash_table.  The table is
      * 12 consecutive sub-tables of 256 entries, one sub-table per hashed
      * input byte.  Entry [b * 256 + v] is the XOR of the 32-bit key windows
      * selected by the set bits of v, where the window for bit s (counted
      * from the most significant bit) of input byte b starts at key bit
      * (b * 8 + s).
      *
      * Returns 0 on success, or EIO if the firmware command fails; in that
      * case no table has been allocated.  The table is freed by
      * myri10ge_start_locked() (error path) and myri10ge_stop_locked().
      */
1644 static int
1645 myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
1646 {
1647         myri10ge_cmd_t cmd;
1648         int i, b, s, t, j;
1649         int status;
1650         uint32_t k[8];
1651         uint32_t tmp;
1652         uint8_t *key;
1653
1654         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
1655             &cmd);
1656         if (status != 0) {
1657                 cmn_err(CE_WARN, "%s: failed to get rss key\n",
1658                     mgp->name);
1659                 return (EIO);
1660         }
1661         myri10ge_pio_copy32(mgp->rss_key,
1662             (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
1663             sizeof (mgp->rss_key));
1664
1665         mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
1666             KM_SLEEP);
1667         key = (uint8_t *)mgp->rss_key;
1668         t = 0;
1669         for (b = 0; b < 12; b++) {
1670                 for (s = 0; s < 8; s++) {
1671                         /* Bits: b*8+s, ..., b*8+s+31 */
1672                         k[s] = 0;
1673                         for (j = 0; j < 32; j++) {
1674                                 int bit = b*8+s+j;
                                     /*
                                      * Extract the key bit as an unsigned
                                      * value: shifting a signed 1 left by 31
                                      * (j == 0) is undefined behaviour.
                                      */
1675                                 uint32_t v = 0x1 &
                                         (key[bit / 8] >> (7 - (bit & 0x7)));
1676                                 k[s] |= v << (31 - j);
1677                         }
1678                 }
1679
                     /* one entry per possible value of input byte b */
1680                 for (i = 0; i <= 0xff; i++) {
1681                         tmp = 0;
1682                         if (i & (1 << 7)) { tmp ^= k[0]; }
1683                         if (i & (1 << 6)) { tmp ^= k[1]; }
1684                         if (i & (1 << 5)) { tmp ^= k[2]; }
1685                         if (i & (1 << 4)) { tmp ^= k[3]; }
1686                         if (i & (1 << 3)) { tmp ^= k[4]; }
1687                         if (i & (1 << 2)) { tmp ^= k[5]; }
1688                         if (i & (1 << 1)) { tmp ^= k[6]; }
1689                         if (i & (1 << 0)) { tmp ^= k[7]; }
1690                         mgp->toeplitz_hash_table[t++] = tmp;
1691                 }
1692         }
1693         return (0);
1694 }
1695
     /*
      * Select the transmit slice for an IPv4 packet using the precomputed
      * Toeplitz table (mgp->toeplitz_hash_table).
      *
      * The four destination address bytes use sub-tables 0-3 and the four
      * source address bytes use sub-tables 4-7.  For TCP packets, when
      * myri10ge_rss_hash has MXGEFW_RSS_HASH_TYPE_TCP_IPV4 set, the
      * destination port uses sub-tables 8-9 and the source port 10-11.
      * The resulting hash is masked with (num_slices - 1) to index mgp->ss.
      */
1696 static inline struct myri10ge_slice_state *
1697 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1698 {
1699         uint32_t *lut = mgp->toeplitz_hash_table;
1700         uint32_t src_ip = ntohl(ip->ip_src.s_addr);
1701         uint32_t dst_ip = ntohl(ip->ip_dst.s_addr);
1702         uint32_t sum = 0;
1703         int n, shift;
1704
1705         /*
1706          * Inputs are hashed in the reverse of the order the NIC uses,
1707          * so that the same hash value is generated for the connection;
1708          * this tries to keep connections CPU local.
1709          */
1710
1711         /* IPv4 addresses, most significant byte first */
1712         for (n = 0; n < 4; n++) {
1713                 shift = 24 - (8 * n);
1714                 sum ^= lut[(256 * n) + ((dst_ip >> shift) & 0xff)];
1715                 sum ^= lut[(256 * (n + 4)) + ((src_ip >> shift) & 0xff)];
1716         }
1717
1718         /* fold in the TCP ports, if required */
1719         if (ip->ip_p == IPPROTO_TCP &&
1720             (myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4)) {
1721                 struct tcphdr *tcp = (struct tcphdr *)(void *)
1722                     (((uint8_t *)ip) + (ip->ip_hl << 2));
1723                 uint16_t dport = ntohs(tcp->th_dport);
1724                 uint16_t sport = ntohs(tcp->th_sport);
1725
1726                 sum ^= lut[(256 * 8) + ((dport >> 8) & 0xff)];
1727                 sum ^= lut[(256 * 9) + (dport & 0xff)];
1728                 sum ^= lut[(256 * 10) + ((sport >> 8) & 0xff)];
1729                 sum ^= lut[(256 * 11) + (sport & 0xff)];
1730         }
1731
1732         return (&mgp->ss[(mgp->num_slices - 1) & sum]);
1733 }
1739
     /*
      * Select the transmit slice from the transport ports of an IPv4
      * packet.  Packets that are neither TCP nor UDP use slice 0.  The hash
      * is the low byte of the destination port, plus the low byte of the
      * source port when myri10ge_rss_hash is
      * MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT, masked with (num_slices - 1).
      */
1740 static inline struct myri10ge_slice_state *
1741 myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1742 {
1743         struct tcphdr *l4;
1744         uint32_t port_sum;
1745
1746         switch (ip->ip_p) {
1747         case IPPROTO_TCP:
1748         case IPPROTO_UDP:
1749                 break;
1750         default:
1751                 return (&mgp->ss[0]);
1752         }
1753
1754         /* the ports are read through the TCP header layout in both cases */
1755         l4 = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));
1756
1757         /* the *destination* port is used so as to match NIC's hashing */
1758         port_sum = ntohs(l4->th_dport) & 0xff;
1759         if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
1760                 port_sum += ntohs(l4->th_sport) & 0xff;
1761         return (&mgp->ss[(mgp->num_slices - 1) & port_sum]);
1762 }
1763
     /*
      * Pick the transmit slice for an outgoing packet.
      *
      * With a single slice, slice 0 is always used.  When myri10ge_tx_hash
      * is 0, the slice is the current CPU id masked with (num_slices - 1).
      * Otherwise the packet must be IPv4 (optionally behind one VLAN tag)
      * with its headers in the first mblk; it is then hashed by the Toeplitz
      * or port-based helper selected by myri10ge_rss_hash.  Any packet that
      * does not qualify, or an unrecognized hash type, uses slice 0.
      */
1764 static inline struct myri10ge_slice_state *
1765 myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
1766 {
1767         unsigned int slice = 0;
1768         struct ether_header *eh;
1769         struct ether_vlan_header *vh;
1770         struct ip *ip;
1771         int ehl, ihl;
1772
1773         if (mgp->num_slices == 1)
1774                 return (&mgp->ss[0]);
1775
1776         if (myri10ge_tx_hash == 0) {
1777                 slice = CPU->cpu_id & (mgp->num_slices - 1);
1778                 return (&mgp->ss[slice]);
1779         }
1780
1781         /*
1782          *  ensure it is a TCP or UDP over IPv4 packet, and that the
1783          *  headers are in the 1st mblk.  Otherwise, punt
1784          */
1785         ehl = sizeof (*eh);
1786         ihl = sizeof (*ip);
             /*
              * First pass uses the minimum header sizes.  The extra 8 bytes
              * presumably cover the start of the TCP/UDP header (the ports)
              * -- TODO confirm.
              */
1787         if ((MBLKL(mp)) <  (ehl + ihl + 8))
1788                 return (&mgp->ss[0]);
1789         eh = (struct ether_header *)(void *)mp->b_rptr;
1790         ip = (struct ip *)(void *)(eh + 1);
1791         if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
                     /* not plain IPv4: accept only VLAN-tagged IPv4 */
1792                 if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
1793                         return (&mgp->ss[0]);
1794                 vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
1795                 if (vh->ether_type != BE_16(ETHERTYPE_IP))
1796                         return (&mgp->ss[0]);
1797                 ehl += 4;
1798                 ip = (struct ip *)(void *)(vh + 1);
1799         }
             /* re-check using the real IP header length and any VLAN tag */
1800         ihl = ip->ip_hl << 2;
1801         if (MBLKL(mp) <  (ehl + ihl + 8))
1802                 return (&mgp->ss[0]);
1803         switch (myri10ge_rss_hash) {
1804         case MXGEFW_RSS_HASH_TYPE_IPV4:
1805                 /* fallthru */
1806         case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
1807                 /* fallthru */
1808         case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
1809                 return (myri10ge_toeplitz_send_hash(mgp, ip));
1810         case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
1811                 /* fallthru */
1812         case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
1813                 return (myri10ge_simple_send_hash(mgp, ip));
1814         default:
1815                 break;
1816         }
1817         return (&mgp->ss[0]);
1818 }
1819
     /*
      * Allocate and initialize the per-slice transmit and receive state.
      *
      * Queries the firmware for the send/receive ring sizes and the SRAM
      * offsets of the send, small-receive and big-receive rings, allocates
      * the host shadow and info rings, allocates jumbo buffers and fills
      * the small and big receive rings, prepares the transmit ring, and
      * finally tells the firmware where to DMA this slice's statistics.
      *
      * Returns 0 on success.  On failure, everything allocated here is
      * released again and a non-zero status is returned (ENOMEM for
      * allocation failures, otherwise the status of the failing call).
      */
1820 static int
1821 myri10ge_setup_slice(struct myri10ge_slice_state *ss)
1822 {
1823         struct myri10ge_priv *mgp = ss->mgp;
1824         myri10ge_cmd_t cmd;
1825         int tx_ring_size, rx_ring_size;
1826         int tx_ring_entries, rx_ring_entries;
1827         int slice, status;
1828         int allocated, idx;
1829         size_t bytes;
1830
1831         slice = ss - mgp->ss;
1832         cmd.data0 = slice;
1833         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
1834         tx_ring_size = cmd.data0;
1835         cmd.data0 = slice;
1836         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1837         if (status != 0)
1838                 return (status);
1839         rx_ring_size = cmd.data0;
1840
             /* ring sizes arrive in bytes; the masks are (entries - 1) */
1841         tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
1842         rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
1843         ss->tx.mask = tx_ring_entries - 1;
1844         ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
1845
1846         /* get the lanai pointers to the send and receive rings */
1847
1848         cmd.data0 = slice;
1849         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
1850         ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
1851         if (mgp->num_slices > 1) {
1852                 ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
1853                 ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
1854                     64 * slice;
1855         } else {
1856                 ss->tx.go = NULL;
1857                 ss->tx.stop = NULL;
1858         }
1859
1860         cmd.data0 = slice;
1861         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
1862         ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
1863             (void *)(mgp->sram + cmd.data0);
1864
1865         cmd.data0 = slice;
1866         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
1867         ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
1868             (mgp->sram + cmd.data0);
1869
1870         if (status != 0) {
1871                 cmn_err(CE_WARN,
1872                     "%s: failed to get ring sizes or locations\n", mgp->name);
1873                 return (status);
1874         }
1875
             /*
              * Allocate the shadow and host info rings.  kmem_zalloc()
              * already returns zeroed memory, so no further clearing is
              * needed.  KM_SLEEP allocations do not return NULL; the
              * checks below are kept so the unwind labels stay in use.
              */
1876         status = ENOMEM;
1877         bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
1878         ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
1879         if (ss->rx_small.shadow == NULL)
1880                 goto abort;
1882
1883         bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
1884         ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
1885         if (ss->rx_big.shadow == NULL)
1886                 goto abort_with_rx_small_shadow;
1888
1889         /* allocate the host info rings */
1890
1891         bytes = tx_ring_entries * sizeof (*ss->tx.info);
1892         ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
1893         if (ss->tx.info == NULL)
1894                 goto abort_with_rx_big_shadow;
1896
1897         bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
1898         ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
1899         if (ss->rx_small.info == NULL)
1900                 goto abort_with_tx_info;
1902
1903         bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
1904         ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
1905         if (ss->rx_big.info == NULL)
1906                 goto abort_with_rx_small_info;
1908
1909         ss->tx.stall = ss->tx.sched = 0;
1910         ss->tx.stall_early = ss->tx.stall_late = 0;
1911
             /* jumbo buffers set aside to be carved into small buffers */
1912         ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
1913             (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));
1914
1915         allocated = myri10ge_add_jbufs(ss,
1916             myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
1917         if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
1918                 cmn_err(CE_WARN,
1919                     "%s: Could not allocate enough receive buffers (%d/%d)\n",
1920                     mgp->name, allocated,
1921                     myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
1922                 goto abort_with_jumbos;
1923         }
1924
1925         myri10ge_carve_up_jbufs_into_small_ring(ss);
1926         ss->j_rx_cnt = 0;
1927
1928         mutex_enter(&ss->jpool.mtx);
1929         if (allocated < rx_ring_entries)
1930                 ss->jpool.low_water = allocated / 4;
1931         else
1932                 ss->jpool.low_water = rx_ring_entries / 2;
1933
1934         /*
1935          * invalidate the big receive ring in case we do not
1936          * allocate sufficient jumbos to fill it
1937          */
1938         (void) memset(ss->rx_big.shadow, 1,
1939             (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
1940         for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
1941                 myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
1942                     &ss->rx_big.shadow[idx - 7]);
1943                 mb();
1944         }
1945
1946
1947         myri10ge_restock_jumbos(ss);
1948
             /* submit the small ring to the NIC, eight entries at a time */
1949         for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
1950                 myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
1951                     &ss->rx_small.shadow[idx - 7]);
1952                 mb();
1953         }
1954         ss->rx_small.cnt = ss->rx_small.mask + 1;
1955
1956         mutex_exit(&ss->jpool.mtx);
1957
1958         status = myri10ge_prepare_tx_ring(ss);
1959
1960         if (status != 0)
1961                 goto abort_with_small_jbufs;
1962
             /* point the firmware at this slice's statistics DMA block */
1963         cmd.data0 = ntohl(ss->fw_stats_dma.low);
1964         cmd.data1 = ntohl(ss->fw_stats_dma.high);
1965         cmd.data2 = sizeof (mcp_irq_data_t);
1966         cmd.data2 |= (slice << 16);
1967         bzero(ss->fw_stats, sizeof (*ss->fw_stats));
1968         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
1969         if (status == ENOSYS) {
                     /* V2 command not supported: retry with the older one */
1970                 cmd.data0 = ntohl(ss->fw_stats_dma.low) +
1971                     offsetof(mcp_irq_data_t, send_done_count);
1972                 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1973                 status = myri10ge_send_cmd(mgp,
1974                     MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
1975         }
1976         if (status) {
1977                 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
1978                 goto abort_with_tx;
1979         }
1980
1981         return (0);
1982
             /* unwind in reverse order of setup; each label falls through */
1983 abort_with_tx:
1984         myri10ge_unprepare_tx_ring(ss);
1985
1986 abort_with_small_jbufs:
1987         myri10ge_release_small_jbufs(ss);
1988
1989 abort_with_jumbos:
1990         if (allocated != 0) {
1991                 mutex_enter(&ss->jpool.mtx);
1992                 ss->jpool.low_water = 0;
1993                 mutex_exit(&ss->jpool.mtx);
1994                 myri10ge_unstock_jumbos(ss);
1995                 myri10ge_remove_jbufs(ss);
1996         }
1997
1998         bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
1999         kmem_free(ss->rx_big.info, bytes);
2000
2001 abort_with_rx_small_info:
2002         bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2003         kmem_free(ss->rx_small.info, bytes);
2004
2005 abort_with_tx_info:
2006         bytes = tx_ring_entries * sizeof (*ss->tx.info);
2007         kmem_free(ss->tx.info, bytes);
2008
2009 abort_with_rx_big_shadow:
2010         bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2011         kmem_free(ss->rx_big.shadow, bytes);
2012
2013 abort_with_rx_small_shadow:
2014         bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2015         kmem_free(ss->rx_small.shadow, bytes);
2016 abort:
2017         return (status);
2018
2019 }
2020
     /*
      * Release the per-slice resources set up by myri10ge_setup_slice():
      * the transmit copy buffers, the small and jumbo receive buffers, and
      * the host info and shadow rings.  Slices whose tx.cp is NULL are
      * skipped.
      */
2021 static void
2022 myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
2023 {
2024         int rx_slots, tx_slots;
2025
2026         /* ignore slices that have not been fully setup */
2027         if (ss->tx.cp == NULL)
2028                 return;
2029
2030         /* release the TX copy buffers */
2031         myri10ge_unprepare_tx_ring(ss);
2032
2033         /* stop passing returned buffers to firmware */
2034         mutex_enter(&ss->jpool.mtx);
2035         ss->jpool.low_water = 0;
2036         mutex_exit(&ss->jpool.mtx);
2037         myri10ge_release_small_jbufs(ss);
2038
2039         /* release the free jumbo frame pool */
2040         myri10ge_unstock_jumbos(ss);
2041         myri10ge_remove_jbufs(ss);
2042
2043         /* ring entry counts are recovered from the masks */
2044         rx_slots = ss->rx_big.mask + 1;
2045         tx_slots = ss->tx.mask + 1;
2046
2047         /* host info rings first, then the shadow rings */
2048         kmem_free(ss->rx_big.info, rx_slots * sizeof (*ss->rx_big.info));
2049         kmem_free(ss->rx_small.info, rx_slots * sizeof (*ss->rx_small.info));
2050         kmem_free(ss->tx.info, tx_slots * sizeof (*ss->tx.info));
2051         kmem_free(ss->rx_big.shadow, rx_slots * sizeof (*ss->rx_big.shadow));
2052         kmem_free(ss->rx_small.shadow,
2053             rx_slots * sizeof (*ss->rx_small.shadow));
2054 }
2063 static int
2064 myri10ge_start_locked(struct myri10ge_priv *mgp)
2065 {
             /*
              * Bring the NIC up: reset it, configure RSS when more than
              * one slice is in use (slice count, identity indirection
              * table, optional Toeplitz transmit table, hash type), set up
              * every slice, program the MTU and buffer sizes, try to
              * enable TSO, and finally issue MXGEFW_CMD_ETHERNET_UP.
              *
              * myri10ge_m_start() calls this with mgp->intrlock held.
              * Returns DDI_SUCCESS with mgp->running set to
              * MYRI10GE_ETH_RUNNING, or DDI_FAILURE after undoing
              * whatever had been set up.
              */
2066         myri10ge_cmd_t cmd;
2067         int status, big_pow2, i;
2068         volatile uint8_t *itable;
2069
2070         status = DDI_SUCCESS;
2071         /* Allocate DMA resources and receive buffers */
2072
2073         status = myri10ge_reset(mgp);
2074         if (status != 0) {
2075                 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
2076                 return (DDI_FAILURE);
2077         }
2078
2079         if (mgp->num_slices > 1) {
2080                 cmd.data0 = mgp->num_slices;
2081                 cmd.data1 = 1; /* use MSI-X */
2082                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2083                     &cmd);
2084                 if (status != 0) {
2085                         cmn_err(CE_WARN,
2086                             "%s: failed to set number of slices\n",
2087                             mgp->name);
2088                         goto abort_with_nothing;
2089                 }
2090                 /* setup the indirection table */
2091                 cmd.data0 = mgp->num_slices;
2092                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2093                     &cmd);
2094
2095                 status |= myri10ge_send_cmd(mgp,
2096                     MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
2097                 if (status != 0) {
2098                         cmn_err(CE_WARN,
2099                             "%s: failed to setup rss tables\n", mgp->name);
                             /*
                              * cmd.data0 is not a trustworthy table offset
                              * if either command failed, so do not go on
                              * to write the indirection table into SRAM.
                              */
                             goto abort_with_nothing;
2100                 }
2101
2102                 /* just enable an identity mapping */
2103                 itable = mgp->sram + cmd.data0;
2104                 for (i = 0; i < mgp->num_slices; i++)
2105                         itable[i] = (uint8_t)i;
2106
2107                 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
2108                         status = myri10ge_init_toeplitz(mgp);
2109                         if (status != 0) {
2110                                 cmn_err(CE_WARN, "%s: failed to setup "
2111                                     "toeplitz tx hash table", mgp->name);
2112                                 goto abort_with_nothing;
2113                         }
2114                 }
2115                 cmd.data0 = 1;
2116                 cmd.data1 = myri10ge_rss_hash;
2117                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2118                     &cmd);
2119                 if (status != 0) {
2120                         cmn_err(CE_WARN,
2121                             "%s: failed to enable slices\n", mgp->name);
2122                         goto abort_with_toeplitz;
2123                 }
2124         }
2125
2126         for (i = 0; i < mgp->num_slices; i++) {
2127                 status = myri10ge_setup_slice(&mgp->ss[i]);
2128                 if (status != 0)
2129                         goto abort_with_slices;
2130         }
2131
2132         /*
2133          * Tell the MCP how many buffers he has, and to
2134          *  bring the ethernet interface up
2135          *
2136          * Firmware needs the big buff size as a power of 2.  Lie and
2137          * tell him the buffer is larger, because we only use 1
2138          * buffer/pkt, and the mtu will prevent overruns
2139          */
2140         big_pow2 = myri10ge_mtu + MXGEFW_PAD;
             /* round up to the next power of two */
2141         while ((big_pow2 & (big_pow2 - 1)) != 0)
2142                 big_pow2++;
2143
2144         /* now give firmware buffers sizes, and MTU */
2145         cmd.data0 = myri10ge_mtu;
2146         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
2147         cmd.data0 = myri10ge_small_bytes;
2148         status |=
2149             myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
2150         cmd.data0 = big_pow2;
2151         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
2152         if (status) {
2153                 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
2154                 goto abort_with_slices;
2155         }
2156
2157
             /* a TSO failure is only reported; startup continues without it */
2158         cmd.data0 = 1;
2159         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
2160         if (status) {
2161                 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
2162                     mgp->name, status);
2163         } else {
2164                 mgp->features |= MYRI10GE_TSO;
2165         }
2166
2167         mgp->link_state = -1;
2168         mgp->rdma_tags_available = 15;
2169         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
2170         if (status) {
2171                 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
2172                 goto abort_with_slices;
2173         }
2174         mgp->running = MYRI10GE_ETH_RUNNING;
2175         return (DDI_SUCCESS);
2176
2177 abort_with_slices:
2178         for (i = 0; i < mgp->num_slices; i++)
2179                 myri10ge_teardown_slice(&mgp->ss[i]);
2180
2181         mgp->running = MYRI10GE_ETH_STOPPED;
2182
2183 abort_with_toeplitz:
2184         if (mgp->toeplitz_hash_table != NULL) {
2185                 kmem_free(mgp->toeplitz_hash_table,
2186                     sizeof (uint32_t) * 12 * 256);
2187                 mgp->toeplitz_hash_table = NULL;
2188         }
2189
2190 abort_with_nothing:
2191         return (DDI_FAILURE);
2192 }
2193
     /*
      * Bring the interface down and release its resources.
      *
      * Sends MXGEFW_CMD_ETHERNET_DOWN and waits, in one-second steps (up to
      * 10), for mgp->down_cnt to change.  If it does not, each slice's rx
      * lock is taken and released and each slice's poll state is checked
      * before giving up on the down interrupt.  All slices are then torn
      * down, the Toeplitz table is freed, and mgp->running is set to
      * MYRI10GE_ETH_STOPPED.
      *
      * myri10ge_m_stop() calls this with mgp->intrlock held.
      */
2194 static void
2195 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2196 {
2197         int status, old_down_cnt;
2198         myri10ge_cmd_t cmd;
2199         int wait_time = 10;
2200         int i, polling;
2201
2202         old_down_cnt = mgp->down_cnt;
2203         mb();
2204         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2205         if (status) {
2206                 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2207         }
2208
             /* down_cnt is re-read through a volatile pointer on each pass */
2209         while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2210                 delay(1 * drv_usectohz(1000000));
2211                 wait_time--;
2212                 if (wait_time == 0)
2213                         break;
2214         }
2215 again:
2216         if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2217                 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2218                 for (i = 0; i < mgp->num_slices; i++) {
2219                         /*
2220                          * take and release the rx lock to ensure
2221                          * that no interrupt thread is blocked
2222                          * elsewhere in the stack, preventing
2223                          * completion
2224                          */
2225
2226                         mutex_enter(&mgp->ss[i].rx_lock);
2227                         printf("%s: slice %d rx irq idle\n",
2228                             mgp->name, i);
2229                         mutex_exit(&mgp->ss[i].rx_lock);
2230
2231                         /* verify that the poll handler is inactive */
                             /* check slice i itself, not always slice 0 */
2232                         mutex_enter(&mgp->ss[i].poll_lock);
2233                         polling = mgp->ss[i].rx_polling;
2234                         mutex_exit(&mgp->ss[i].poll_lock);
2235                         if (polling) {
2236                                 printf("%s: slice %d is polling\n",
2237                                     mgp->name, i);
2238                                 delay(1 * drv_usectohz(1000000));
2239                                 goto again;
2240                         }
2241                 }
2242                 delay(1 * drv_usectohz(1000000));
2243                 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2244                         cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2245                 }
2246         }
2247
2248         for (i = 0; i < mgp->num_slices; i++)
2249                 myri10ge_teardown_slice(&mgp->ss[i]);
2250
2251         if (mgp->toeplitz_hash_table != NULL) {
2252                 kmem_free(mgp->toeplitz_hash_table,
2253                     sizeof (uint32_t) * 12 * 256);
2254                 mgp->toeplitz_hash_table = NULL;
2255         }
2256         mgp->running = MYRI10GE_ETH_STOPPED;
2257 }
2258
     /*
      * Start entry point; arg is the struct myri10ge_priv for the device.
      * Under mgp->intrlock, starts the device if it is currently
      * MYRI10GE_ETH_STOPPED.  On success the watchdog timeout is armed.
      * Returns DDI_SUCCESS, or DDI_FAILURE if the device was not stopped or
      * could not be started.
      */
2259 static int
2260 myri10ge_m_start(void *arg)
2261 {
2262         struct myri10ge_priv *priv = arg;
2263         int rc = DDI_FAILURE;
2264
2265         /* only a stopped device may be started */
2266         mutex_enter(&priv->intrlock);
2267         if (priv->running == MYRI10GE_ETH_STOPPED)
2268                 rc = myri10ge_start_locked(priv);
2269         mutex_exit(&priv->intrlock);
2270
2271         if (rc != DDI_SUCCESS)
2272                 return (rc);
2273
2274         /* start the watchdog timer */
2275         priv->timer_id = timeout(myri10ge_watchdog, priv, priv->timer_ticks);
2276         return (DDI_SUCCESS);
2277 }
2283
     /*
      * Stop entry point; arg is the struct myri10ge_priv for the device.
      * Does nothing unless the device is MYRI10GE_ETH_RUNNING.  Otherwise
      * marks it MYRI10GE_ETH_STOPPING, cancels the watchdog timeout with
      * mgp->intrlock dropped, and then stops the device under the lock.
      */
2284 static void
2285 myri10ge_m_stop(void *arg)
2286 {
2287         struct myri10ge_priv *priv = arg;
2288         int was_running;
2289
2290         mutex_enter(&priv->intrlock);
2291         was_running = (priv->running == MYRI10GE_ETH_RUNNING);
2292         if (was_running)
2293                 priv->running = MYRI10GE_ETH_STOPPING;
2294         mutex_exit(&priv->intrlock);
2295
2296         /* if the device not running give up */
2297         if (!was_running)
2298                 return;
2299
2300         (void) untimeout(priv->timer_id);
2301         mutex_enter(&priv->intrlock);
2302         myri10ge_stop_locked(priv);
2303         mutex_exit(&priv->intrlock);
     }
2304
     /*
      * Update the broadcast/multicast receive counters for a frame and, for
      * TCP or UDP over IPv4/IPv6, attach the partial checksum to the mblk.
      *
      * mp   - received frame; b_rptr is read as the Ethernet header
      * s    - ring statistics (brdcstrcv / multircv are incremented for
      *        group-addressed frames)
      * csum - checksum value for the frame; only the low 16 bits are used,
      *        after ntohs()
      *
      * The start/stuff/end offsets are passed to mac_hcksum_set() with
      * HCK_PARTIALCKSUM.  Other protocols, other ethertypes, and frames
      * whose mblk is longer than the IP length implies (padding) return
      * without setting any checksum information.
      *
      * NOTE(review): after the VLAN adjustment below, eh->ether_type is not
      * re-read from the encapsulated header, so as written a VLAN-tagged
      * frame appears to reach the final else and return without checksum
      * information -- confirm whether that is intended.
      */
2305 static inline void
2306 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
2307 {
2308         struct ether_header *eh;
2309         struct ip *ip;
2310         struct ip6_hdr *ip6;
2311         uint32_t start, stuff, end, partial, hdrlen;
2312
2313
2314         csum = ntohs((uint16_t)csum);
2315         eh = (struct ether_header *)(void *)mp->b_rptr;
2316         hdrlen = sizeof (*eh);
             /* low bit of the first destination octet set: group address */
2317         if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2318                 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2319                     myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2320                         s->brdcstrcv++;
2321                 else
2322                         s->multircv++;
2323         }
2324
2325         if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
2326                 /*
2327                  * fix checksum by subtracting 4 bytes after what the
2328                  * firmware thought was the end of the ether hdr
2329                  */
2330                 partial = *(uint32_t *)
2331                     (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
                     /* add the complement with end-around carry, then fold */
2332                 csum += ~partial;
2333                 csum +=  (csum < ~partial);
2334                 csum = (csum >> 16) + (csum & 0xFFFF);
2335                 csum = (csum >> 16) + (csum & 0xFFFF);
2336                 hdrlen += VLAN_TAGSZ;
2337         }
2338
2339         if (eh->ether_type ==  BE_16(ETHERTYPE_IP)) {
2340                 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
                     /* offsets are relative to the start of the IP header */
2341                 start = ip->ip_hl << 2;
2342
2343                 if (ip->ip_p == IPPROTO_TCP)
2344                         stuff = start + offsetof(struct tcphdr, th_sum);
2345                 else if (ip->ip_p == IPPROTO_UDP)
2346                         stuff = start + offsetof(struct udphdr, uh_sum);
2347                 else
2348                         return;
2349                 end = ntohs(ip->ip_len);
2350         } else if (eh->ether_type ==  BE_16(ETHERTYPE_IPV6)) {
2351                 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
2352                 start = sizeof (*ip6);
                     /* only a TCP/UDP header directly after the IPv6 header */
2353                 if (ip6->ip6_nxt == IPPROTO_TCP) {
2354                         stuff = start + offsetof(struct tcphdr, th_sum);
2355                 } else if (ip6->ip6_nxt == IPPROTO_UDP)
2356                         stuff = start + offsetof(struct udphdr, uh_sum);
2357                 else
2358                         return;
2359                 end = start + ntohs(ip6->ip6_plen);
2360                 /*
2361                  * IPv6 headers do not contain a checksum, and hence
2362                  * do not checksum to zero, so they don't "fall out"
2363                  * of the partial checksum calculation like IPv4
2364                  * headers do.  We need to fix the partial checksum by
2365                  * subtracting the checksum of the IPv6 header.
2366                  */
2367
2368                 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
2369                 csum += ~partial;
2370                 csum +=  (csum < ~partial);
2371                 csum = (csum >> 16) + (csum & 0xFFFF);
2372                 csum = (csum >> 16) + (csum & 0xFFFF);
2373         } else {
2374                 return;
2375         }
2376
2377         if (MBLKL(mp) > hdrlen + end) {
2378                 /* padded frame, so hw csum may be invalid */
2379                 return;
2380         }
2381
2382         mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
2383 }
2384
     /*
      * Handle one completed receive from the small-buffer ring.
      *
      * ss   - slice the frame arrived on
      * len  - frame length, not counting the MXGEFW_PAD bytes in front of it
      * csum - checksum value, passed on to myri10ge_rx_csum()
      *
      * The frame is copied out of the ring buffer into a newly allocated
      * mblk, so the ring buffer itself stays in the ring.  Returns the mblk,
      * or NULL if allocb() fails (rx_small_nobuf is incremented).  In both
      * cases the ring counter advances, and after the last entry of each
      * group of eight the group is resubmitted with myri10ge_submit_8rx().
      */
2385 static mblk_t *
2386 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
2387     uint32_t csum)
2388 {
2389         mblk_t *mp;
2390         myri10ge_rx_ring_t *rx;
2391         int idx;
2392
2393         rx = &ss->rx_small;
2394         idx = rx->cnt & rx->mask;
2395         ss->rx_small.cnt++;
2396
2397         /* allocate a new buffer to pass up the stack */
2398         mp = allocb(len + MXGEFW_PAD, 0);
2399         if (mp == NULL) {
2400                 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
2401                 goto abort;
2402         }
             /* copy pad and frame, then advance b_rptr past the pad */
2403         bcopy(ss->rx_small.info[idx].ptr,
2404             (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2405         mp->b_wptr += len + MXGEFW_PAD;
2406         mp->b_rptr += MXGEFW_PAD;
2407
2408         ss->rx_stats.ibytes += len;
2409         ss->rx_stats.ipackets += 1;
2410         myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2411
2412 abort:
             /* reached on success too: resubmit after every eighth entry */
2413         if ((idx & 7) == 7) {
2414                 myri10ge_submit_8rx(&rx->lanai[idx - 7],
2415                     &rx->shadow[idx - 7]);
2416         }
2417
2418         return (mp);
2419 }
2420 
2421
     /*
      * Handle one completed receive from the big (jumbo) buffer ring.
      *
      * ss   - slice the frame arrived on
      * len  - frame length, not counting the MXGEFW_PAD bytes in front of it
      * csum - checksum value, passed on to myri10ge_rx_csum()
      *
      * Normally the jumbo buffer itself is loaned to the stack through
      * desballoc() with the entry's free_func.  If the ring is below
      * jpool->low_water and, after restocking, fewer than 16 buffers remain
      * (rx_big.cnt - j_rx_cnt), the data is instead copied into a new mblk
      * and the jumbo buffer is handed to myri10ge_jfree_rtn().
      *
      * Returns the mblk, or NULL if the ring slot holds no jumbo entry or
      * an mblk cannot be allocated.  On allocation failure the jumbo buffer
      * is passed to myri10ge_jfree_rtn() and rx_big_nobuf is incremented.
      */
2422 static mblk_t *
2423 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
2424     uint32_t csum)
2425 {
2426         struct myri10ge_jpool_stuff *jpool;
2427         struct myri10ge_jpool_entry *j;
2428         mblk_t *mp;
2429         int idx, num_owned_by_mcp;
2430
2431         jpool = &ss->jpool;
2432         idx = ss->j_rx_cnt & ss->rx_big.mask;
2433         j = ss->rx_big.info[idx].j;
2434
2435         if (j == NULL) {
2436                 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
2437                     ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
2438                 return (NULL);
2439         }
2440
2441
             /* take the entry out of the ring slot and advance the counter */
2442         ss->rx_big.info[idx].j = NULL;
2443         ss->j_rx_cnt++;
2444
2445
2446         /*
2447          * Check to see if we are low on rx buffers.
2448          * Note that we must leave at least 8 free so there are
2449          * enough to free in a single 64-byte write.
2450          */
2451         num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2452         if (num_owned_by_mcp < jpool->low_water) {
2453                 mutex_enter(&jpool->mtx);
2454                 myri10ge_restock_jumbos(ss);
2455                 mutex_exit(&jpool->mtx);
2456                 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2457                 /* if we are still low, then we have to copy */
2458                 if (num_owned_by_mcp < 16) {
2459                         MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
2460                         /* allocate a new buffer to pass up the stack */
2461                         mp = allocb(len + MXGEFW_PAD, 0);
2462                         if (mp == NULL) {
2463                                 goto abort;
2464                         }
2465                         bcopy(j->buf,
2466                             (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2467                         myri10ge_jfree_rtn(j);
2468                         /* push buffer back to NIC */
2469                         mutex_enter(&jpool->mtx);
2470                         myri10ge_restock_jumbos(ss);
2471                         mutex_exit(&jpool->mtx);
2472                         goto set_len;
2473                 }
2474         }
2475
2476         /* loan our buffer to the stack */
2477         mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
2478         if (mp == NULL) {
2479                 goto abort;
2480         }
2481
2482 set_len:
             /* shared by the copy and loan paths: skip the pad, set the end */
2483         mp->b_rptr += MXGEFW_PAD;
2484         mp->b_wptr = ((unsigned char *) mp->b_rptr + len);
2485
2486         ss->rx_stats.ibytes += len;
2487         ss->rx_stats.ipackets += 1;
2488         myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2489
2490         return (mp);
2491
2492 abort:
2493         myri10ge_jfree_rtn(j);
2494         MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
2495         return (NULL);
2496 }
2497 
2498 /*
2499  * Free all transmit buffers up until the specified index
2500  */
2501 static inline void
2502 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
2503 {
2504         myri10ge_tx_ring_t *tx;
2505         struct myri10ge_tx_dma_handle_head handles;
2506         int idx;
2507         int limit = 0;
2508 
2509         tx = &ss->tx;
2510         handles.head = NULL;
2511         handles.tail = NULL;
2512         while (tx->pkt_done != (int)mcp_index) {
2513                 idx = tx->done & tx->mask;
2514 
2515                 /*
2516                  * mblk & DMA handle attached only to first slot
2517                  * per buffer in the packet
2518                  */
2519 
2520                 if (tx->info[idx].m) {
2521                         (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
2522                         tx->info[idx].handle->next = handles.head;
2523                         handles.head = tx->info[idx].handle;
2524                         if (handles.tail == NULL)
2525                                 handles.tail = tx->info[idx].handle;
2526                         freeb(tx->info[idx].m);
2527                         tx->info[idx].m = 0;
2528                         tx->info[idx].handle = 0;
2529                 }
2530                 if (tx->info[idx].ostat.opackets != 0) {
2531                         tx->stats.multixmt += tx->info[idx].ostat.multixmt;
2532                         tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
2533                         tx->stats.obytes += tx->info[idx].ostat.obytes;
2534                         tx->stats.opackets += tx->info[idx].ostat.opackets;
2535                         tx->info[idx].stat.un.all = 0;
2536                         tx->pkt_done++;
2537                 }
2538 
2539                 tx->done++;
2540                 /*
2541                  * if we stalled the queue, wake it.  But Wait until
2542                  * we have at least 1/2 our slots free.
2543                  */
2544                 if ((tx->req - tx->done) < (tx->mask >> 1) &&
2545                     tx->stall != tx->sched) {
2546                         mutex_enter(&ss->tx.lock);
2547                         tx->sched = tx->stall;
2548                         mutex_exit(&ss->tx.lock);
2549                         mac_tx_ring_update(ss->mgp->mh, tx->rh);
2550                 }
2551 
2552                 /* limit potential for livelock */
2553                 if (unlikely(++limit >  2 * tx->mask))
2554                         break;
2555         }
2556         if (tx->req == tx->done && tx->stop != NULL) {
2557                 /*
2558                  * Nic has sent all pending requests, allow him
2559                  * to stop polling this queue
2560                  */
2561                 mutex_enter(&tx->lock);
2562                 if (tx->req == tx->done && tx->active) {
2563                         *(int *)(void *)tx->stop = 1;
2564                         tx->active = 0;
2565                         mb();
2566                 }
2567                 mutex_exit(&tx->lock);
2568         }
2569         if (handles.head != NULL)
2570                 myri10ge_free_tx_handles(tx, &handles);
2571 }
2572 
2573 static void
2574 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2575 {
2576         mbl->head = NULL;
2577         mbl->tail = &mbl->head;
2578         mbl->cnt = 0;
2579 }
2580 
2581 /*ARGSUSED*/
2582 void
2583 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2584     struct myri10ge_mblk_list *mbl, mblk_t *mp)
2585 {
2586         *(mbl->tail) = mp;
2587         mbl->tail = &mp->b_next;
2588         mp->b_next = NULL;
2589         mbl->cnt++;
2590 }
2591 
2592 
2593 static inline void
2594 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
2595     struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
2596 {
2597         myri10ge_rx_done_t *rx_done = &ss->rx_done;
2598         struct myri10ge_priv *mgp = ss->mgp;
2599         mblk_t *mp;
2600         struct lro_entry *lro;
2601         uint16_t length;
2602         uint16_t checksum;
2603 
2604 
2605         while (rx_done->entry[rx_done->idx].length != 0) {
2606                 if (unlikely (*stop)) {
2607                         break;
2608                 }
2609                 length = ntohs(rx_done->entry[rx_done->idx].length);
2610                 length &= (~MXGEFW_RSS_HASH_MASK);
2611 
2612                 /* limit potential for livelock */
2613                 limit -= length;
2614                 if (unlikely(limit < 0))
2615                         break;
2616 
2617                 rx_done->entry[rx_done->idx].length = 0;
2618                 checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
2619                 if (length <= myri10ge_small_bytes)
2620                         mp = myri10ge_rx_done_small(ss, length, checksum);
2621                 else
2622                         mp = myri10ge_rx_done_big(ss, length, checksum);
2623                 if (mp != NULL) {
2624                         if (!myri10ge_lro ||
2625                             0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
2626                                 myri10ge_mbl_append(ss, mbl, mp);
2627                 }
2628                 rx_done->cnt++;
2629                 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
2630         }
2631         while (ss->lro_active != NULL) {
2632                 lro = ss->lro_active;
2633                 ss->lro_active = lro->next;
2634                 myri10ge_lro_flush(ss, lro, mbl);
2635         }
2636 }
2637 
2638 static void
2639 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2640 {
2641         uint64_t gen;
2642         struct myri10ge_mblk_list mbl;
2643 
2644         myri10ge_mbl_init(&mbl);
2645         if (mutex_tryenter(&ss->rx_lock) == 0)
2646                 return;
2647         gen = ss->rx_gen_num;
2648         myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2649             &ss->rx_polling);
2650         if (mbl.head != NULL)
2651                 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2652         mutex_exit(&ss->rx_lock);
2653 
2654 }
2655 
2656 static mblk_t *
2657 myri10ge_poll_rx(void *arg, int bytes)
2658 {
2659         struct myri10ge_slice_state *ss = arg;
2660         struct myri10ge_mblk_list mbl;
2661         boolean_t dummy = B_FALSE;
2662 
2663         if (bytes == 0)
2664                 return (NULL);
2665 
2666         myri10ge_mbl_init(&mbl);
2667         mutex_enter(&ss->rx_lock);
2668         if (ss->rx_polling)
2669                 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2670         else
2671                 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2672                     ss->mgp->ss), ss->rx_token, ss->rx_polling);
2673         mutex_exit(&ss->rx_lock);
2674         return (mbl.head);
2675 }
2676 
2677 /*ARGSUSED*/
2678 static uint_t
2679 myri10ge_intr(caddr_t arg0, caddr_t arg1)
2680 {
2681         struct myri10ge_slice_state *ss =
2682             (struct myri10ge_slice_state *)(void *)arg0;
2683         struct myri10ge_priv *mgp = ss->mgp;
2684         mcp_irq_data_t *stats = ss->fw_stats;
2685         myri10ge_tx_ring_t *tx = &ss->tx;
2686         uint32_t send_done_count;
2687         uint8_t valid;
2688 
2689 
2690         /* make sure the DMA has finished */
2691         if (!stats->valid) {
2692                 return (DDI_INTR_UNCLAIMED);
2693         }
2694         valid = stats->valid;
2695 
2696         /* low bit indicates receives are present */
2697         if (valid & 1)
2698                 myri10ge_intr_rx(ss);
2699 
2700         if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
2701                 /* lower legacy IRQ  */
2702                 *mgp->irq_deassert = 0;
2703                 if (!myri10ge_deassert_wait)
2704                         /* don't wait for conf. that irq is low */
2705                         stats->valid = 0;
2706                 mb();
2707         } else {
2708                 /* no need to wait for conf. that irq is low */
2709                 stats->valid = 0;
2710         }
2711 
2712         do {
2713                 /* check for transmit completes and receives */
2714                 send_done_count = ntohl(stats->send_done_count);
2715                 if (send_done_count != tx->pkt_done)
2716                         myri10ge_tx_done(ss, (int)send_done_count);
2717         } while (*((volatile uint8_t *) &stats->valid));
2718 
2719         if (stats->stats_updated) {
2720                 if (mgp->link_state != stats->link_up || stats->link_down) {
2721                         mgp->link_state = stats->link_up;
2722                         if (stats->link_down) {
2723                                 mgp->down_cnt += stats->link_down;
2724                                 mgp->link_state = 0;
2725                         }
2726                         if (mgp->link_state) {
2727                                 if (myri10ge_verbose)
2728                                         printf("%s: link up\n", mgp->name);
2729                                 mac_link_update(mgp->mh, LINK_STATE_UP);
2730                         } else {
2731                                 if (myri10ge_verbose)
2732                                         printf("%s: link down\n", mgp->name);
2733                                 mac_link_update(mgp->mh, LINK_STATE_DOWN);
2734                         }
2735                         MYRI10GE_NIC_STAT_INC(link_changes);
2736                 }
2737                 if (mgp->rdma_tags_available !=
2738                     ntohl(ss->fw_stats->rdma_tags_available)) {
2739                         mgp->rdma_tags_available =
2740                             ntohl(ss->fw_stats->rdma_tags_available);
2741                         cmn_err(CE_NOTE, "%s: RDMA timed out! "
2742                             "%d tags left\n", mgp->name,
2743                             mgp->rdma_tags_available);
2744                 }
2745         }
2746 
2747         mb();
2748         /* check to see if we have rx token to pass back */
2749         if (valid & 0x1) {
2750                 mutex_enter(&ss->poll_lock);
2751                 if (ss->rx_polling) {
2752                         ss->rx_token = 1;
2753                 } else {
2754                         *ss->irq_claim = BE_32(3);
2755                         ss->rx_token = 0;
2756                 }
2757                 mutex_exit(&ss->poll_lock);
2758         }
2759         *(ss->irq_claim + 1) = BE_32(3);
2760         return (DDI_INTR_CLAIMED);
2761 }
2762 
2763 /*
2764  * Add or remove a multicast address.  This is called with our
2765  * macinfo's lock held by GLD, so we do not need to worry about
2766  * our own locking here.
2767  */
2768 static int
2769 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2770 {
2771         myri10ge_cmd_t cmd;
2772         struct myri10ge_priv *mgp = arg;
2773         int status, join_leave;
2774 
2775         if (add)
2776                 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2777         else
2778                 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2779         (void) memcpy(&cmd.data0, multicastaddr, 4);
2780         (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2781         cmd.data0 = htonl(cmd.data0);
2782         cmd.data1 = htonl(cmd.data1);
2783         status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2784         if (status == 0)
2785                 return (0);
2786 
2787         cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2788             mgp->name);
2789         return (status);
2790 }
2791 
2792 
2793 static int
2794 myri10ge_m_promisc(void *arg, boolean_t on)
2795 {
2796         struct myri10ge_priv *mgp = arg;
2797 
2798         myri10ge_change_promisc(mgp, on);
2799         return (0);
2800 }
2801 
2802 /*
2803  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
2804  *  backwards one at a time and handle ring wraps
2805  */
2806 
2807 static inline void
2808 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
2809     mcp_kreq_ether_send_t *src, int cnt)
2810 {
2811         int idx, starting_slot;
2812         starting_slot = tx->req;
2813         while (cnt > 1) {
2814                 cnt--;
2815                 idx = (starting_slot + cnt) & tx->mask;
2816                 myri10ge_pio_copy(&tx->lanai[idx],
2817                     &src[cnt], sizeof (*src));
2818                 mb();
2819         }
2820 }
2821 
2822 /*
2823  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
2824  * at most 32 bytes at a time, so as to avoid involving the software
2825  * pio handler in the nic.   We re-write the first segment's flags
2826  * to mark them valid only after writing the entire chain
2827  */
2828 
2829 static inline void
2830 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
2831     int cnt)
2832 {
2833         int idx, i;
2834         uint32_t *src_ints, *dst_ints;
2835         mcp_kreq_ether_send_t *srcp, *dstp, *dst;
2836         uint8_t last_flags;
2837 
2838         idx = tx->req & tx->mask;
2839 
2840         last_flags = src->flags;
2841         src->flags = 0;
2842         mb();
2843         dst = dstp = &tx->lanai[idx];
2844         srcp = src;
2845 
2846         if ((idx + cnt) < tx->mask) {
2847                 for (i = 0; i < (cnt - 1); i += 2) {
2848                         myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
2849                         mb(); /* force write every 32 bytes */
2850                         srcp += 2;
2851                         dstp += 2;
2852                 }
2853         } else {
2854                 /*
2855                  * submit all but the first request, and ensure
2856                  *  that it is submitted below
2857                  */
2858                 myri10ge_submit_req_backwards(tx, src, cnt);
2859                 i = 0;
2860         }
2861         if (i < cnt) {
2862                 /* submit the first request */
2863                 myri10ge_pio_copy(dstp, srcp, sizeof (*src));
2864                 mb(); /* barrier before setting valid flag */
2865         }
2866 
2867         /* re-write the last 32-bits with the valid flags */
2868         src->flags |= last_flags;
2869         src_ints = (uint32_t *)src;
2870         src_ints += 3;
2871         dst_ints = (uint32_t *)dst;
2872         dst_ints += 3;
2873         *dst_ints =  *src_ints;
2874         tx->req += cnt;
2875         mb();
2876         /* notify NIC to poll this tx ring */
2877         if (!tx->active && tx->go != NULL) {
2878                 *(int *)(void *)tx->go = 1;
2879                 tx->active = 1;
2880                 tx->activate++;
2881                 mb();
2882         }
2883 }
2884 
2885 /* ARGSUSED */
2886 static inline void
2887 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2888 {
2889         uint32_t lso_flag;
2890         mac_lso_get(mp, mss, &lso_flag);
2891         (*flags) |= lso_flag;
2892 }
2893 
2894 
2895 /* like pullupmsg, except preserve hcksum/LSO attributes */
2896 static int
2897 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2898 {
2899         uint32_t start, stuff, tx_offload_flags, mss;
2900         int ok;
2901 
2902         mss = 0;
2903         mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2904         myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2905 
2906         ok = pullupmsg(mp, -1);
2907         if (!ok) {
2908                 printf("pullupmsg failed");
2909                 return (DDI_FAILURE);
2910         }
2911         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2912         mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags);
2913         if (tx_offload_flags & HW_LSO)
2914                 DB_LSOMSS(mp) = (uint16_t)mss;
2915         lso_info_set(mp, mss, tx_offload_flags);
2916         return (DDI_SUCCESS);
2917 }
2918 
2919 static inline void
2920 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2921     int opackets, int obytes)
2922 {
2923         s->un.all = 0;
2924         if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2925                 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2926                     myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2927                         s->un.s.brdcstxmt = 1;
2928                 else
2929                         s->un.s.multixmt = 1;
2930         }
2931         s->un.s.opackets = (uint16_t)opackets;
2932         s->un.s.obytes = obytes;
2933 }
2934 
2935 static int
2936 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
2937     mcp_kreq_ether_send_t *req)
2938 {
2939         myri10ge_tx_ring_t *tx = &ss->tx;
2940         caddr_t ptr;
2941         struct myri10ge_tx_copybuf *cp;
2942         mblk_t *bp;
2943         int idx, mblen, avail;
2944         uint16_t len;
2945 
2946         mutex_enter(&tx->lock);
2947         avail = tx->mask - (tx->req - tx->done);
2948         if (avail <= 1) {
2949                 mutex_exit(&tx->lock);
2950                 return (EBUSY);
2951         }
2952         idx = tx->req & tx->mask;
2953         cp = &tx->cp[idx];
2954         ptr = cp->va;
2955         for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
2956                 mblen = MBLKL(bp);
2957                 bcopy(bp->b_rptr, ptr, mblen);
2958                 ptr += mblen;
2959                 len += mblen;
2960         }
2961         /* ensure runts are padded to 60 bytes */
2962         if (len < 60) {
2963                 bzero(ptr, 64 - len);
2964                 len = 60;
2965         }
2966         req->addr_low = cp->dma.low;
2967         req->addr_high = cp->dma.high;
2968         req->length = htons(len);
2969         req->pad = 0;
2970         req->rdma_count = 1;
2971         myri10ge_tx_stat(&tx->info[idx].stat,
2972             (struct ether_header *)(void *)cp->va, 1, len);
2973         (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
2974         myri10ge_submit_req(&ss->tx, req, 1);
2975         mutex_exit(&tx->lock);
2976         freemsg(mp);
2977         return (DDI_SUCCESS);
2978 }
2979 
2980 
2981 static void
2982 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2983     struct myri10ge_tx_buffer_state *tx_info,
2984     int count)
2985 {
2986         int i, idx;
2987 
2988         idx = 0; /* gcc -Wuninitialized */
2989         /* store unmapping and bp info for tx irq handler */
2990         for (i = 0; i < count; i++) {
2991                 idx = (tx->req + i) & tx->mask;
2992                 tx->info[idx].m = tx_info[i].m;
2993                 tx->info[idx].handle = tx_info[i].handle;
2994         }
2995         tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
2996 
2997         /* submit the frame to the nic */
2998         myri10ge_submit_req(tx, req_list, count);
2999 
3000 
3001 }
3002 
3003 
3004 
3005 static void
3006 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3007 {
3008         mblk_t *bp;
3009         int seglen;
3010         uint_t count;
3011 
3012         bp = mp;
3013 
3014         while (off > 0) {
3015                 seglen = MBLKL(bp);
3016                 if (off < seglen)
3017                         break;
3018                 off -= seglen;
3019                 bp = bp->b_cont;
3020         }
3021         while (len > 0) {
3022                 seglen = MBLKL(bp);
3023                 count = min(seglen - off, len);
3024                 bcopy(bp->b_rptr + off, buf, count);
3025                 len -= count;
3026                 buf += count;
3027                 off = 0;
3028                 bp = bp->b_cont;
3029         }
3030 }
3031 
3032 static int
3033 myri10ge_ether_parse_header(mblk_t *mp)
3034 {
3035         struct ether_header eh_copy;
3036         struct ether_header *eh;
3037         int eth_hdr_len, seglen;
3038 
3039         seglen = MBLKL(mp);
3040         eth_hdr_len = sizeof (*eh);
3041         if (seglen < eth_hdr_len) {
3042                 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3043                 eh = &eh_copy;
3044         } else {
3045                 eh = (struct ether_header *)(void *)mp->b_rptr;
3046         }
3047         if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3048                 eth_hdr_len += 4;
3049         }
3050 
3051         return (eth_hdr_len);
3052 }
3053 
3054 static int
3055 myri10ge_lso_parse_header(mblk_t *mp, int off)
3056 {
3057         char buf[128];
3058         int seglen, sum_off;
3059         struct ip *ip;
3060         struct tcphdr *tcp;
3061 
3062         seglen = MBLKL(mp);
3063         if (seglen < off + sizeof (*ip)) {
3064                 myri10ge_copydata(mp, off, sizeof (*ip), buf);
3065                 ip = (struct ip *)(void *)buf;
3066         } else {
3067                 ip = (struct ip *)(void *)(mp->b_rptr + off);
3068         }
3069         if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
3070                 myri10ge_copydata(mp, off,
3071                     (ip->ip_hl << 2) + sizeof (*tcp), buf);
3072                 ip = (struct ip *)(void *)buf;
3073         }
3074         tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));
3075 
3076         /*
3077          * NIC expects ip_sum to be zero.  Recent changes to
3078          * OpenSolaris leave the correct ip checksum there, rather
3079          * than the required zero, so we need to zero it.  Otherwise,
3080          * the NIC will produce bad checksums when sending LSO packets.
3081          */
3082         if (ip->ip_sum != 0) {
3083                 if (((char *)ip) != buf) {
3084                         /* ip points into mblk, so just zero it */
3085                         ip->ip_sum = 0;
3086                 } else {
3087                         /*
3088                          * ip points into a copy, so walk the chain
3089                          * to find the ip_csum, then zero it
3090                          */
3091                         sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
3092                         while (sum_off > (int)(MBLKL(mp) - 1)) {
3093                                 sum_off -= MBLKL(mp);
3094                                 mp = mp->b_cont;
3095                         }
3096                         mp->b_rptr[sum_off] = 0;
3097                         sum_off++;
3098                         while (sum_off > MBLKL(mp) - 1) {
3099                                 sum_off -= MBLKL(mp);
3100                                 mp = mp->b_cont;
3101                         }
3102                         mp->b_rptr[sum_off] = 0;
3103                 }
3104         }
3105         return (off + ((ip->ip_hl + tcp->th_off) << 2));
3106 }
3107 
3108 static int
3109 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
3110     mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
3111     uint16_t mss, uint8_t cksum_offset)
3112 {
3113         myri10ge_tx_ring_t *tx = &ss->tx;
3114         struct myri10ge_priv *mgp = ss->mgp;
3115         mblk_t *bp;
3116         mcp_kreq_ether_send_t *req;
3117         struct myri10ge_tx_copybuf *cp;
3118         caddr_t rptr, ptr;
3119         int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
3120         int resid, avail, idx, hdr_size_tmp, tx_boundary;
3121         int rdma_count;
3122         uint32_t seglen, len, boundary, low, high_swapped;
3123         uint16_t pseudo_hdr_offset = htons(mss);
3124         uint8_t flags;
3125 
3126         tx_boundary = mgp->tx_boundary;
3127         hdr_size_tmp = hdr_size;
3128         resid = tx_boundary;
3129         count = 1;
3130         mutex_enter(&tx->lock);
3131 
3132         /* check to see if the slots are really there */
3133         avail = tx->mask - (tx->req - tx->done);
3134         if (unlikely(avail <=  MYRI10GE_MAX_SEND_DESC_TSO)) {
3135                 atomic_inc_32(&tx->stall);
3136                 mutex_exit(&tx->lock);
3137                 return (EBUSY);
3138         }
3139 
3140         /* copy */
3141         cum_len = -hdr_size;
3142         count = 0;
3143         req = req_list;
3144         idx = tx->mask & tx->req;
3145         cp = &tx->cp[idx];
3146         low = ntohl(cp->dma.low);
3147         ptr = cp->va;
3148         cp->len = 0;
3149         if (mss) {
3150                 int payload = pkt_size - hdr_size;
3151                 uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
3152                 tx->info[idx].ostat.opackets = opackets;
3153                 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
3154                     + pkt_size;
3155         }
3156         hdr_size_tmp = hdr_size;
3157         mss_resid = mss;
3158         flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3159         tx_req = tx->req;
3160         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3161                 mblen = MBLKL(bp);
3162                 rptr = (caddr_t)bp->b_rptr;
3163                 len = min(hdr_size_tmp, mblen);
3164                 if (len) {
3165                         bcopy(rptr, ptr, len);
3166                         rptr += len;
3167                         ptr += len;
3168                         resid -= len;
3169                         mblen -= len;
3170                         hdr_size_tmp -= len;
3171                         cp->len += len;
3172                         if (hdr_size_tmp)
3173                                 continue;
3174                         if (resid < mss) {
3175                                 tx_req++;
3176                                 idx = tx->mask & tx_req;
3177                                 cp = &tx->cp[idx];
3178                                 low = ntohl(cp->dma.low);
3179                                 ptr = cp->va;
3180                                 resid = tx_boundary;
3181                         }
3182                 }
3183                 while (mblen) {
3184                         len = min(mss_resid, mblen);
3185                         bcopy(rptr, ptr, len);
3186                         mss_resid -= len;
3187                         resid -= len;
3188                         mblen -= len;
3189                         rptr += len;
3190                         ptr += len;
3191                         cp->len += len;
3192                         if (mss_resid == 0) {
3193                                 mss_resid = mss;
3194                                 if (resid < mss) {
3195                                         tx_req++;
3196                                         idx = tx->mask & tx_req;
3197                                         cp = &tx->cp[idx];
3198                                         cp->len = 0;
3199                                         low = ntohl(cp->dma.low);
3200                                         ptr = cp->va;
3201                                         resid = tx_boundary;
3202                                 }
3203                         }
3204                 }
3205         }
3206 
3207         req = req_list;
3208         pkt_size_tmp = pkt_size;
3209         count = 0;
3210         rdma_count = 0;
3211         tx_req = tx->req;
3212         while (pkt_size_tmp) {
3213                 idx = tx->mask & tx_req;
3214                 cp = &tx->cp[idx];
3215                 high_swapped = cp->dma.high;
3216                 low = ntohl(cp->dma.low);
3217                 len = cp->len;
3218                 if (len == 0) {
3219                         printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
3220                             pkt_size_tmp, pkt_size);
3221                         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3222                                 mblen = MBLKL(bp);
3223                                 printf("mblen:%d\n", mblen);
3224                         }
3225                         pkt_size_tmp = pkt_size;
3226                         tx_req = tx->req;
3227                         while (pkt_size_tmp > 0) {
3228                                 idx = tx->mask & tx_req;
3229                                 cp = &tx->cp[idx];
3230                                 printf("cp->len = %d\n", cp->len);
3231                                 pkt_size_tmp -= cp->len;
3232                                 tx_req++;
3233                         }
3234                         printf("dropped\n");
3235                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3236                         goto done;
3237                 }
3238                 pkt_size_tmp -= len;
3239                 while (len) {
3240                         while (len) {
3241                                 uint8_t flags_next;
3242                                 int cum_len_next;
3243 
3244                                 boundary = (low + mgp->tx_boundary) &
3245                                     ~(mgp->tx_boundary - 1);
3246                                 seglen = boundary - low;
3247                                 if (seglen > len)
3248                                         seglen = len;
3249 
3250                                 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3251                                 cum_len_next = cum_len + seglen;
3252                                 (req-rdma_count)->rdma_count = rdma_count + 1;
3253                                 if (likely(cum_len >= 0)) {
3254                                         /* payload */
3255                                         int next_is_first, chop;
3256 
3257                                         chop = (cum_len_next > mss);
3258                                         cum_len_next = cum_len_next % mss;
3259                                         next_is_first = (cum_len_next == 0);
3260                                         flags |= chop *
3261                                             MXGEFW_FLAGS_TSO_CHOP;
3262                                         flags_next |= next_is_first *
3263                                             MXGEFW_FLAGS_FIRST;
3264                                         rdma_count |= -(chop | next_is_first);
3265                                         rdma_count += chop & !next_is_first;
3266                                 } else if (likely(cum_len_next >= 0)) {
3267                                         /* header ends */
3268                                         int small;
3269 
3270                                         rdma_count = -1;
3271                                         cum_len_next = 0;
3272                                         seglen = -cum_len;
3273                                         small = (mss <= MXGEFW_SEND_SMALL_SIZE);
3274                                         flags_next = MXGEFW_FLAGS_TSO_PLD |
3275                                             MXGEFW_FLAGS_FIRST |
3276                                             (small * MXGEFW_FLAGS_SMALL);
3277                                 }
3278                                 req->addr_high = high_swapped;
3279                                 req->addr_low = htonl(low);
3280                                 req->pseudo_hdr_offset = pseudo_hdr_offset;
3281                                 req->pad = 0; /* complete solid 16-byte block */
3282                                 req->rdma_count = 1;
3283                                 req->cksum_offset = cksum_offset;
3284                                 req->length = htons(seglen);
3285                                 req->flags = flags | ((cum_len & 1) *
3286                                     MXGEFW_FLAGS_ALIGN_ODD);
3287                                 if (cksum_offset > seglen)
3288                                         cksum_offset -= seglen;
3289                                 else
3290                                         cksum_offset = 0;
3291                                 low += seglen;
3292                                 len -= seglen;
3293                                 cum_len = cum_len_next;
3294                                 req++;
3295                                 req->flags = 0;
3296                                 flags = flags_next;
3297                                 count++;
3298                                 rdma_count++;
3299                         }
3300                 }
3301                 tx_req++;
3302         }
3303         (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3304         do {
3305                 req--;
3306                 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3307         } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3308             MXGEFW_FLAGS_FIRST)));
3309 
3310         myri10ge_submit_req(tx, req_list, count);
3311 done:
3312         mutex_exit(&tx->lock);
3313         freemsg(mp);
3314         return (DDI_SUCCESS);
3315 }
3316 
3317 /*
3318  * Try to send the chain of buffers described by the mp.  We must not
3319  * encapsulate more than eth->tx.req - eth->tx.done, or
3320  * MXGEFW_MAX_SEND_DESC, whichever is more.
      *
      * ss       - slice whose tx ring is used
      * mp       - message (mblk chain) to transmit
      * req_list - caller-provided scratch array in which the send
      *            descriptors are built
      * tx_info  - caller-provided scratch array recording, per descriptor
      *            slot, the mblk and DMA handle bound to it
      *
      * Outline: read the checksum/LSO offload flags, stall early if the
      * ring has fewer than max_segs free slots, count the mblks and bytes
      * in the chain (calling myri10ge_pullup() if the chain is too long or
      * contains a zero-length mblk), then either hand the packet to one of
      * the copy routines (myri10ge_tx_tso_copy / myri10ge_tx_copy) or
      * DMA-bind each mblk and emit one descriptor per tx_boundary-limited
      * piece of each cookie before calling myri10ge_send_locked() under
      * tx->lock.
      *
      * Returns 0 (DDI_SUCCESS) when mp has been queued or dropped (freed)
      * here, the copy routine's result when one is used, or a non-zero
      * error (EBUSY, EIO, DDI_FAILURE) when the packet could not be queued;
      * myri10ge_send_wrapper() hands mp back to its caller on non-zero.
      * See also the NOTE at the late_stall label.
3321  */
3322 
3323 static int
3324 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3325     mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3326 {
3327         struct myri10ge_priv *mgp = ss->mgp;
3328         myri10ge_tx_ring_t *tx = &ss->tx;
3329         mcp_kreq_ether_send_t *req;
3330         struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3331         mblk_t  *bp;
3332         ddi_dma_cookie_t cookie;
3333         int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3334             rdma_count, cum_len, lso_hdr_size;
3335         uint32_t start, stuff, tx_offload_flags;
3336         uint32_t seglen, len, mss, boundary, low, high_swapped;
3337         uint_t ncookies;
3338         uint16_t pseudo_hdr_offset;
3339         uint8_t flags, cksum_offset, odd_flag;
3340         int pkt_size;
3341         int lso_copy = myri10ge_lso_copy;
3342         try_pullup = 1;
3343 
3344 again:
3345         /* Setup checksum offloading, if needed */
3346         mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3347         myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3348         if (tx_offload_flags & HW_LSO) {
3349                 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
                     /* LSO without partial checksum is dropped and counted */
3350                 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3351                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3352                         freemsg(mp);
3353                         return (DDI_SUCCESS);
3354                 }
3355         } else {
3356                 max_segs = MXGEFW_MAX_SEND_DESC;
3357                 mss = 0;
3358         }
3359         req = req_list;
3360         cksum_offset = 0;
3361         pseudo_hdr_offset = 0;
3362 
3363         /* leave an extra slot to keep the ring from wrapping */
3364         avail = tx->mask - (tx->req - tx->done);
3365 
3366         /*
3367          * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3368          * message will need to be pulled up in order to fit.
3369          * Otherwise, we are low on transmit descriptors, it is
3370          * probably better to stall and try again rather than pullup a
3371          * message to fit.
3372          */
3373 
3374         if (avail < max_segs) {
3375                 err = EBUSY;
3376                 atomic_inc_32(&tx->stall_early);
3377                 goto stall;
3378         }
3379 
3380         /* find out how long the frame is and how many segments it is */
3381         count = 0;
3382         odd_flag = 0;
3383         pkt_size = 0;
3384         flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3385         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3386                 dblk_t *dbp;
3387                 mblen = MBLKL(bp);
3388                 if (mblen == 0) {
3389                         /*
3390                          * we can't simply skip over 0-length mblks
3391                          * because the hardware can't deal with them,
3392                          * and we could leak them.
3393                          */
3394                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3395                         err = EIO;
3396                         goto pullup;
3397                 }
3398                 /*
3399                  * There's no advantage to copying most gesballoc
3400                  * attached blocks, so disable lso copy in that case
3401                  */
3402                 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3403                         if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3404                                 lso_copy = 0;
3405                         }
3406                 }
3407                 pkt_size += mblen;
3408                 count++;
3409         }
3410 
3411         /* Try to pull up excessively long chains */
3412         if (count >= max_segs) {
3413                 err = myri10ge_pullup(ss, mp);
3414                 if (likely(err == DDI_SUCCESS)) {
3415                         count = 1;
3416                 } else {
3417                         if (count <  MYRI10GE_MAX_SEND_DESC_TSO) {
3418                                 /*
3419                                  * just let the h/w send it, it will be
3420                                  * inefficient, but is better than dropping
3421                                  */
3422                                 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3423                         } else {
3424                                 /* drop it */
3425                                 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3426                                 freemsg(mp);
3427                                 return (0);
3428                         }
3429                 }
3430         }
3431 
3432         cum_len = 0;
3433         maclen = myri10ge_ether_parse_header(mp);
3434 
3435         if (tx_offload_flags & HCK_PARTIALCKSUM) {
3436 
3437                 cksum_offset = start + maclen;
3438                 pseudo_hdr_offset = htons(stuff + maclen);
3439                 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3440                 flags |= MXGEFW_FLAGS_CKSUM;
3441         }
3442 
3443         lso_hdr_size = 0; /* -Wuninitialized */
3444         if (mss) { /* LSO */
3445                 /* this removes any CKSUM flag from before */
3446                 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3447                 /*
3448                  * parse the headers and set cum_len to a negative
3449                  * value to reflect the offset of the TCP payload
3450                  */
3451                 lso_hdr_size =  myri10ge_lso_parse_header(mp, maclen);
3452                 cum_len = -lso_hdr_size;
                     /* small-mss LSO is handed to the copy path when enabled */
3453                 if ((mss < mgp->tx_boundary) && lso_copy) {
3454                         err = myri10ge_tx_tso_copy(ss, mp, req_list,
3455                             lso_hdr_size, pkt_size, mss, cksum_offset);
3456                         return (err);
3457                 }
3458 
3459                 /*
3460                  * for TSO, pseudo_hdr_offset holds mss.  The firmware
3461                  * figures out where to put the checksum by parsing
3462                  * the header.
3463                  */
3464 
3465                 pseudo_hdr_offset = htons(mss);
3466         } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
3467                 flags |= MXGEFW_FLAGS_SMALL;
                     /* packets below myri10ge_tx_copylen go to the copy path */
3468                 if (pkt_size < myri10ge_tx_copylen) {
3469                         req->cksum_offset = cksum_offset;
3470                         req->pseudo_hdr_offset = pseudo_hdr_offset;
3471                         req->flags = flags;
3472                         err = myri10ge_tx_copy(ss, mp, req);
3473                         return (err);
3474                 }
3475                 cum_len = 0;
3476         }
3477 
3478         /* pull one DMA handle for each bp from our freelist */
3479         handles = NULL;
3480         err = myri10ge_alloc_tx_handles(ss, count, &handles);
3481         if (err != DDI_SUCCESS) {
3482                 err = DDI_FAILURE;
3483                 goto stall;
3484         }
             /* from here on, count is the number of descriptors built */
3485         count = 0;
3486         rdma_count = 0;
3487         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3488                 mblen = MBLKL(bp);
3489                 dma_handle = handles;
3490                 handles = handles->next;
3491 
3492                 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
3493                     (caddr_t)bp->b_rptr, mblen,
3494                     DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
3495                     &cookie, &ncookies);
3496                 if (unlikely(rv != DDI_DMA_MAPPED)) {
3497                         err = EIO;
3498                         try_pullup = 0;
                             /* put the unused handle back on the list */
3499                         dma_handle->next = handles;
3500                         handles = dma_handle;
3501                         goto abort_with_handles;
3502                 }
3503 
3504                 /* reserve the slot */
3505                 tx_info[count].m = bp;
3506                 tx_info[count].handle = dma_handle;
3507 
3508                 for (; ; ) {
3509                         low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
3510                         high_swapped =
3511                             htonl(MYRI10GE_HIGHPART_TO_U32(
3512                             cookie.dmac_laddress));
3513                         len = (uint32_t)cookie.dmac_size;
3514                         while (len) {
3515                                 uint8_t flags_next;
3516                                 int cum_len_next;
3517 
                                     /*
                                      * Clamp this piece so that it ends at
                                      * the next tx_boundary-aligned address
                                      * (the mask arithmetic assumes
                                      * tx_boundary is a power of two).
                                      */
3518                                 boundary = (low + mgp->tx_boundary) &
3519                                     ~(mgp->tx_boundary - 1);
3520                                 seglen = boundary - low;
3521                                 if (seglen > len)
3522                                         seglen = len;
3523 
3524                                 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3525                                 cum_len_next = cum_len + seglen;
3526                                 if (mss) {
3527                                         (req-rdma_count)->rdma_count =
3528                                             rdma_count + 1;
3529                                         if (likely(cum_len >= 0)) {
3530                                                 /* payload */
3531                                                 int next_is_first, chop;
3532 
3533                                                 chop = (cum_len_next > mss);
3534                                                 cum_len_next =
3535                                                     cum_len_next % mss;
3536                                                 next_is_first =
3537                                                     (cum_len_next == 0);
3538                                                 flags |= chop *
3539                                                     MXGEFW_FLAGS_TSO_CHOP;
3540                                                 flags_next |= next_is_first *
3541                                                     MXGEFW_FLAGS_FIRST;
3542                                                 rdma_count |=
3543                                                     -(chop | next_is_first);
3544                                                 rdma_count +=
3545                                                     chop & !next_is_first;
3546                                         } else if (likely(cum_len_next >= 0)) {
3547                                                 /* header ends */
3548                                                 int small;
3549 
3550                                                 rdma_count = -1;
3551                                                 cum_len_next = 0;
3552                                                 seglen = -cum_len;
3553                                                 small = (mss <=
3554                                                     MXGEFW_SEND_SMALL_SIZE);
3555                                                 flags_next =
3556                                                     MXGEFW_FLAGS_TSO_PLD
3557                                                     | MXGEFW_FLAGS_FIRST
3558                                                     | (small *
3559                                                     MXGEFW_FLAGS_SMALL);
3560                                         }
3561                                 }
3562                                 req->addr_high = high_swapped;
3563                                 req->addr_low = htonl(low);
3564                                 req->pseudo_hdr_offset = pseudo_hdr_offset;
3565                                 req->pad = 0; /* complete solid 16-byte block */
3566                                 req->rdma_count = 1;
3567                                 req->cksum_offset = cksum_offset;
3568                                 req->length = htons(seglen);
3569                                 req->flags = flags | ((cum_len & 1) * odd_flag);
3570                                 if (cksum_offset > seglen)
3571                                         cksum_offset -= seglen;
3572                                 else
3573                                         cksum_offset = 0;
3574                                 low += seglen;
3575                                 len -= seglen;
3576                                 cum_len = cum_len_next;
3577                                 count++;
3578                                 rdma_count++;
3579                                 /*  make sure all the segments will fit */
3580                                 if (unlikely(count >= max_segs)) {
3581                                         MYRI10GE_ATOMIC_SLICE_STAT_INC(
3582                                             xmit_lowbuf);
3583                                         /* may try a pullup */
3584                                         err = EBUSY;
3585                                         if (try_pullup)
3586                                                 try_pullup = 2;
3587                                         goto abort_with_handles;
3588                                 }
3589                                 req++;
3590                                 req->flags = 0;
3591                                 flags = flags_next;
                                     /* only the first slot of an mblk owns it */
3592                                 tx_info[count].m = 0;
3593                         }
3594                         ncookies--;
3595                         if (ncookies == 0)
3596                                 break;
3597                         ddi_dma_nextcookie(dma_handle->h, &cookie);
3598                 }
3599         }
3600         (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3601 
             /* walk back, flagging the descriptors of the final LSO segment */
3602         if (mss) {
3603                 do {
3604                         req--;
3605                         req->flags |= MXGEFW_FLAGS_TSO_LAST;
3606                 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3607                     MXGEFW_FLAGS_FIRST)));
3608         }
3609 
3610         /* calculate tx stats */
3611         if (mss) {
3612                 uint16_t opackets;
3613                 int payload;
3614 
                     /*
                      * opackets is payload / mss rounded up; obytes counts
                      * lso_hdr_size once more for every packet after the
                      * first, on top of pkt_size.
                      */
3615                 payload = pkt_size - lso_hdr_size;
3616                 opackets = (payload / mss) + ((payload % mss) != 0);
3617                 tx_info[0].stat.un.all = 0;
3618                 tx_info[0].ostat.opackets = opackets;
3619                 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
3620                     + pkt_size;
3621         } else {
3622                 myri10ge_tx_stat(&tx_info[0].stat,
3623                     (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
3624         }
3625         mutex_enter(&tx->lock);
3626 
3627         /* check to see if the slots are really there */
3628         avail = tx->mask - (tx->req - tx->done);
3629         if (unlikely(avail <= count)) {
3630                 mutex_exit(&tx->lock);
3631                 err = 0;
3632                 goto late_stall;
3633         }
3634 
3635         myri10ge_send_locked(tx, req_list, tx_info, count);
3636         mutex_exit(&tx->lock);
3637         return (DDI_SUCCESS);
3638 
3639 late_stall:
             /*
              * NOTE(review): this path is entered with err == 0, and
              * try_pullup is cleared here, so after the handles are released
              * below the function returns 0 without having queued or freed
              * mp; myri10ge_send_wrapper() then returns NULL as if mp had
              * been consumed.  Confirm this is intended.
              */
3640         try_pullup = 0;
3641         atomic_inc_32(&tx->stall_late);
3642 
3643 abort_with_handles:
3644         /* unbind and free handles from previous mblks */
3645         for (i = 0; i < count; i++) {
3646                 bp = tx_info[i].m;
3647                 tx_info[i].m = 0;
3648                 if (bp) {
3649                         dma_handle = tx_info[i].handle;
3650                         (void) ddi_dma_unbind_handle(dma_handle->h);
3651                         dma_handle->next = handles;
3652                         handles = dma_handle;
3653                         tx_info[i].handle = NULL;
3654                         tx_info[i].m = NULL;
3655                 }
3656         }
3657         myri10ge_free_tx_handle_slist(tx, handles);
3658 pullup:
             /*
              * try_pullup: 0 = do not retry, 1 = retry once after a pullup,
              * 2 = ran out of descriptors, so drop mp if the pullup fails.
              */
3659         if (try_pullup) {
3660                 err = myri10ge_pullup(ss, mp);
3661                 if (err != DDI_SUCCESS && try_pullup == 2) {
3662                         /* drop */
3663                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3664                         freemsg(mp);
3665                         return (0);
3666                 }
3667                 try_pullup = 0;
3668                 goto again;
3669         }
3670 
3671 stall:
             /* EBUSY counts as a stall; any other non-zero err as an error */
3672         if (err != 0) {
3673                 if (err == EBUSY) {
3674                         atomic_inc_32(&tx->stall);
3675                 } else {
3676                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3677                 }
3678         }
3679         return (err);
3680 }
3681 
     /*
      * Transmit entry point for a TX ring (installed as mri_tx by
      * myri10ge_fill_ring()).  Provides the scratch space myri10ge_send()
      * needs for its descriptor list and per-descriptor bookkeeping, then
      * calls it.
      *
      * arg is the slice state and mp the message to send.  Returns NULL
      * when myri10ge_send() returned 0; otherwise returns mp to the caller.
      */
3682 static mblk_t *
3683 myri10ge_send_wrapper(void *arg, mblk_t *mp)
3684 {
3685         struct myri10ge_slice_state *ss = arg;
3686         int err = 0;
3687         mcp_kreq_ether_send_t *req_list;
3688 #if defined(__i386)
3689         /*
3690          * We need about 2.5KB of scratch space to handle transmits.
3691          * i86pc has only 8KB of kernel stack space, so we malloc the
3692          * scratch space there rather than keeping it on the stack.
3693          */
3694         size_t req_size, tx_info_size;
3695         struct myri10ge_tx_buffer_state *tx_info;
3696         caddr_t req_bytes;
3697 
             /* the extra 8 bytes leave room for the alignment fix-up below */
3698         req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3699             + 8;
3700         req_bytes = kmem_alloc(req_size, KM_SLEEP);
3701         tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1);
3702         tx_info = kmem_alloc(tx_info_size, KM_SLEEP);
3703 #else
             /* elsewhere the same scratch space lives on the stack */
3704         char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3705             + 8];
3706         struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
3707 #endif
3708 
3709         /* ensure req_list entries are aligned to 8 bytes */
3710         req_list = (struct mcp_kreq_ether_send *)
3711             (((unsigned long)req_bytes + 7UL) & ~7UL);
3712 
3713         err = myri10ge_send(ss, mp, req_list, tx_info);
3714 
3715 #if defined(__i386)
3716         kmem_free(tx_info, tx_info_size);
3717         kmem_free(req_bytes, req_size);
3718 #endif
             /* a non-zero result means mp was not consumed: hand it back */
3719         if (err)
3720                 return (mp);
3721         else
3722                 return (NULL);
3723 }
3724 
     /*
      * Add-MAC-address callback for the RX group (installed as mgi_addmac by
      * myri10ge_fill_group()).
      *
      * arg is the myri10ge_priv; mac_addr is the address to program.
      * Returns EINVAL for a NULL address, ENOSPC if an address is already
      * counted in macaddr_cnt, the error from myri10ge_m_unicst() if that
      * fails, and 0 on success.
      */
3725 static int
3726 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3727 {
3728         struct myri10ge_priv *mgp = arg;
3729         int err;
3730 
3731         if (mac_addr == NULL)
3732                 return (EINVAL);
3733 
3734         mutex_enter(&mgp->intrlock);
             /* only one address is accepted at a time */
3735         if (mgp->macaddr_cnt) {
3736                 mutex_exit(&mgp->intrlock);
3737                 return (ENOSPC);
3738         }
3739         err = myri10ge_m_unicst(mgp, mac_addr);
3740         if (!err)
3741                 mgp->macaddr_cnt++;
3742 
3743         mutex_exit(&mgp->intrlock);
3744         if (err)
3745                 return (err);
3746 
             /* record the new address; note this copy is done after unlocking */
3747         bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3748         return (0);
3749 }
3750 
     /*
      * Remove-MAC-address callback for the RX group (installed as mgi_remmac
      * by myri10ge_fill_group()).  Decrements macaddr_cnt under intrlock;
      * mac_addr is not examined.  Always returns 0.
      *
      * NOTE(review): the count is decremented unconditionally; presumably
      * callers only invoke this after a successful add -- confirm.
      */
3751 /*ARGSUSED*/
3752 static int
3753 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3754 {
3755         struct myri10ge_priv *mgp = arg;
3756 
3757         mutex_enter(&mgp->intrlock);
3758         mgp->macaddr_cnt--;
3759         mutex_exit(&mgp->intrlock);
3760 
3761         return (0);
3762 }
3763 
     /*
      * Fill in the group description for an RX ring group.  For any other
      * ring type infop is left untouched.  The group uses the myri10ge_priv
      * as its driver handle, has no start/stop callbacks, uses
      * myri10ge_addmac()/myri10ge_remmac() for address management, and
      * reports mgp->num_slices as its count.  index and gh are unused.
      */
3764 /*ARGSUSED*/
3765 static void
3766 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3767     mac_group_info_t *infop, mac_group_handle_t gh)
3768 {
3769         struct myri10ge_priv *mgp = arg;
3770 
3771         if (rtype != MAC_RING_TYPE_RX)
3772                 return;
3773 
3774         infop->mgi_driver = (mac_group_driver_t)mgp;
3775         infop->mgi_start = NULL;
3776         infop->mgi_stop = NULL;
3777         infop->mgi_addmac = myri10ge_addmac;
3778         infop->mgi_remmac = myri10ge_remmac;
3779         infop->mgi_count = mgp->num_slices;
3780 }
3781 
     /*
      * Start callback for an RX ring (installed as mri_start by
      * myri10ge_fill_ring()).  rh is the slice state; the generation number
      * mr_gen_num is stored in ss->rx_gen_num under rx_lock.  Always
      * returns 0.
      */
3782 static int
3783 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3784 {
3785         struct myri10ge_slice_state *ss;
3786 
3787         ss = (struct myri10ge_slice_state *)rh;
3788         mutex_enter(&ss->rx_lock);
3789         ss->rx_gen_num = mr_gen_num;
3790         mutex_exit(&ss->rx_lock);
3791         return (0);
3792 }
3793 
3794 /*
3795  * Retrieve a value for one of the statistics for a particular rx ring
      *
      * rh is the slice state.  MAC_STAT_RBYTES and MAC_STAT_IPACKETS are
      * read from ss->rx_stats and 0 is returned; for any other stat *val is
      * set to 0 and ENOTSUP is returned.
3796  */
3797 int
3798 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3799 {
3800         struct myri10ge_slice_state *ss;
3801 
3802         ss = (struct myri10ge_slice_state *)rh;
3803         switch (stat) {
3804         case MAC_STAT_RBYTES:
3805                 *val = ss->rx_stats.ibytes;
3806                 break;
3807 
3808         case MAC_STAT_IPACKETS:
3809                 *val = ss->rx_stats.ipackets;
3810                 break;
3811 
3812         default:
3813                 *val = 0;
3814                 return (ENOTSUP);
3815         }
3816 
3817         return (0);
3818 }
3819 
3820 /*
3821  * Retrieve a value for one of the statistics for a particular tx ring
      *
      * rh is the slice state.  MAC_STAT_OBYTES and MAC_STAT_OPACKETS are
      * read from ss->tx.stats and 0 is returned; for any other stat *val is
      * set to 0 and ENOTSUP is returned.
3822  */
3823 int
3824 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3825 {
3826         struct myri10ge_slice_state *ss;
3827 
3828         ss = (struct myri10ge_slice_state *)rh;
3829         switch (stat) {
3830         case MAC_STAT_OBYTES:
3831                 *val = ss->tx.stats.obytes;
3832                 break;
3833 
3834         case MAC_STAT_OPACKETS:
3835                 *val = ss->tx.stats.opackets;
3836                 break;
3837 
3838         default:
3839                 *val = 0;
3840                 return (ENOTSUP);
3841         }
3842 
3843         return (0);
3844 }
3845 
     /*
      * Interrupt-disable callback for an RX ring (installed as mi_disable by
      * myri10ge_fill_ring()).  Marks the slice as being polled by setting
      * rx_polling to B_TRUE under poll_lock.  Always returns 0.
      */
3846 static int
3847 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
3848 {
3849         struct myri10ge_slice_state *ss;
3850 
3851         ss = (struct myri10ge_slice_state *)intrh;
3852         mutex_enter(&ss->poll_lock);
3853         ss->rx_polling = B_TRUE;
3854         mutex_exit(&ss->poll_lock);
3855         return (0);
3856 }
3857 
     /*
      * Interrupt-enable callback for an RX ring (installed as mi_enable by
      * myri10ge_fill_ring()).  Clears rx_polling under poll_lock and, if
      * rx_token is set, writes BE_32(3) to *ss->irq_claim and clears the
      * token.  Always returns 0.
      */
3858 static int
3859 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
3860 {
3861         struct myri10ge_slice_state *ss;
3862 
3863         ss = (struct myri10ge_slice_state *)intrh;
3864         mutex_enter(&ss->poll_lock);
3865         ss->rx_polling = B_FALSE;
             /*
              * presumably rx_token marks an interrupt claim that was held
              * back while polling -- TODO confirm against the interrupt path
              */
3866         if (ss->rx_token) {
3867                 *ss->irq_claim = BE_32(3);
3868                 ss->rx_token = 0;
3869         }
3870         mutex_exit(&ss->poll_lock);
3871         return (0);
3872 }
3873 
     /*
      * Fill in the ring description for ring ring_index, which maps directly
      * to slice mgp->ss[ring_index].  The slice state is used as the driver
      * handle for both ring types and the ring handle rh is remembered in the
      * slice.  RX rings get start, poll, stat and interrupt enable/disable
      * callbacks; TX rings get the transmit and stat callbacks.  Other ring
      * types are ignored.  rg_index is unused.
      */
3874 /*ARGSUSED*/
3875 static void
3876 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
3877     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
3878 {
3879         struct myri10ge_priv *mgp = arg;
3880         struct myri10ge_slice_state *ss;
3881         mac_intr_t *mintr = &infop->mri_intr;
3882 
             /* the cast also rejects a negative ring_index */
3883         ASSERT((unsigned int)ring_index < mgp->num_slices);
3884 
3885         ss = &mgp->ss[ring_index];
3886         switch (rtype) {
3887         case MAC_RING_TYPE_RX:
3888                 ss->rx_rh = rh;
3889                 infop->mri_driver = (mac_ring_driver_t)ss;
3890                 infop->mri_start = myri10ge_ring_start;
3891                 infop->mri_stop = NULL;
3892                 infop->mri_poll = myri10ge_poll_rx;
3893                 infop->mri_stat = myri10ge_rx_ring_stat;
3894                 mintr->mi_handle = (mac_intr_handle_t)ss;
3895                 mintr->mi_enable = myri10ge_rx_ring_intr_enable;
3896                 mintr->mi_disable = myri10ge_rx_ring_intr_disable;
3897                 break;
3898         case MAC_RING_TYPE_TX:
3899                 ss->tx.rh = rh;
3900                 infop->mri_driver = (mac_ring_driver_t)ss;
3901                 infop->mri_start = NULL;
3902                 infop->mri_stop = NULL;
3903                 infop->mri_tx = myri10ge_send_wrapper;
3904                 infop->mri_stat = myri10ge_tx_ring_stat;
3905                 break;
3906         default:
3907                 break;
3908         }
3909 }
3910 
     /*
      * Delete the per-NIC statistics kstat, if one exists, and clear
      * mgp->ksp_stat so a repeated call is a no-op.
      */
3911 static void
3912 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
3913 {
3914         if (mgp->ksp_stat == NULL)
3915                 return;
3916 
3917         kstat_delete(mgp->ksp_stat);
3918         mgp->ksp_stat = NULL;
3919 }
3920 
     /*
      * Delete the per-slice statistics kstat, if one exists, and clear
      * ss->ksp_stat so a repeated call is a no-op.
      */
3921 static void
3922 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
3923 {
3924         if (ss->ksp_stat == NULL)
3925                 return;
3926 
3927         kstat_delete(ss->ksp_stat);
3928         ss->ksp_stat = NULL;
3929 }
3930 
     /*
      * Delete the driver/firmware information kstat, if one exists, and
      * clear mgp->ksp_info so a repeated call is a no-op.
      */
3931 static void
3932 myri10ge_info_destroy(struct myri10ge_priv *mgp)
3933 {
3934         if (mgp->ksp_info == NULL)
3935                 return;
3936 
3937         kstat_delete(mgp->ksp_info);
3938         mgp->ksp_info = NULL;
3939 }
3940 
     /*
      * ks_update callback for the per-NIC statistics kstat.  The kstat is
      * read-only: a KSTAT_WRITE request fails with EACCES.  On read, the
      * named values are refreshed from the myri10ge_priv (DMA benchmark
      * results, PCIe link width, DMA attribute flag) and from the firmware
      * statistics block of slice 0, whose counters are converted with
      * ntohl().  Returns 0 on success.
      */
3941 static int
3942 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
3943 {
3944         struct myri10ge_nic_stat *ethstat;
3945         struct myri10ge_priv *mgp;
3946         mcp_irq_data_t *fw_stats;
3947 
3948 
3949         if (rw == KSTAT_WRITE)
3950                 return (EACCES);
3951 
3952         ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
3953         mgp = (struct myri10ge_priv *)ksp->ks_private;
3954         fw_stats = mgp->ss[0].fw_stats;
3955 
3956         ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
3957         ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
3958         ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
             /* report as 0/1 whether DDI_DMA_FORCE_PHYSICAL is set for tx DMA */
3959         if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
3960                 ethstat->dma_force_physical.value.ul = 1;
3961         else
3962                 ethstat->dma_force_physical.value.ul = 0;
3963         ethstat->lanes.value.ul = mgp->pcie_link_width;
3964         ethstat->dropped_bad_crc32.value.ul =
3965             ntohl(fw_stats->dropped_bad_crc32);
3966         ethstat->dropped_bad_phy.value.ul =
3967             ntohl(fw_stats->dropped_bad_phy);
3968         ethstat->dropped_link_error_or_filtered.value.ul =
3969             ntohl(fw_stats->dropped_link_error_or_filtered);
3970         ethstat->dropped_link_overflow.value.ul =
3971             ntohl(fw_stats->dropped_link_overflow);
3972         ethstat->dropped_multicast_filtered.value.ul =
3973             ntohl(fw_stats->dropped_multicast_filtered);
3974         ethstat->dropped_no_big_buffer.value.ul =
3975             ntohl(fw_stats->dropped_no_big_buffer);
3976         ethstat->dropped_no_small_buffer.value.ul =
3977             ntohl(fw_stats->dropped_no_small_buffer);
3978         ethstat->dropped_overrun.value.ul =
3979             ntohl(fw_stats->dropped_overrun);
3980         ethstat->dropped_pause.value.ul =
3981             ntohl(fw_stats->dropped_pause);
3982         ethstat->dropped_runt.value.ul =
3983             ntohl(fw_stats->dropped_runt);
3984         ethstat->link_up.value.ul =
3985             ntohl(fw_stats->link_up);
3986         ethstat->dropped_unicast_filtered.value.ul =
3987             ntohl(fw_stats->dropped_unicast_filtered);
3988         return (0);
3989 }
3990 
     /*
      * ks_update callback for a per-slice statistics kstat.  The kstat is
      * read-only: a KSTAT_WRITE request fails with EACCES.  On read, the
      * named values are refreshed from the slice state found in ks_private
      * (receive buffer accounting and transmit ring counters).  Returns 0 on
      * success.
      */
3991 static int
3992 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
3993 {
3994         struct myri10ge_slice_stat *ethstat;
3995         struct myri10ge_slice_state *ss;
3996 
3997         if (rw == KSTAT_WRITE)
3998                 return (EACCES);
3999 
4000         ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
4001         ss = (struct myri10ge_slice_state *)ksp->ks_private;
4002 
4003         ethstat->rx_big.value.ul = ss->j_rx_cnt;
4004         ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
4005         ethstat->rx_bigbuf_pool.value.ul =
4006             ss->jpool.num_alloc - ss->jbufs_for_smalls;
4007         ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
             /* reported relative to the small ring size (mask + 1) */
4008         ethstat->rx_small.value.ul = ss->rx_small.cnt -
4009             (ss->rx_small.mask + 1);
4010         ethstat->tx_done.value.ul = ss->tx.done;
4011         ethstat->tx_req.value.ul = ss->tx.req;
4012         ethstat->tx_activate.value.ul = ss->tx.activate;
4013         ethstat->xmit_sched.value.ul = ss->tx.sched;
4014         ethstat->xmit_stall.value.ul = ss->tx.stall;
4015         ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
4016         ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
4017         ethstat->xmit_err.value.ul =  MYRI10GE_SLICE_STAT(xmit_err);
4018         return (0);
4019 }
4020 
     /*
      * ks_update callback for the driver/firmware information kstat.  The
      * kstat is read-only: a KSTAT_WRITE request fails with EACCES.  On read,
      * each string entry is pointed at the corresponding string: the driver
      * version macro and the firmware version, firmware name, interrupt
      * type, product code and serial number kept in the myri10ge_priv.
      * Returns 0 on success.
      */
4021 static int
4022 myri10ge_info_kstat_update(kstat_t *ksp, int rw)
4023 {
4024         struct myri10ge_info *info;
4025         struct myri10ge_priv *mgp;
4026 
4027 
4028         if (rw == KSTAT_WRITE)
4029                 return (EACCES);
4030 
4031         info = (struct myri10ge_info *)ksp->ks_data;
4032         mgp = (struct myri10ge_priv *)ksp->ks_private;
4033         kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
4034         kstat_named_setstr(&info->firmware_version, mgp->fw_version);
4035         kstat_named_setstr(&info->firmware_name, mgp->fw_name);
4036         kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
4037         kstat_named_setstr(&info->product_code, mgp->pc_str);
4038         kstat_named_setstr(&info->serial_number, mgp->sn_str);
4039         return (0);
4040 }
4041 
     /*
      * Names and types of the entries in the information kstat.  This single
      * static instance is used directly as ks_data by myri10ge_info_init(),
      * with myri10ge_info_template_lock installed as the kstat's ks_lock.
      */
4042 static struct myri10ge_info myri10ge_info_template = {
4043         { "driver_version",     KSTAT_DATA_STRING },
4044         { "firmware_version",   KSTAT_DATA_STRING },
4045         { "firmware_name",      KSTAT_DATA_STRING },
4046         { "interrupt_type",     KSTAT_DATA_STRING },
4047         { "product_code",       KSTAT_DATA_STRING },
4048         { "serial_number",      KSTAT_DATA_STRING },
4049 };
4050 static kmutex_t myri10ge_info_template_lock;
4051 
4052 
4053 static int
4054 myri10ge_info_init(struct myri10ge_priv *mgp)
4055 {
4056         struct kstat *ksp;
4057 
4058         ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4059             "myri10ge_info", "net", KSTAT_TYPE_NAMED,
4060             sizeof (myri10ge_info_template) /
4061             sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
4062         if (ksp == NULL) {
4063                 cmn_err(CE_WARN,
4064                     "%s: myri10ge_info_init: kstat_create failed", mgp->name);
4065                 return (DDI_FAILURE);
4066         }
4067         mgp->ksp_info = ksp;
4068         ksp->ks_update = myri10ge_info_kstat_update;
4069         ksp->ks_private = (void *) mgp;
4070         ksp->ks_data = &myri10ge_info_template;
4071         ksp->ks_lock = &myri10ge_info_template_lock;
4072         if (MYRI10GE_VERSION_STR != NULL)
4073                 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
4074         if (mgp->fw_version != NULL)
4075                 ksp->ks_data_size += strlen(mgp->fw_version) + 1;
4076         ksp->ks_data_size += strlen(mgp->fw_name) + 1;
4077         ksp->ks_data_size += strlen(mgp->intr_type) + 1;
4078         if (mgp->pc_str != NULL)
4079                 ksp->ks_data_size += strlen(mgp->pc_str) + 1;
4080         if (mgp->sn_str != NULL)
4081                 ksp->ks_data_size += strlen(mgp->sn_str) + 1;
4082 
4083         kstat_install(ksp);
4084         return (DDI_SUCCESS);
4085 }
4086 
4087 
4088 static int
4089 myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
4090 {
4091         struct kstat *ksp;
4092         struct myri10ge_nic_stat *ethstat;
4093 
4094         ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4095             "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
4096             sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4097         if (ksp == NULL) {
4098                 cmn_err(CE_WARN,
4099                     "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4100                 return (DDI_FAILURE);
4101         }
4102         mgp->ksp_stat = ksp;
4103         ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);
4104 
4105         kstat_named_init(&ethstat->dma_read_bw_MBs,
4106             "dma_read_bw_MBs", KSTAT_DATA_ULONG);
4107         kstat_named_init(&ethstat->dma_write_bw_MBs,
4108             "dma_write_bw_MBs", KSTAT_DATA_ULONG);
4109         kstat_named_init(&ethstat->dma_read_write_bw_MBs,
4110             "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
4111         kstat_named_init(&ethstat->dma_force_physical,
4112             "dma_force_physical", KSTAT_DATA_ULONG);
4113         kstat_named_init(&ethstat->lanes,
4114             "lanes", KSTAT_DATA_ULONG);
4115         kstat_named_init(&ethstat->dropped_bad_crc32,
4116             "dropped_bad_crc32", KSTAT_DATA_ULONG);
4117         kstat_named_init(&ethstat->dropped_bad_phy,
4118             "dropped_bad_phy", KSTAT_DATA_ULONG);
4119         kstat_named_init(&ethstat->dropped_link_error_or_filtered,
4120             "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
4121         kstat_named_init(&ethstat->dropped_link_overflow,
4122             "dropped_link_overflow", KSTAT_DATA_ULONG);
4123         kstat_named_init(&ethstat->dropped_multicast_filtered,
4124             "dropped_multicast_filtered", KSTAT_DATA_ULONG);
4125         kstat_named_init(&ethstat->dropped_no_big_buffer,
4126             "dropped_no_big_buffer", KSTAT_DATA_ULONG);
4127         kstat_named_init(&ethstat->dropped_no_small_buffer,
4128             "dropped_no_small_buffer", KSTAT_DATA_ULONG);
4129         kstat_named_init(&ethstat->dropped_overrun,
4130             "dropped_overrun", KSTAT_DATA_ULONG);
4131         kstat_named_init(&ethstat->dropped_pause,
4132             "dropped_pause", KSTAT_DATA_ULONG);
4133         kstat_named_init(&ethstat->dropped_runt,
4134             "dropped_runt", KSTAT_DATA_ULONG);
4135         kstat_named_init(&ethstat->dropped_unicast_filtered,
4136             "dropped_unicast_filtered", KSTAT_DATA_ULONG);
4137         kstat_named_init(&ethstat->dropped_runt, "dropped_runt",
4138             KSTAT_DATA_ULONG);
4139         kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG);
4140         kstat_named_init(&ethstat->link_changes, "link_changes",
4141             KSTAT_DATA_ULONG);
4142         ksp->ks_update = myri10ge_nic_stat_kstat_update;
4143         ksp->ks_private = (void *) mgp;
4144         kstat_install(ksp);
4145         return (DDI_SUCCESS);
4146 }
4147 
4148 static int
4149 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
4150 {
4151         struct myri10ge_priv *mgp = ss->mgp;
4152         struct kstat *ksp;
4153         struct myri10ge_slice_stat *ethstat;
4154         int instance;
4155 
4156         /*
4157          * fake an instance so that the same slice numbers from
4158          * different instances do not collide
4159          */
4160         instance = (ddi_get_instance(mgp->dip) * 1000) +  (int)(ss - mgp->ss);
4161         ksp = kstat_create("myri10ge", instance,
4162             "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
4163             sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4164         if (ksp == NULL) {
4165                 cmn_err(CE_WARN,
4166                     "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4167                 return (DDI_FAILURE);
4168         }
4169         ss->ksp_stat = ksp;
4170         ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
4171         kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum",
4172             KSTAT_DATA_ULONG);
4173         kstat_named_init(&ethstat->lro_flushed, "lro_flushed",
4174             KSTAT_DATA_ULONG);
4175         kstat_named_init(&ethstat->lro_queued, "lro_queued",
4176             KSTAT_DATA_ULONG);
4177         kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
4178             KSTAT_DATA_ULONG);
4179         kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool",
4180             KSTAT_DATA_ULONG);
4181         kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
4182             KSTAT_DATA_ULONG);
4183         kstat_named_init(&ethstat->rx_copy, "rx_copy",
4184             KSTAT_DATA_ULONG);
4185         kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf",
4186             KSTAT_DATA_ULONG);
4187         kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf",
4188             KSTAT_DATA_ULONG);
4189         kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len",
4190             KSTAT_DATA_ULONG);
4191         kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup",
4192             KSTAT_DATA_ULONG);
4193         kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first",
4194             KSTAT_DATA_ULONG);
4195         kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf",
4196             KSTAT_DATA_ULONG);
4197         kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags",
4198             KSTAT_DATA_ULONG);
4199         kstat_named_init(&ethstat->xmit_sched, "xmit_sched",
4200             KSTAT_DATA_ULONG);
4201         kstat_named_init(&ethstat->xmit_stall, "xmit_stall",
4202             KSTAT_DATA_ULONG);
4203         kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early",
4204             KSTAT_DATA_ULONG);
4205         kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late",
4206             KSTAT_DATA_ULONG);
4207         kstat_named_init(&ethstat->xmit_err, "xmit_err",
4208             KSTAT_DATA_ULONG);
4209         kstat_named_init(&ethstat->tx_req, "tx_req",
4210             KSTAT_DATA_ULONG);
4211         kstat_named_init(&ethstat->tx_activate, "tx_activate",
4212             KSTAT_DATA_ULONG);
4213         kstat_named_init(&ethstat->tx_done, "tx_done",
4214             KSTAT_DATA_ULONG);
4215         kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced",
4216             KSTAT_DATA_ULONG);
4217         kstat_named_init(&ethstat->rx_big, "rx_big",
4218             KSTAT_DATA_ULONG);
4219         kstat_named_init(&ethstat->rx_small, "rx_small",
4220             KSTAT_DATA_ULONG);
4221         ksp->ks_update = myri10ge_slice_stat_kstat_update;
4222         ksp->ks_private = (void *) ss;
4223         kstat_install(ksp);
4224         return (DDI_SUCCESS);
4225 }
4226 
4227 
4228 
4229 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4230 
4231 #include <vm/hat.h>
4232 #include <sys/ddi_isa.h>
4233 void *device_arena_alloc(size_t size, int vm_flag);
4234 void device_arena_free(void *vaddr, size_t size);
4235 
4236 static void
4237 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4238 {
4239         dev_info_t *parent_dip;
4240         ddi_acc_handle_t handle;
4241         unsigned long bus_number, dev_number, func_number;
4242         unsigned long cfg_pa, paddr, base, pgoffset;
4243         char            *cvaddr, *ptr;
4244         uint32_t        *ptr32;
4245         int             retval = DDI_FAILURE;
4246         int dontcare;
4247         uint16_t read_vid, read_did, vendor_id, device_id;
4248 
4249         if (!myri10ge_nvidia_ecrc_enable)
4250                 return;
4251 
4252         parent_dip = ddi_get_parent(mgp->dip);
4253         if (parent_dip == NULL) {
4254                 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
4255                 return;
4256         }
4257 
4258         if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
4259                 cmn_err(CE_WARN,
4260                     "%s: Could not access my parent's registers", mgp->name);
4261                 return;
4262         }
4263 
4264         vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
4265         device_id = pci_config_get16(handle, PCI_CONF_DEVID);
4266         pci_config_teardown(&handle);
4267 
4268         if (myri10ge_verbose) {
4269                 unsigned long   bus_number, dev_number, func_number;
4270                 int             reg_set, span;
4271                 (void) myri10ge_reg_set(parent_dip, &reg_set, &span,
4272                     &bus_number, &dev_number, &func_number);
4273                 if (myri10ge_verbose)
4274                         printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
4275                             bus_number, dev_number, func_number);
4276         }
4277 
4278         if (vendor_id !=  0x10de)
4279                 return;
4280 
4281         if (device_id != 0x005d /* CK804 */ &&
4282             (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
4283                 return;
4284         }
4285         (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
4286             &bus_number, &dev_number, &func_number);
4287 
4288         for (cfg_pa = 0xf0000000UL;
4289             retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
4290             cfg_pa -= 0x10000000UL) {
4291                 /* find the config space address for the nvidia bridge */
4292                 paddr = (cfg_pa + bus_number * 0x00100000UL +
4293                     (dev_number * 8 + func_number) * 0x00001000UL);
4294 
4295                 base = paddr & (~MMU_PAGEOFFSET);
4296                 pgoffset = paddr & MMU_PAGEOFFSET;
4297 
4298                 /* map it into the kernel */
4299                 cvaddr =  device_arena_alloc(ptob(1), VM_NOSLEEP);
4300                 if (cvaddr == NULL)
4301                         cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
4302                             mgp->name);
4303 
4304                 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
4305                     i_ddi_paddr_to_pfn(base),
4306                     PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);
4307 
4308                 ptr = cvaddr + pgoffset;
4309                 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
4310                 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
4311                 if (vendor_id ==  read_did || device_id == read_did) {
4312                         ptr32 = (uint32_t *)(void *)(ptr + 0x178);
4313                         if (myri10ge_verbose)
4314                                 printf("%s: Enabling ECRC on upstream "
4315                                     "Nvidia bridge (0x%x:0x%x) "
4316                                     "at %ld:%ld:%ld\n", mgp->name,
4317                                     read_vid, read_did, bus_number,
4318                                     dev_number, func_number);
4319                         *ptr32 |= 0x40;
4320                         retval = DDI_SUCCESS;
4321                 }
4322                 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
4323                 device_arena_free(cvaddr, ptob(1));
4324         }
4325 }
4326 
4327 #else
/*
 * Stub used when the file is not built for x86 (the #else branch of the
 * i386/x86_64 conditional): it does nothing, so callers can invoke
 * myri10ge_enable_nvidia_ecrc() unconditionally.
 */
4328 /*ARGSUSED*/
4329 static void
4330 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4331 {
4332 }
4333 #endif /* i386 */
4334 
4335 
4336 /*
4337  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4338  * when the PCI-E Completion packets are aligned on an 8-byte
4339  * boundary.  Some PCI-E chip sets always align Completion packets; on
4340  * the ones that do not, the alignment can be enforced by enabling
4341  * ECRC generation (if supported).
4342  *
4343  * When PCI-E Completion packets are not aligned, it is actually more
4344  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4345  *
4346  * If the driver can neither enable ECRC nor verify that it has
4347  * already been enabled, then it must use a firmware image which works
4348  * around unaligned completion packets (ethp_z8e.dat), and it should
4349  * also ensure that it never gives the device a Read-DMA which is
4350  * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
4351  * enabled, then the driver should use the aligned (eth_z8e.dat)
4352  * firmware image, and set tx.boundary to 4KB.
4353  */
4354 
4355 
4356 static int
4357 myri10ge_firmware_probe(struct myri10ge_priv *mgp)
4358 {
4359         int status;
4360 
4361         mgp->tx_boundary = 4096;
4362         /*
4363          * Verify the max read request size was set to 4KB
4364          * before trying the test with 4KB.
4365          */
4366         if (mgp->max_read_request_4k == 0)
4367                 mgp->tx_boundary = 2048;
4368         /*
4369          * load the optimized firmware which assumes aligned PCIe
4370          * completions in order to see if it works on this host.
4371          */
4372 
4373         mgp->fw_name = "rss_eth_z8e";
4374         mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4375         mgp->eth_z8e_length = rss_eth_z8e_length;
4376 
4377         status = myri10ge_load_firmware(mgp);
4378         if (status != 0) {
4379                 return (status);
4380         }
4381         /*
4382          * Enable ECRC if possible
4383          */
4384         myri10ge_enable_nvidia_ecrc(mgp);
4385 
4386         /*
4387          * Run a DMA test which watches for unaligned completions and
4388          * aborts on the first one seen.
4389          */
4390         status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
4391         if (status == 0)
4392                 return (0); /* keep the aligned firmware */
4393 
4394         if (status != E2BIG)
4395                 cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
4396                     mgp->name, status);
4397         if (status == ENOSYS)
4398                 cmn_err(CE_WARN, "%s: Falling back to ethp! "
4399                     "Please install up to date fw\n", mgp->name);
4400         return (status);
4401 }
4402 
4403 static int
4404 myri10ge_select_firmware(struct myri10ge_priv *mgp)
4405 {
4406         int aligned;
4407 
4408         aligned = 0;
4409 
4410         if (myri10ge_force_firmware == 1) {
4411                 if (myri10ge_verbose)
4412                         printf("%s: Assuming aligned completions (forced)\n",
4413                             mgp->name);
4414                 aligned = 1;
4415                 goto done;
4416         }
4417 
4418         if (myri10ge_force_firmware == 2) {
4419                 if (myri10ge_verbose)
4420                         printf("%s: Assuming unaligned completions (forced)\n",
4421                             mgp->name);
4422                 aligned = 0;
4423                 goto done;
4424         }
4425 
4426         /* If the width is less than 8, we may used the aligned firmware */
4427         if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
4428                 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
4429                     mgp->name, mgp->pcie_link_width);
4430                 aligned = 1;
4431                 goto done;
4432         }
4433 
4434         if (0 == myri10ge_firmware_probe(mgp))
4435                 return (0);  /* keep optimized firmware */
4436 
4437 done:
4438         if (aligned) {
4439                 mgp->fw_name = "rss_eth_z8e";
4440                 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4441                 mgp->eth_z8e_length = rss_eth_z8e_length;
4442                 mgp->tx_boundary = 4096;
4443         } else {
4444                 mgp->fw_name = "rss_ethp_z8e";
4445                 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
4446                 mgp->eth_z8e_length = rss_ethp_z8e_length;
4447                 mgp->tx_boundary = 2048;
4448         }
4449 
4450         return (myri10ge_load_firmware(mgp));
4451 }
4452 
4453 static int
4454 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
4455 {
4456         dev_info_t *devinfo = mgp->dip;
4457         int count, avail, actual, intr_types;
4458         int x, y, rc, inum = 0;
4459 
4460 
4461         rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4462         if (rc != DDI_SUCCESS) {
4463                 cmn_err(CE_WARN,
4464                     "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name,
4465                     rc);
4466                 return (DDI_FAILURE);
4467         }
4468 
4469         if (!myri10ge_use_msi)
4470                 intr_types &= ~DDI_INTR_TYPE_MSI;
4471         if (!myri10ge_use_msix)
4472                 intr_types &= ~DDI_INTR_TYPE_MSIX;
4473 
4474         if (intr_types & DDI_INTR_TYPE_MSIX) {
4475                 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
4476                 mgp->intr_type = "MSI-X";
4477         } else if (intr_types & DDI_INTR_TYPE_MSI) {
4478                 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
4479                 mgp->intr_type = "MSI";
4480         } else {
4481                 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
4482                 mgp->intr_type = "Legacy";
4483         }
4484         /* Get number of interrupts */
4485         rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
4486         if ((rc != DDI_SUCCESS) || (count == 0)) {
4487                 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4488                     "count: %d", mgp->name, rc, count);
4489 
4490                 return (DDI_FAILURE);
4491         }
4492 
4493         /* Get number of available interrupts */
4494         rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
4495         if ((rc != DDI_SUCCESS) || (avail == 0)) {
4496                 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
4497                     "rc: %d, avail: %d\n", mgp->name, rc, avail);
4498                 return (DDI_FAILURE);
4499         }
4500         if (avail < count) {
4501                 cmn_err(CE_NOTE,
4502                     "!%s: nintrs() returned %d, navail returned %d",
4503                     mgp->name, count, avail);
4504                 count = avail;
4505         }
4506 
4507         if (count < mgp->num_slices)
4508                 return (DDI_FAILURE);
4509 
4510         if (count > mgp->num_slices)
4511                 count = mgp->num_slices;
4512 
4513         /* Allocate memory for MSI interrupts */
4514         mgp->intr_size = count * sizeof (ddi_intr_handle_t);
4515         mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);
4516 
4517         rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
4518             count, &actual, DDI_INTR_ALLOC_NORMAL);
4519 
4520         if ((rc != DDI_SUCCESS) || (actual == 0)) {
4521                 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
4522                     mgp->name, rc);
4523 
4524                 kmem_free(mgp->htable, mgp->intr_size);
4525                 mgp->htable = NULL;
4526                 return (DDI_FAILURE);
4527         }
4528 
4529         if ((actual < count) && myri10ge_verbose) {
4530                 cmn_err(CE_NOTE, "%s: got %d/%d slices",
4531                     mgp->name, actual, count);
4532         }
4533 
4534         mgp->intr_cnt = actual;
4535 
4536         /*
4537          * Get priority for first irq, assume remaining are all the same
4538          */
4539         if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
4540             != DDI_SUCCESS) {
4541                 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);
4542 
4543                 /* Free already allocated intr */
4544                 for (y = 0; y < actual; y++) {
4545                         (void) ddi_intr_free(mgp->htable[y]);
4546                 }
4547 
4548                 kmem_free(mgp->htable, mgp->intr_size);
4549                 mgp->htable = NULL;
4550                 return (DDI_FAILURE);
4551         }
4552 
4553         mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;
4554 
4555         if (!add_handler)
4556                 return (DDI_SUCCESS);
4557 
4558         /* Call ddi_intr_add_handler() */
4559         for (x = 0; x < actual; x++) {
4560                 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
4561                     (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
4562                         cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
4563                             mgp->name);
4564 
4565                         /* Free already allocated intr */
4566                         for (y = 0; y < actual; y++) {
4567                                 (void) ddi_intr_free(mgp->htable[y]);
4568                         }
4569 
4570                         kmem_free(mgp->htable, mgp->intr_size);
4571                         mgp->htable = NULL;
4572                         return (DDI_FAILURE);
4573                 }
4574         }
4575 
4576         (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
4577         if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4578                 /* Call ddi_intr_block_enable() for MSI */
4579                 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
4580         } else {
4581                 /* Call ddi_intr_enable() for MSI non block enable */
4582                 for (x = 0; x < mgp->intr_cnt; x++) {
4583                         (void) ddi_intr_enable(mgp->htable[x]);
4584                 }
4585         }
4586 
4587         return (DDI_SUCCESS);
4588 }
4589 
4590 static void
4591 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
4592 {
4593         int x, err;
4594 
4595         /* Disable all interrupts */
4596         if (handler_installed) {
4597                 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4598                         /* Call ddi_intr_block_disable() */
4599                         (void) ddi_intr_block_disable(mgp->htable,
4600                             mgp->intr_cnt);
4601                 } else {
4602                         for (x = 0; x < mgp->intr_cnt; x++) {
4603                                 (void) ddi_intr_disable(mgp->htable[x]);
4604                         }
4605                 }
4606         }
4607 
4608         for (x = 0; x < mgp->intr_cnt; x++) {
4609                 if (handler_installed) {
4610                 /* Call ddi_intr_remove_handler() */
4611                         err = ddi_intr_remove_handler(mgp->htable[x]);
4612                         if (err != DDI_SUCCESS) {
4613                                 cmn_err(CE_WARN,
4614                                     "%s: ddi_intr_remove_handler for"
4615                                     "vec %d returned %d\n", mgp->name,
4616                                     x, err);
4617                         }
4618                 }
4619                 err = ddi_intr_free(mgp->htable[x]);
4620                 if (err != DDI_SUCCESS) {
4621                         cmn_err(CE_WARN,
4622                             "%s: ddi_intr_free for vec %d returned %d\n",
4623                             mgp->name, x, err);
4624                 }
4625         }
4626         kmem_free(mgp->htable, mgp->intr_size);
4627         mgp->htable = NULL;
4628 }
4629 
4630 static void
4631 myri10ge_test_physical(dev_info_t *dip)
4632 {
4633         ddi_dma_handle_t        handle;
4634         struct myri10ge_dma_stuff dma;
4635         void *addr;
4636         int err;
4637 
4638         /* test #1, sufficient for older sparc systems */
4639         myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
4640         err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
4641             DDI_DMA_DONTWAIT, NULL, &handle);
4642         if (err == DDI_DMA_BADATTR)
4643                 goto fail;
4644         ddi_dma_free_handle(&handle);
4645 
4646         /* test #2, required on Olympis where the bind is what fails */
4647         addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
4648             &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
4649             DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
4650         if (addr == NULL)
4651                 goto fail;
4652         myri10ge_dma_free(&dma);
4653         return;
4654 
4655 fail:
4656         if (myri10ge_verbose)
4657                 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4658                     "using IOMMU\n", ddi_get_instance(dip));
4659 
4660         myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
4661 }
4662 
4663 static void
4664 myri10ge_get_props(dev_info_t *dip)
4665 {
4666 
4667         myri10ge_flow_control =  ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4668             "myri10ge_flow_control", myri10ge_flow_control);
4669 
4670         myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4671             "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
4672 
4673 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4674         myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4675             "myri10ge_nvidia_ecrc_enable", 1);
4676 #endif
4677 
4678 
4679         myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4680             "myri10ge_use_msi", myri10ge_use_msi);
4681 
4682         myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4683             "myri10ge_deassert_wait",  myri10ge_deassert_wait);
4684 
4685         myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4686             "myri10ge_verbose", myri10ge_verbose);
4687 
4688         myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4689             "myri10ge_tx_copylen", myri10ge_tx_copylen);
4690 
4691         if (myri10ge_tx_copylen < 60) {
4692                 cmn_err(CE_WARN,
4693                     "myri10ge_tx_copylen must be >= 60 bytes\n");
4694                 myri10ge_tx_copylen = 60;
4695         }
4696 
4697         myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4698             "myri10ge_mtu_override", myri10ge_mtu_override);
4699 
4700         if (myri10ge_mtu_override >= 1500 && myri10ge_mtu_override <= 9000)
4701                 myri10ge_mtu = myri10ge_mtu_override +
4702                     sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
4703         else if (myri10ge_mtu_override != 0) {
4704                 cmn_err(CE_WARN,
4705                     "myri10ge_mtu_override must be between 1500 and "
4706                     "9000 bytes\n");
4707         }
4708 
4709         myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4710             "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
4711         myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4712             "myri10ge_bigbufs_max", myri10ge_bigbufs_max);
4713 
4714         myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4715             "myri10ge_watchdog_reset", myri10ge_watchdog_reset);
4716 
4717         if (myri10ge_bigbufs_initial < 128) {
4718                 cmn_err(CE_WARN,
4719                     "myri10ge_bigbufs_initial be at least 128\n");
4720                 myri10ge_bigbufs_initial = 128;
4721         }
4722         if (myri10ge_bigbufs_max < 128) {
4723                 cmn_err(CE_WARN,
4724                     "myri10ge_bigbufs_max be at least 128\n");
4725                 myri10ge_bigbufs_max = 128;
4726         }
4727 
4728         if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
4729                 cmn_err(CE_WARN,
4730                     "myri10ge_bigbufs_max must be >=  "
4731                     "myri10ge_bigbufs_initial\n");
4732                 myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
4733         }
4734 
4735         myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4736             "myri10ge_force_firmware", myri10ge_force_firmware);
4737 
4738         myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4739             "myri10ge_max_slices", myri10ge_max_slices);
4740 
4741         myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4742             "myri10ge_use_msix", myri10ge_use_msix);
4743 
4744         myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4745             "myri10ge_rss_hash", myri10ge_rss_hash);
4746 
4747         if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
4748             myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
4749                 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n",
4750                     myri10ge_rss_hash);
4751                 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4752         }
4753         myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4754             "myri10ge_lro", myri10ge_lro);
4755         myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4756             "myri10ge_lro_cnt", myri10ge_lro_cnt);
4757         myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4758             "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
4759         myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4760             "myri10ge_tx_hash", myri10ge_tx_hash);
4761         myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4762             "myri10ge_use_lso", myri10ge_use_lso);
4763         myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4764             "myri10ge_lso_copy", myri10ge_lso_copy);
4765         myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4766             "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
4767         myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4768             "myri10ge_small_bytes", myri10ge_small_bytes);
4769         if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) {
4770                 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n",
4771                     myri10ge_small_bytes);
4772                 cmn_err(CE_WARN, "must be aligned on 128b bndry -2\n");
4773                 myri10ge_small_bytes += 128;
4774                 myri10ge_small_bytes &= ~(128 -1);
4775                 myri10ge_small_bytes -= MXGEFW_PAD;
4776                 cmn_err(CE_WARN, "rounded up to %d\n",
4777                     myri10ge_small_bytes);
4778 
4779                 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4780         }
4781 }
4782 
4783 #ifndef PCI_EXP_LNKSTA
4784 #define PCI_EXP_LNKSTA 18
4785 #endif
4786 
4787 static int
4788 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
4789 {
4790         uint16_t        status;
4791         uint8_t         ptr;
4792 
4793         /* check to see if we have capabilities */
4794         status = pci_config_get16(handle, PCI_CONF_STAT);
4795         if (!(status & PCI_STAT_CAP)) {
4796                 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
4797                 return (ENXIO);
4798         }
4799 
4800         ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);
4801 
4802         /* Walk the capabilities list, looking for a PCI Express cap */
4803         while (ptr != PCI_CAP_NEXT_PTR_NULL) {
4804                 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
4805                         break;
4806                 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
4807         }
4808         if (ptr < 64) {
4809                 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
4810                 return (ENXIO);
4811         }
4812         *capptr = ptr;
4813         return (0);
4814 }
4815 
4816 static int
4817 myri10ge_set_max_readreq(ddi_acc_handle_t handle)
4818 {
4819         int err;
4820         uint16_t        val;
4821         uint8_t         ptr;
4822 
4823         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4824         if (err != 0) {
4825                 cmn_err(CE_WARN, "could not find PCIe cap\n");
4826                 return (ENXIO);
4827         }
4828 
4829         /* set max read req to 4096 */
4830         val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4831         val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
4832             PCIE_DEVCTL_MAX_READ_REQ_4096;
4833         pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
4834         val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4835         if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
4836             PCIE_DEVCTL_MAX_READ_REQ_4096) {
4837                 cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
4838                 return (EINVAL);
4839         }
4840         return (0);
4841 }
4842 
4843 static int
4844 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
4845 {
4846         int err;
4847         uint16_t        val;
4848         uint8_t         ptr;
4849 
4850         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4851         if (err != 0) {
4852                 cmn_err(CE_WARN, "could not set max read req\n");
4853                 return (ENXIO);
4854         }
4855 
4856         /* read link width */
4857         val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
4858         val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
4859         *link = (val >> 4);
4860         return (0);
4861 }
4862 
4863 static int
4864 myri10ge_reset_nic(struct myri10ge_priv *mgp)
4865 {
4866         ddi_acc_handle_t handle = mgp->cfg_hdl;
4867         uint32_t reboot;
4868         uint16_t cmd;
4869         int err;
4870 
4871         cmd = pci_config_get16(handle, PCI_CONF_COMM);
4872         if ((cmd & PCI_COMM_ME) == 0) {
4873                 /*
4874                  * Bus master DMA disabled?  Check to see if the card
4875                  * rebooted due to a parity error For now, just report
4876                  * it
4877                  */
4878 
4879                 /* enter read32 mode */
4880                 pci_config_put8(handle, mgp->vso + 0x10, 0x3);
4881                 /* read REBOOT_STATUS (0xfffffff0) */
4882                 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
4883                 reboot = pci_config_get16(handle, mgp->vso + 0x14);
4884                 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
4885                 return (0);
4886         }
4887         if (!myri10ge_watchdog_reset) {
4888                 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
4889                 return (1);
4890         }
4891 
4892         myri10ge_stop_locked(mgp);
4893         err = myri10ge_start_locked(mgp);
4894         if (err == DDI_FAILURE) {
4895                 return (0);
4896         }
4897         mac_tx_update(mgp->mh);
4898         return (1);
4899 }
4900 
4901 static inline int
4902 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
4903 {
4904         if (tx->sched != tx->stall &&
4905             tx->done == tx->watchdog_done &&
4906             tx->watchdog_req != tx->watchdog_done)
4907                 return (1);
4908         return (0);
4909 }
4910 
4911 static void
4912 myri10ge_watchdog(void *arg)
4913 {
4914         struct myri10ge_priv *mgp;
4915         struct myri10ge_slice_state *ss;
4916         myri10ge_tx_ring_t *tx;
4917         int nic_ok = 1;
4918         int slices_stalled, rx_pause, i;
4919         int add_rx;
4920 
4921         mgp = arg;
4922         mutex_enter(&mgp->intrlock);
4923         if (mgp->running != MYRI10GE_ETH_RUNNING) {
4924                 cmn_err(CE_WARN,
4925                     "%s not running, not rearming watchdog (%d)\n",
4926                     mgp->name, mgp->running);
4927                 mutex_exit(&mgp->intrlock);
4928                 return;
4929         }
4930 
4931         rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);
4932 
4933         /*
4934          * make sure nic is stalled before we reset the nic, so as to
4935          * ensure we don't rip the transmit data structures out from
4936          * under a pending transmit
4937          */
4938 
4939         for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
4940                 tx = &mgp->ss[i].tx;
4941                 slices_stalled = myri10ge_ring_stalled(tx);
4942                 if (slices_stalled)
4943                         break;
4944         }
4945 
4946         if (slices_stalled) {
4947                 if (mgp->watchdog_rx_pause == rx_pause) {
4948                         cmn_err(CE_WARN,
4949                             "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
4950                             mgp->name, i, tx->sched, tx->stall,
4951                             tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
4952                             (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
4953                         nic_ok = myri10ge_reset_nic(mgp);
4954                 } else {
4955                         cmn_err(CE_WARN,
4956                             "%s Flow controlled, check link partner\n",
4957                             mgp->name);
4958                 }
4959         }
4960 
4961         if (!nic_ok) {
4962                 cmn_err(CE_WARN,
4963                     "%s Nic dead, not rearming watchdog\n", mgp->name);
4964                 mutex_exit(&mgp->intrlock);
4965                 return;
4966         }
4967         for (i = 0; i < mgp->num_slices; i++) {
4968                 ss = &mgp->ss[i];
4969                 tx = &ss->tx;
4970                 tx->watchdog_done = tx->done;
4971                 tx->watchdog_req = tx->req;
4972                 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
4973                         ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
4974                         add_rx =
4975                             min(ss->jpool.num_alloc,
4976                             myri10ge_bigbufs_max -
4977                             (ss->jpool.num_alloc -
4978                             ss->jbufs_for_smalls));
4979                         if (add_rx != 0) {
4980                                 (void) myri10ge_add_jbufs(ss, add_rx, 0);
4981                                 /* now feed them to the firmware */
4982                                 mutex_enter(&ss->jpool.mtx);
4983                                 myri10ge_restock_jumbos(ss);
4984                                 mutex_exit(&ss->jpool.mtx);
4985                         }
4986                 }
4987         }
4988         mgp->watchdog_rx_pause = rx_pause;
4989 
4990         mgp->timer_id = timeout(myri10ge_watchdog, mgp,
4991             mgp->timer_ticks);
4992         mutex_exit(&mgp->intrlock);
4993 }
4994 
4995 /*ARGSUSED*/
4996 static int
4997 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
4998 
4999 {
5000         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5001         (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
5002         return (0);
5003 }
5004 
5005 /*ARGSUSED*/
5006 static int
5007 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
5008     caddr_t cp, cred_t *credp)
5009 
5010 {
5011         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5012         char *end;
5013         size_t new_value;
5014 
5015         new_value = mi_strtol(value, &end, 10);
5016         if (end == value)
5017                 return (EINVAL);
5018 
5019         mutex_enter(&myri10ge_param_lock);
5020         mgp->intr_coal_delay = (int)new_value;
5021         *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
5022         mutex_exit(&myri10ge_param_lock);
5023         return (0);
5024 }
5025 
5026 /*ARGSUSED*/
5027 static int
5028 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5029 
5030 {
5031         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5032         (void) mi_mpprintf(mp, "%d", mgp->pause);
5033         return (0);
5034 }
5035 
5036 /*ARGSUSED*/
5037 static int
5038 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
5039                         caddr_t cp, cred_t *credp)
5040 
5041 {
5042         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5043         char *end;
5044         size_t new_value;
5045         int err = 0;
5046 
5047         new_value = mi_strtol(value, &end, 10);
5048         if (end == value)
5049                 return (EINVAL);
5050         if (new_value != 0)
5051                 new_value = 1;
5052 
5053         mutex_enter(&myri10ge_param_lock);
5054         if (new_value != mgp->pause)
5055                 err = myri10ge_change_pause(mgp, new_value);
5056         mutex_exit(&myri10ge_param_lock);
5057         return (err);
5058 }
5059 
5060 /*ARGSUSED*/
5061 static int
5062 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5063 
5064 {
5065         (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
5066         return (0);
5067 }
5068 
5069 /*ARGSUSED*/
5070 static int
5071 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
5072     caddr_t cp, cred_t *credp)
5073 
5074 {
5075         char *end;
5076         size_t new_value;
5077 
5078         new_value = mi_strtol(value, &end, 10);
5079         if (end == value)
5080                 return (EINVAL);
5081         *(int *)(void *)cp = new_value;
5082 
5083         return (0);
5084 }
5085 
5086 static void
5087 myri10ge_ndd_init(struct myri10ge_priv *mgp)
5088 {
5089         mgp->nd_head = NULL;
5090 
5091         (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
5092             myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
5093         (void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
5094             myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
5095         (void) nd_load(&mgp->nd_head, "myri10ge_verbose",
5096             myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
5097         (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
5098             myri10ge_get_int, myri10ge_set_int,
5099             (caddr_t)&myri10ge_deassert_wait);
5100         (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
5101             myri10ge_get_int, myri10ge_set_int,
5102             (caddr_t)&myri10ge_bigbufs_max);
5103         (void) nd_load(&mgp->nd_head, "myri10ge_lro",
5104             myri10ge_get_int, myri10ge_set_int,
5105             (caddr_t)&myri10ge_lro);
5106         (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
5107             myri10ge_get_int, myri10ge_set_int,
5108             (caddr_t)&myri10ge_lro_max_aggr);
5109         (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
5110             myri10ge_get_int, myri10ge_set_int,
5111             (caddr_t)&myri10ge_tx_hash);
5112         (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
5113             myri10ge_get_int, myri10ge_set_int,
5114             (caddr_t)&myri10ge_lso_copy);
5115 }
5116 
5117 static void
5118 myri10ge_ndd_fini(struct myri10ge_priv *mgp)
5119 {
5120         nd_free(&mgp->nd_head);
5121 }
5122 
5123 static void
5124 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
5125 {
5126         struct iocblk *iocp;
5127         struct myri10ge_priv *mgp = arg;
5128         int cmd, ok, err;
5129 
5130         iocp = (struct iocblk *)(void *)mp->b_rptr;
5131         cmd = iocp->ioc_cmd;
5132 
5133         ok = 0;
5134         err = 0;
5135 
5136         switch (cmd) {
5137         case ND_GET:
5138         case ND_SET:
5139                 ok = nd_getset(wq, mgp->nd_head, mp);
5140                 break;
5141         default:
5142                 break;
5143         }
5144         if (!ok)
5145                 err = EINVAL;
5146         else
5147                 err = iocp->ioc_error;
5148 
5149         if (!err)
5150                 miocack(wq, mp, iocp->ioc_count, err);
5151         else
5152                 miocnak(wq, mp, 0, err);
5153 }
5154 
5155 static struct myri10ge_priv *mgp_list;
5156 
5157 struct myri10ge_priv *
5158 myri10ge_get_instance(uint_t unit)
5159 {
5160         struct myri10ge_priv *mgp;
5161 
5162         mutex_enter(&myri10ge_param_lock);
5163         for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
5164                 if (unit == ddi_get_instance(mgp->dip)) {
5165                         mgp->refcnt++;
5166                         break;
5167                 }
5168         }
5169         mutex_exit(&myri10ge_param_lock);
5170         return (mgp);
5171 }
5172 
5173 void
5174 myri10ge_put_instance(struct myri10ge_priv *mgp)
5175 {
5176         mutex_enter(&myri10ge_param_lock);
5177         mgp->refcnt--;
5178         mutex_exit(&myri10ge_param_lock);
5179 }
5180 
5181 static boolean_t
5182 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
5183 {
5184         struct myri10ge_priv *mgp = arg;
5185         uint32_t *cap_hcksum;
5186         mac_capab_lso_t *cap_lso;
5187         mac_capab_rings_t *cap_rings;
5188 
5189         switch (cap) {
5190         case MAC_CAPAB_HCKSUM:
5191                 cap_hcksum = cap_data;
5192                 *cap_hcksum = HCKSUM_INET_PARTIAL;
5193                 break;
5194         case MAC_CAPAB_RINGS:
5195                 cap_rings = cap_data;
5196                 switch (cap_rings->mr_type) {
5197                 case MAC_RING_TYPE_RX:
5198                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5199                         cap_rings->mr_rnum = mgp->num_slices;
5200                         cap_rings->mr_gnum = 1;
5201                         cap_rings->mr_rget = myri10ge_fill_ring;
5202                         cap_rings->mr_gget = myri10ge_fill_group;
5203                         break;
5204                 case MAC_RING_TYPE_TX:
5205                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5206                         cap_rings->mr_rnum = mgp->num_slices;
5207                         cap_rings->mr_gnum = 0;
5208                         cap_rings->mr_rget = myri10ge_fill_ring;
5209                         cap_rings->mr_gget = NULL;
5210                         break;
5211                 default:
5212                         return (B_FALSE);
5213                 }
5214                 break;
5215         case MAC_CAPAB_LSO:
5216                 cap_lso = cap_data;
5217                 if (!myri10ge_use_lso)
5218                         return (B_FALSE);
5219                 if (!(mgp->features & MYRI10GE_TSO))
5220                         return (B_FALSE);
5221                 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
5222                 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
5223                 break;
5224 
5225         default:
5226                 return (B_FALSE);
5227         }
5228         return (B_TRUE);
5229 }
5230 
5231 
5232 static int
5233 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
5234 {
5235         struct myri10ge_priv *mgp = arg;
5236         struct myri10ge_rx_ring_stats *rstat;
5237         struct myri10ge_tx_ring_stats *tstat;
5238         mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
5239         struct myri10ge_slice_state *ss;
5240         uint64_t tmp = 0;
5241         int i;
5242 
5243         switch (stat) {
5244         case MAC_STAT_IFSPEED:
5245                 *val = 10ull * 1000ull * 1000000ull;
5246                 break;
5247 
5248         case MAC_STAT_MULTIRCV:
5249                 for (i = 0; i < mgp->num_slices; i++) {
5250                         rstat = &mgp->ss[i].rx_stats;
5251                         tmp += rstat->multircv;
5252                 }
5253                 *val = tmp;
5254                 break;
5255 
5256         case MAC_STAT_BRDCSTRCV:
5257                 for (i = 0; i < mgp->num_slices; i++) {
5258                         rstat = &mgp->ss[i].rx_stats;
5259                         tmp += rstat->brdcstrcv;
5260                 }
5261                 *val = tmp;
5262                 break;
5263 
5264         case MAC_STAT_MULTIXMT:
5265                 for (i = 0; i < mgp->num_slices; i++) {
5266                         tstat = &mgp->ss[i].tx.stats;
5267                         tmp += tstat->multixmt;
5268                 }
5269                 *val = tmp;
5270                 break;
5271 
5272         case MAC_STAT_BRDCSTXMT:
5273                 for (i = 0; i < mgp->num_slices; i++) {
5274                         tstat = &mgp->ss[i].tx.stats;
5275                         tmp += tstat->brdcstxmt;
5276                 }
5277                 *val = tmp;
5278                 break;
5279 
5280         case MAC_STAT_NORCVBUF:
5281                 tmp = ntohl(fw_stats->dropped_no_big_buffer);
5282                 tmp += ntohl(fw_stats->dropped_no_small_buffer);
5283                 tmp += ntohl(fw_stats->dropped_link_overflow);
5284                 for (i = 0; i < mgp->num_slices; i++) {
5285                         ss = &mgp->ss[i];
5286                         tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
5287                         tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
5288                 }
5289                 *val = tmp;
5290                 break;
5291 
5292         case MAC_STAT_IERRORS:
5293                 tmp += ntohl(fw_stats->dropped_bad_crc32);
5294                 tmp += ntohl(fw_stats->dropped_bad_phy);
5295                 tmp += ntohl(fw_stats->dropped_runt);
5296                 tmp += ntohl(fw_stats->dropped_overrun);
5297                 *val = tmp;
5298                 break;
5299 
5300         case MAC_STAT_OERRORS:
5301                 for (i = 0; i < mgp->num_slices; i++) {
5302                         ss = &mgp->ss[i];
5303                         tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
5304                         tmp += MYRI10GE_SLICE_STAT(xmit_err);
5305                 }
5306                 *val = tmp;
5307                 break;
5308 
5309         case MAC_STAT_RBYTES:
5310                 for (i = 0; i < mgp->num_slices; i++) {
5311                         rstat = &mgp->ss[i].rx_stats;
5312                         tmp += rstat->ibytes;
5313                 }
5314                 *val = tmp;
5315                 break;
5316 
5317         case MAC_STAT_IPACKETS:
5318                 for (i = 0; i < mgp->num_slices; i++) {
5319                         rstat = &mgp->ss[i].rx_stats;
5320                         tmp += rstat->ipackets;
5321                 }
5322                 *val = tmp;
5323                 break;
5324 
5325         case MAC_STAT_OBYTES:
5326                 for (i = 0; i < mgp->num_slices; i++) {
5327                         tstat = &mgp->ss[i].tx.stats;
5328                         tmp += tstat->obytes;
5329                 }
5330                 *val = tmp;
5331                 break;
5332 
5333         case MAC_STAT_OPACKETS:
5334                 for (i = 0; i < mgp->num_slices; i++) {
5335                         tstat = &mgp->ss[i].tx.stats;
5336                         tmp += tstat->opackets;
5337                 }
5338                 *val = tmp;
5339                 break;
5340 
5341         case ETHER_STAT_TOOLONG_ERRORS:
5342                 *val = ntohl(fw_stats->dropped_overrun);
5343                 break;
5344 
5345 #ifdef SOLARIS_S11
5346         case ETHER_STAT_TOOSHORT_ERRORS:
5347                 *val = ntohl(fw_stats->dropped_runt);
5348                 break;
5349 #endif
5350 
5351         case ETHER_STAT_LINK_PAUSE:
5352                 *val = mgp->pause;
5353                 break;
5354 
5355         case ETHER_STAT_LINK_AUTONEG:
5356                 *val = 1;
5357                 break;
5358 
5359         case ETHER_STAT_LINK_DUPLEX:
5360                 *val = LINK_DUPLEX_FULL;
5361                 break;
5362 
5363         default:
5364                 return (ENOTSUP);
5365         }
5366 
5367         return (0);
5368 }
5369 
/*
 * Entry points handed to the MAC layer.  The initializer is positional,
 * so the order below must follow the member order of mac_callbacks_t.
 * The first word flags which optional callbacks are supplied: here the
 * ioctl and getcapab handlers, which are the last two entries.
 * NOTE(review): the three NULL slots are presumably the unicast, tx and
 * reserved members -- confirm against the mac_callbacks_t definition
 * this driver is built with.
 */
static mac_callbacks_t myri10ge_m_callbacks = {
        (MC_IOCTL | MC_GETCAPAB),
        myri10ge_m_stat,
        myri10ge_m_start,
        myri10ge_m_stop,
        myri10ge_m_promisc,
        myri10ge_m_multicst,
        NULL,
        NULL,
        NULL,
        myri10ge_m_ioctl,
        myri10ge_m_getcapab
};
5383 
5384 
5385 static int
5386 myri10ge_probe_slices(struct myri10ge_priv *mgp)
5387 {
5388         myri10ge_cmd_t cmd;
5389         int status;
5390 
5391         mgp->num_slices = 1;
5392 
5393         /* hit the board with a reset to ensure it is alive */
5394         (void) memset(&cmd, 0, sizeof (cmd));
5395         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
5396         if (status != 0) {
5397                 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
5398                 return (ENXIO);
5399         }
5400 
5401         if (myri10ge_use_msix == 0)
5402                 return (0);
5403 
5404         /* tell it the size of the interrupt queues */
5405         cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
5406         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
5407         if (status != 0) {
5408                 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
5409                     mgp->name);
5410                 return (ENXIO);
5411         }
5412 
5413         /* ask the maximum number of slices it supports */
5414         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
5415             &cmd);
5416         if (status != 0)
5417                 return (0);
5418 
5419         mgp->num_slices = cmd.data0;
5420 
5421         /*
5422          * if the admin did not specify a limit to how many
5423          * slices we should use, cap it automatically to the
5424          * number of CPUs currently online
5425          */
5426         if (myri10ge_max_slices == -1)
5427                 myri10ge_max_slices = ncpus;
5428 
5429         if (mgp->num_slices > myri10ge_max_slices)
5430                 mgp->num_slices = myri10ge_max_slices;
5431 
5432 
5433         /*
5434          * Now try to allocate as many MSI-X vectors as we have
5435          * slices. We give up on MSI-X if we can only get a single
5436          * vector.
5437          */
5438         while (mgp->num_slices > 1) {
5439                 /* make sure it is a power of two */
5440                 while (mgp->num_slices & (mgp->num_slices - 1))
5441                         mgp->num_slices--;
5442                 if (mgp->num_slices == 1)
5443                         return (0);
5444 
5445                 status = myri10ge_add_intrs(mgp, 0);
5446                 if (status == 0) {
5447                         myri10ge_rem_intrs(mgp, 0);
5448                         if (mgp->intr_cnt == mgp->num_slices) {
5449                                 if (myri10ge_verbose)
5450                                         printf("Got %d slices!\n",
5451                                             mgp->num_slices);
5452                                 return (0);
5453                         }
5454                         mgp->num_slices = mgp->intr_cnt;
5455                 } else {
5456                         mgp->num_slices = mgp->num_slices / 2;
5457                 }
5458         }
5459 
5460         if (myri10ge_verbose)
5461                 printf("Got %d slices\n", mgp->num_slices);
5462         return (0);
5463 }
5464 
5465 static void
5466 myri10ge_lro_free(struct myri10ge_slice_state *ss)
5467 {
5468         struct lro_entry *lro;
5469 
5470         while (ss->lro_free != NULL) {
5471                 lro = ss->lro_free;
5472                 ss->lro_free = lro->next;
5473                 kmem_free(lro, sizeof (*lro));
5474         }
5475 }
5476 
5477 static void
5478 myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
5479 {
5480         struct lro_entry *lro;
5481         int idx;
5482 
5483         ss->lro_free = NULL;
5484         ss->lro_active = NULL;
5485 
5486         for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
5487                 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
5488                 if (lro == NULL)
5489                         continue;
5490                 lro->next = ss->lro_free;
5491                 ss->lro_free = lro;
5492         }
5493 }
5494 
5495 static void
5496 myri10ge_free_slices(struct myri10ge_priv *mgp)
5497 {
5498         struct myri10ge_slice_state *ss;
5499         size_t bytes;
5500         int i;
5501 
5502         if (mgp->ss == NULL)
5503                 return;
5504 
5505         for (i = 0; i < mgp->num_slices; i++) {
5506                 ss = &mgp->ss[i];
5507                 if (ss->rx_done.entry == NULL)
5508                         continue;
5509                 myri10ge_dma_free(&ss->rx_done.dma);
5510                 ss->rx_done.entry = NULL;
5511                 if (ss->fw_stats == NULL)
5512                         continue;
5513                 myri10ge_dma_free(&ss->fw_stats_dma);
5514                 ss->fw_stats = NULL;
5515                 mutex_destroy(&ss->rx_lock);
5516                 mutex_destroy(&ss->tx.lock);
5517                 mutex_destroy(&ss->tx.handle_lock);
5518                 mutex_destroy(&ss->poll_lock);
5519                 myri10ge_jpool_fini(ss);
5520                 myri10ge_slice_stat_destroy(ss);
5521                 myri10ge_lro_free(ss);
5522         }
5523         bytes = sizeof (*mgp->ss) * mgp->num_slices;
5524         kmem_free(mgp->ss, bytes);
5525         mgp->ss = NULL;
5526 }
5527 
5528 
5529 static int
5530 myri10ge_alloc_slices(struct myri10ge_priv *mgp)
5531 {
5532         struct myri10ge_slice_state *ss;
5533         size_t bytes;
5534         int i;
5535 
5536         bytes = sizeof (*mgp->ss) * mgp->num_slices;
5537         mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
5538         if (mgp->ss == NULL)
5539                 return (ENOMEM);
5540         for (i = 0; i < mgp->num_slices; i++) {
5541                 ss = &mgp->ss[i];
5542 
5543                 ss->mgp = mgp;
5544 
5545                 /* allocate the per-slice firmware stats */
5546                 bytes = sizeof (*ss->fw_stats);
5547                 ss->fw_stats = (mcp_irq_data_t *)(void *)
5548                     myri10ge_dma_alloc(mgp->dip, bytes,
5549                     &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5550                     DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5551                     &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
5552                 if (ss->fw_stats == NULL)
5553                         goto abort;
5554                 (void) memset(ss->fw_stats, 0, bytes);
5555 
5556                 /* allocate rx done ring */
5557                 bytes = mgp->max_intr_slots *
5558                     sizeof (*ss->rx_done.entry);
5559                 ss->rx_done.entry = (mcp_slot_t *)(void *)
5560                     myri10ge_dma_alloc(mgp->dip, bytes,
5561                     &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5562                     DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5563                     &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
5564                 if (ss->rx_done.entry == NULL) {
5565                         goto abort;
5566                 }
5567                 (void) memset(ss->rx_done.entry, 0, bytes);
5568                 mutex_init(&ss->rx_lock,   NULL, MUTEX_DEFAULT, mgp->icookie);
5569                 mutex_init(&ss->tx.lock,   NULL, MUTEX_DEFAULT, NULL);
5570                 mutex_init(&ss->tx.handle_lock,   NULL, MUTEX_DEFAULT, NULL);
5571                 mutex_init(&ss->poll_lock,   NULL, MUTEX_DEFAULT, NULL);
5572                 myri10ge_jpool_init(ss);
5573                 (void) myri10ge_slice_stat_init(ss);
5574                 myri10ge_lro_alloc(ss);
5575         }
5576 
5577         return (0);
5578 
5579 abort:
5580         myri10ge_free_slices(mgp);
5581         return (ENOMEM);
5582 }
5583 
5584 static int
5585 myri10ge_save_msi_state(struct myri10ge_priv *mgp,
5586     ddi_acc_handle_t handle)
5587 {
5588         uint8_t ptr;
5589         int err;
5590 
5591         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5592         if (err != 0) {
5593                 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5594                     mgp->name);
5595                 return (DDI_FAILURE);
5596         }
5597         mgp->pci_saved_state.msi_ctrl =
5598             pci_config_get16(handle, ptr + PCI_MSI_CTRL);
5599         mgp->pci_saved_state.msi_addr_low =
5600             pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
5601         mgp->pci_saved_state.msi_addr_high =
5602             pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
5603         mgp->pci_saved_state.msi_data_32 =
5604             pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
5605         mgp->pci_saved_state.msi_data_64 =
5606             pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
5607         return (DDI_SUCCESS);
5608 }
5609 
5610 static int
5611 myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
5612     ddi_acc_handle_t handle)
5613 {
5614         uint8_t ptr;
5615         int err;
5616 
5617         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5618         if (err != 0) {
5619                 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5620                     mgp->name);
5621                 return (DDI_FAILURE);
5622         }
5623 
5624         pci_config_put16(handle, ptr + PCI_MSI_CTRL,
5625             mgp->pci_saved_state.msi_ctrl);
5626         pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
5627             mgp->pci_saved_state.msi_addr_low);
5628         pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
5629             mgp->pci_saved_state.msi_addr_high);
5630         pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
5631             mgp->pci_saved_state.msi_data_32);
5632         pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
5633             mgp->pci_saved_state.msi_data_64);
5634 
5635         return (DDI_SUCCESS);
5636 }
5637 
5638 static int
5639 myri10ge_save_pci_state(struct myri10ge_priv *mgp)
5640 {
5641         ddi_acc_handle_t handle = mgp->cfg_hdl;
5642         int i;
5643         int err = DDI_SUCCESS;
5644 
5645 
5646         /* Save the non-extended PCI config space 32-bits at a time */
5647         for (i = 0; i < 16; i++)
5648                 mgp->pci_saved_state.base[i] =
5649                     pci_config_get32(handle, i*4);
5650 
5651         /* now save MSI interrupt state *, if needed */
5652         if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5653                 err = myri10ge_save_msi_state(mgp, handle);
5654 
5655         return (err);
5656 }
5657 
5658 static int
5659 myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
5660 {
5661         ddi_acc_handle_t handle = mgp->cfg_hdl;
5662         int i;
5663         int err = DDI_SUCCESS;
5664 
5665 
5666         /* Restore the non-extended PCI config space 32-bits at a time */
5667         for (i = 15; i >= 0; i--)
5668                 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);
5669 
5670         /* now restore MSI interrupt state *, if needed */
5671         if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5672                 err = myri10ge_restore_msi_state(mgp, handle);
5673 
5674         if (mgp->max_read_request_4k)
5675                 (void) myri10ge_set_max_readreq(handle);
5676         return (err);
5677 }
5678 
5679 
5680 static int
5681 myri10ge_suspend(dev_info_t *dip)
5682 {
5683         struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
5684         int status;
5685 
5686         if (mgp == NULL) {
5687                 cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
5688                 return (DDI_FAILURE);
5689         }
5690         if (mgp->dip != dip) {
5691                 cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
5692                 return (DDI_FAILURE);
5693         }
5694         mutex_enter(&mgp->intrlock);
5695         if (mgp->running == MYRI10GE_ETH_RUNNING) {
5696                 mgp->running = MYRI10GE_ETH_STOPPING;
5697                 mutex_exit(&mgp->intrlock);
5698                 (void) untimeout(mgp->timer_id);
5699                 mutex_enter(&mgp->intrlock);
5700                 myri10ge_stop_locked(mgp);
5701                 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
5702         }
5703         status = myri10ge_save_pci_state(mgp);
5704         mutex_exit(&mgp->intrlock);
5705         return (status);
5706 }
5707 
5708 static int
5709 myri10ge_resume(dev_info_t *dip)
5710 {
5711         struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
5712         int status = DDI_SUCCESS;
5713 
5714         if (mgp == NULL) {
5715                 cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
5716                 return (DDI_FAILURE);
5717         }
5718         if (mgp->dip != dip) {
5719                 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
5720                 return (DDI_FAILURE);
5721         }
5722 
5723         mutex_enter(&mgp->intrlock);
5724         status = myri10ge_restore_pci_state(mgp);
5725         if (status == DDI_SUCCESS &&
5726             mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
5727                 status = myri10ge_start_locked(mgp);
5728         }
5729         mutex_exit(&mgp->intrlock);
5730         if (status != DDI_SUCCESS)
5731                 return (status);
5732 
5733         /* start the watchdog timer */
5734         mgp->timer_id = timeout(myri10ge_watchdog, mgp,
5735             mgp->timer_ticks);
5736         return (DDI_SUCCESS);
5737 }
5738 
5739 static int
5740 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5741 {
5742 
5743         struct myri10ge_priv *mgp;
5744         mac_register_t *macp, *omacp;
5745         ddi_acc_handle_t handle;
5746         uint32_t csr, hdr_offset;
5747         int status, span, link_width, max_read_request_4k;
5748         unsigned long bus_number, dev_number, func_number;
5749         size_t bytes;
5750         offset_t ss_offset;
5751         uint8_t vso;
5752 
5753         if (cmd == DDI_RESUME) {
5754                 return (myri10ge_resume(dip));
5755         }
5756 
5757         if (cmd != DDI_ATTACH)
5758                 return (DDI_FAILURE);
5759         if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
5760                 return (DDI_FAILURE);
5761 
5762         /* enable busmater and io space access */
5763         csr = pci_config_get32(handle, PCI_CONF_COMM);
5764         pci_config_put32(handle, PCI_CONF_COMM,
5765             (csr |PCI_COMM_ME|PCI_COMM_MAE));
5766         status = myri10ge_read_pcie_link_width(handle, &link_width);
5767         if (status != 0) {
5768                 cmn_err(CE_WARN, "could not read link width!\n");
5769                 link_width = 0;
5770         }
5771         max_read_request_4k = !myri10ge_set_max_readreq(handle);
5772         status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
5773         if (status != 0)
5774                 goto abort_with_cfg_hdl;
5775         if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
5776                 goto abort_with_cfg_hdl;
5777         /*
5778          * XXXX Hack: mac_register_t grows in newer kernels.  To be
5779          * able to write newer fields, such as m_margin, without
5780          * writing outside allocated memory, we allocate our own macp
5781          * and pass that to mac_register()
5782          */
5783         macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
5784         macp->m_version = omacp->m_version;
5785 
5786         if ((mgp = (struct myri10ge_priv *)
5787             kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
5788                 goto abort_with_macinfo;
5789         }
5790         ddi_set_driver_private(dip, mgp);
5791 
5792         /* setup device name for log messages */
5793         (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));
5794 
5795         mutex_enter(&myri10ge_param_lock);
5796         myri10ge_get_props(dip);
5797         mgp->intr_coal_delay = myri10ge_intr_coal_delay;
5798         mgp->pause = myri10ge_flow_control;
5799         mutex_exit(&myri10ge_param_lock);
5800 
5801         mgp->max_read_request_4k = max_read_request_4k;
5802         mgp->pcie_link_width = link_width;
5803         mgp->running = MYRI10GE_ETH_STOPPED;
5804         mgp->vso = vso;
5805         mgp->dip = dip;
5806         mgp->cfg_hdl = handle;
5807 
5808         mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
5809         myri10ge_test_physical(dip);
5810 
5811         /* allocate command page */
5812         bytes = sizeof (*mgp->cmd);
5813         mgp->cmd = (mcp_cmd_response_t *)
5814             (void *)myri10ge_dma_alloc(dip, bytes,
5815             &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5816             DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
5817             &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
5818         if (mgp->cmd == NULL)
5819                 goto abort_with_mgp;
5820 
5821         (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
5822             &dev_number, &func_number);
5823         if (myri10ge_verbose)
5824                 printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
5825                     bus_number, dev_number, func_number);
5826         status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
5827             (offset_t)0, (offset_t)span,  &myri10ge_dev_access_attr,
5828             &mgp->io_handle);
5829         if (status != DDI_SUCCESS) {
5830                 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
5831                 printf("%s: reg_set = %d, span = %d, status = %d",
5832                     mgp->name, mgp->reg_set, span, status);
5833                 goto abort_with_mgp;
5834         }
5835 
5836         hdr_offset = *(uint32_t *)(void*)(mgp->sram +  MCP_HEADER_PTR_OFFSET);
5837         hdr_offset = ntohl(hdr_offset) & 0xffffc;
5838         ss_offset = hdr_offset +
5839             offsetof(struct mcp_gen_header, string_specs);
5840         mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
5841         myri10ge_pio_copy32(mgp->eeprom_strings,
5842             (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
5843             MYRI10GE_EEPROM_STRINGS_SIZE);
5844         (void) memset(mgp->eeprom_strings +
5845             MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
5846 
5847         status = myri10ge_read_mac_addr(mgp);
5848         if (status) {
5849                 goto abort_with_mapped;
5850         }
5851 
5852         status = myri10ge_select_firmware(mgp);
5853         if (status != 0) {
5854                 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
5855                 goto abort_with_mapped;
5856         }
5857 
5858         status = myri10ge_probe_slices(mgp);
5859         if (status != 0) {
5860                 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
5861                 goto abort_with_dummy_rdma;
5862         }
5863 
5864         status = myri10ge_alloc_slices(mgp);
5865         if (status != 0) {
5866                 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
5867                 goto abort_with_dummy_rdma;
5868         }
5869 
5870         /* add the interrupt handler */
5871         status = myri10ge_add_intrs(mgp, 1);
5872         if (status != 0) {
5873                 cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
5874                     mgp->name);
5875                 goto abort_with_slices;
5876         }
5877 
5878         /* now that we have an iblock_cookie, init the mutexes */
5879         mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
5880         mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);
5881 
5882 
5883         status = myri10ge_nic_stat_init(mgp);
5884         if (status != DDI_SUCCESS)
5885                 goto abort_with_interrupts;
5886         status = myri10ge_info_init(mgp);
5887         if (status != DDI_SUCCESS)
5888                 goto abort_with_stats;
5889 
5890         /*
5891          *      Initialize  GLD state
5892          */
5893 
5894         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
5895         macp->m_driver = mgp;
5896         macp->m_dip = dip;
5897         macp->m_src_addr = mgp->mac_addr;
5898         macp->m_callbacks = &myri10ge_m_callbacks;
5899         macp->m_min_sdu = 0;
5900         macp->m_max_sdu = myri10ge_mtu -
5901             (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
5902 #ifdef SOLARIS_S11
5903         macp->m_margin = VLAN_TAGSZ;
5904 #endif
5905         macp->m_v12n = MAC_VIRT_LEVEL1;
5906         status = mac_register(macp, &mgp->mh);
5907         if (status != 0) {
5908                 cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
5909                     mgp->name, status);
5910                 goto abort_with_info;
5911         }
5912         myri10ge_ndd_init(mgp);
5913         if (myri10ge_verbose)
5914                 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
5915                     mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
5916         mutex_enter(&myri10ge_param_lock);
5917         mgp->next = mgp_list;
5918         mgp_list = mgp;
5919         mutex_exit(&myri10ge_param_lock);
5920         kmem_free(macp, sizeof (*macp) * 8);
5921         mac_free(omacp);
5922         return (DDI_SUCCESS);
5923 
5924 abort_with_info:
5925         myri10ge_info_destroy(mgp);
5926 
5927 abort_with_stats:
5928         myri10ge_nic_stat_destroy(mgp);
5929 
5930 abort_with_interrupts:
5931         mutex_destroy(&mgp->cmd_lock);
5932         mutex_destroy(&mgp->intrlock);
5933         myri10ge_rem_intrs(mgp, 1);
5934 
5935 abort_with_slices:
5936         myri10ge_free_slices(mgp);
5937 
5938 abort_with_dummy_rdma:
5939         myri10ge_dummy_rdma(mgp, 0);
5940 
5941 abort_with_mapped:
5942         ddi_regs_map_free(&mgp->io_handle);
5943 
5944         myri10ge_dma_free(&mgp->cmd_dma);
5945 
5946 abort_with_mgp:
5947         kmem_free(mgp, sizeof (*mgp));
5948 
5949 abort_with_macinfo:
5950         kmem_free(macp, sizeof (*macp) * 8);
5951         mac_free(omacp);
5952 
5953 abort_with_cfg_hdl:
5954         pci_config_teardown(&handle);
5955         return (DDI_FAILURE);
5956 
5957 }
5958 
5959 
5960 static int
5961 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5962 {
5963         struct myri10ge_priv    *mgp, *tmp;
5964         int                     status, i, jbufs_alloced;
5965 
5966         if (cmd == DDI_SUSPEND) {
5967                 status = myri10ge_suspend(dip);
5968                 return (status);
5969         }
5970 
5971         if (cmd != DDI_DETACH) {
5972                 return (DDI_FAILURE);
5973         }
5974         /* Get the driver private (gld_mac_info_t) structure */
5975         mgp = ddi_get_driver_private(dip);
5976 
5977         mutex_enter(&mgp->intrlock);
5978         jbufs_alloced = 0;
5979         for (i = 0; i < mgp->num_slices; i++) {
5980                 myri10ge_remove_jbufs(&mgp->ss[i]);
5981                 jbufs_alloced += mgp->ss[i].jpool.num_alloc;
5982         }
5983         mutex_exit(&mgp->intrlock);
5984         if (jbufs_alloced != 0) {
5985                 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
5986                     mgp->name, jbufs_alloced);
5987                 return (DDI_FAILURE);
5988         }
5989 
5990         mutex_enter(&myri10ge_param_lock);
5991         if (mgp->refcnt != 0) {
5992                 mutex_exit(&myri10ge_param_lock);
5993                 cmn_err(CE_NOTE, "%s: %d external refs remain\n",
5994                     mgp->name, mgp->refcnt);
5995                 return (DDI_FAILURE);
5996         }
5997         mutex_exit(&myri10ge_param_lock);
5998 
5999         status = mac_unregister(mgp->mh);
6000         if (status != DDI_SUCCESS)
6001                 return (status);
6002 
6003         myri10ge_ndd_fini(mgp);
6004         myri10ge_dummy_rdma(mgp, 0);
6005         myri10ge_nic_stat_destroy(mgp);
6006         myri10ge_info_destroy(mgp);
6007 
6008         mutex_destroy(&mgp->cmd_lock);
6009         mutex_destroy(&mgp->intrlock);
6010 
6011         myri10ge_rem_intrs(mgp, 1);
6012 
6013         myri10ge_free_slices(mgp);
6014         ddi_regs_map_free(&mgp->io_handle);
6015         myri10ge_dma_free(&mgp->cmd_dma);
6016         pci_config_teardown(&mgp->cfg_hdl);
6017 
6018         mutex_enter(&myri10ge_param_lock);
6019         if (mgp_list == mgp) {
6020                 mgp_list = mgp->next;
6021         } else {
6022                 tmp = mgp_list;
6023                 while (tmp->next != mgp && tmp->next != NULL)
6024                         tmp = tmp->next;
6025                 if (tmp->next != NULL)
6026                         tmp->next = tmp->next->next;
6027         }
6028         kmem_free(mgp, sizeof (*mgp));
6029         mutex_exit(&myri10ge_param_lock);
6030         return (DDI_SUCCESS);
6031 }
6032 
6033 /*
6034  * Helper for quiesce entry point: Interrupt threads are not being
6035  * scheduled, so we must poll for the confirmation DMA to arrive in
6036  * the firmware stats block for slice 0.  We're essentially running
6037  * the guts of the interrupt handler, and just cherry picking the
6038  * confirmation that the NIC is queuesced (stats->link_down)
6039  */
6040 
6041 static int
6042 myri10ge_poll_down(struct myri10ge_priv *mgp)
6043 {
6044         struct myri10ge_slice_state *ss = mgp->ss;
6045         mcp_irq_data_t *stats = ss->fw_stats;
6046         int valid;
6047         int found_down = 0;
6048 
6049 
6050         /* check for a pending IRQ */
6051 
6052         if (! *((volatile uint8_t *)& stats->valid))
6053                 return (0);
6054         valid = stats->valid;
6055 
6056         /*
6057          * Make sure to tell the NIC to lower a legacy IRQ, else
6058          * it may have corrupt state after restarting
6059          */
6060 
6061         if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
6062                 /* lower legacy IRQ  */
6063                 *mgp->irq_deassert = 0;
6064                 mb();
6065                 /* wait for irq conf DMA */
6066                 while (*((volatile uint8_t *)& stats->valid))
6067                         ;
6068         }
6069         if (stats->stats_updated && stats->link_down)
6070                 found_down = 1;
6071 
6072         if (valid & 0x1)
6073                 *ss->irq_claim = BE_32(3);
6074         *(ss->irq_claim + 1) = BE_32(3);
6075 
6076         return (found_down);
6077 }
6078 
6079 static int
6080 myri10ge_quiesce(dev_info_t *dip)
6081 {
6082         struct myri10ge_priv *mgp;
6083         myri10ge_cmd_t cmd;
6084         int status, down, i;
6085 
6086         mgp = ddi_get_driver_private(dip);
6087         if (mgp == NULL)
6088                 return (DDI_FAILURE);
6089 
6090         /* if devices was unplumbed, it is guaranteed to be quiescent */
6091         if (mgp->running == MYRI10GE_ETH_STOPPED)
6092                 return (DDI_SUCCESS);
6093 
6094         /* send a down CMD to queuesce NIC */
6095         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
6096         if (status) {
6097                 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
6098                 return (DDI_FAILURE);
6099         }
6100 
6101         for (i = 0; i < 20; i++) {
6102                 down = myri10ge_poll_down(mgp);
6103                 if (down)
6104                         break;
6105                 delay(drv_usectohz(100000));
6106                 mb();
6107         }
6108         if (down)
6109                 return (DDI_SUCCESS);
6110         return (DDI_FAILURE);
6111 }
6112 
6113 /*
6114  * Distinguish between allocb'ed blocks, and gesballoc'ed attached
6115  * storage.
6116  */
6117 static void
6118 myri10ge_find_lastfree(void)
6119 {
6120         mblk_t *mp = allocb(1024, 0);
6121         dblk_t *dbp;
6122 
6123         if (mp == NULL) {
6124                 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
6125                 return;
6126         }
6127         dbp = mp->b_datap;
6128         myri10ge_db_lastfree = (void *)dbp->db_lastfree;
6129 }
6130 
6131 int
6132 _init(void)
6133 {
6134         int i;
6135 
6136         if (myri10ge_verbose)
6137                 cmn_err(CE_NOTE,
6138                     "Myricom 10G driver (10GbE) version %s loading\n",
6139                     MYRI10GE_VERSION_STR);
6140         myri10ge_find_lastfree();
6141         mac_init_ops(&myri10ge_ops, "myri10ge");
6142         mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
6143         if ((i = mod_install(&modlinkage)) != 0) {
6144                 cmn_err(CE_WARN, "mod_install returned %d\n", i);
6145                 mac_fini_ops(&myri10ge_ops);
6146                 mutex_destroy(&myri10ge_param_lock);
6147         }
6148         return (i);
6149 }
6150 
6151 int
6152 _fini(void)
6153 {
6154         int i;
6155         i = mod_remove(&modlinkage);
6156         if (i != 0) {
6157                 return (i);
6158         }
6159         mac_fini_ops(&myri10ge_ops);
6160         mutex_destroy(&myri10ge_param_lock);
6161         return (0);
6162 }
6163 
6164 int
6165 _info(struct modinfo *modinfop)
6166 {
6167         return (mod_info(&modlinkage, modinfop));
6168 }
6169 
6170 
6171 /*
6172  *  This file uses MyriGE driver indentation.
6173  *
6174  * Local Variables:
6175  * c-file-style:"sun"
6176  * tab-width:8
6177  * End:
6178  */
--- EOF ---