1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2007-2009 Myricom, Inc.  All rights reserved.
  29  * Use is subject to license terms.
  30  */
  31 
  32 /*
  33  * Copyright (c) 2014, Joyent, Inc.
  34  */
  35 
  36 #ifndef lint
/*
 * Source identification string ("@(#)" prefix plus an "$Id$" keyword
 * expansion naming the file and revision).  It is only defined when
 * the preprocessor symbol "lint" is not defined.
 */
  37 static const char __idstring[] =
  38         "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
  39 #endif
  40 
  41 #define MXGEFW_NDIS
  42 #include "myri10ge_var.h"
  43 #include "rss_eth_z8e.h"
  44 #include "rss_ethp_z8e.h"
  45 #include "mcp_gen_header.h"
  46 
  47 #define MYRI10GE_MAX_ETHER_MTU 9014     /* sizes myri10ge_mtu when MYRICOM_PRIV is defined */
  48 #define MYRI10GE_MAX_GLD_MTU    9000    /* MYRI10GE_DEFAULT_GLD_MTU when MYRICOM_PRIV is defined */
  49 #define MYRI10GE_MIN_GLD_MTU    1500    /* MYRI10GE_DEFAULT_GLD_MTU otherwise */
  50 
/*
 * Numeric interface run-state codes.
 * NOTE(review): the field that stores these values and the legal
 * transitions between them are outside this excerpt -- confirm there.
 */
  51 #define MYRI10GE_ETH_STOPPED 0
  52 #define MYRI10GE_ETH_STOPPING 1
  53 #define MYRI10GE_ETH_STARTING 2
  54 #define MYRI10GE_ETH_RUNNING 3
  55 #define MYRI10GE_ETH_OPEN_FAILED 4
  56 #define MYRI10GE_ETH_SUSPENDED_RUNNING 5
  57 
  58 static int myri10ge_small_bytes = 510;  /* small rx buffer payload; MXGEFW_PAD is added when carving */
/*
 * File-scope configuration defaults.
 * NOTE(review): apart from myri10ge_small_bytes, these variables are
 * consumed outside this excerpt; their exact semantics (and whether they
 * can be overridden at load time) should be confirmed at the use sites.
 */
  59 static int myri10ge_intr_coal_delay = 125;
  60 static int myri10ge_flow_control = 1;
  61 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
  62 static int myri10ge_nvidia_ecrc_enable = 1;     /* only exists on x86 builds */
  63 #endif
  64 static int myri10ge_mtu_override = 0;
  65 static int myri10ge_tx_copylen = 512;
  66 static int myri10ge_deassert_wait = 1;
  67 static int myri10ge_verbose = 0;
  68 static int myri10ge_watchdog_reset = 0;
  69 static int myri10ge_use_msix = 1;
  70 static int myri10ge_max_slices = -1;
  71 static int myri10ge_use_msi = 1;
  72 int myri10ge_force_firmware = 0;
  73 static boolean_t myri10ge_use_lso = B_TRUE;
  74 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
  75 static int myri10ge_tx_hash = 1;
  76 static int myri10ge_lro = 0;
  77 static int myri10ge_lro_cnt = 8;
  78 int myri10ge_lro_max_aggr = 2;
  79 static int myri10ge_lso_copy = 0;
/* forward declaration; the definition is outside this excerpt */
  80 static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
  81 int myri10ge_tx_handles_initial = 128;
  82 
  83 static  kmutex_t myri10ge_param_lock;
/* NOTE(review): the users of the two variables here are outside this excerpt */
  84 static void* myri10ge_db_lastfree;
  85 
/* driver entry points referenced by the dev_ops definition below */
  86 static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
  87 static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
  88 static int myri10ge_quiesce(dev_info_t *dip);
  89 
/*
 * Defines myri10ge_ops with the attach, detach and quiesce entry points
 * declared above; the remaining slots are filled with nulldev/nodev/NULL.
 */
  90 DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
  91     myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);
  92 
  93 
/* loadable-module description: ties the name string to myri10ge_ops */
  94 static struct modldrv modldrv = {
  95         &mod_driverops,
  96         "Myricom 10G driver (10GbE)",
  97         &myri10ge_ops,
  98 };
  99 
 100 
/* module linkage record; its only linkage entry is modldrv */
 101 static struct modlinkage modlinkage = {
 102         MODREV_1,
 103         {&modldrv, NULL},
 104 };
 105 
/* six bytes of 0xff: the all-ones Ethernet broadcast address */
 106 unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 107 
 108 static ddi_dma_attr_t myri10ge_misc_dma_attr = {
        /*
         * DMA attributes requesting 4096-byte alignment and a single
         * scatter/gather element.
         * NOTE(review): the allocations that use this template are
         * outside this excerpt.
         */
 109         DMA_ATTR_V0,                    /* version number. */
 110         (uint64_t)0,                    /* low address */
 111         (uint64_t)0xffffffffffffffffULL, /* high address */
 112         (uint64_t)0x7ffffff,            /* address counter max */
 113         (uint64_t)4096,                 /* alignment */
 114         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 115         (uint32_t)0x1,                  /* minimum transfer size */
 116         (uint64_t)0x7fffffff,           /* maximum transfer size */
 117         (uint64_t)0x7fffffff,           /* maximum segment size */
 118         1,                              /* scatter/gather list length */
 119         1,                              /* granularity */
 120         0                               /* attribute flags */
 121 };
 122 
 123 /*
 124  * The Myri10GE NIC has the following constraints on receive buffers:
 125  * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 126  * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 127  */
 128 
 129 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
        /*
         * Receive-buffer DMA attributes selected by myri10ge_add_jbuf()
         * when myri10ge_mtu is 2048 or larger: always 4096-byte aligned,
         * one scatter/gather element.
         */
 130         DMA_ATTR_V0,                    /* version number. */
 131         (uint64_t)0,                    /* low address */
 132         (uint64_t)0xffffffffffffffffULL, /* high address */
 133         (uint64_t)0x7ffffff,            /* address counter max */
 134         (uint64_t)4096,                 /* alignment */
 135         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 136         (uint32_t)0x1,                  /* minimum transfer size */
 137         (uint64_t)0x7fffffff,           /* maximum transfer size */
 138         UINT64_MAX,                     /* maximum segment size */
 139         1,                              /* scatter/gather list length */
 140         1,                              /* granularity */
 141         0                               /* attribute flags */
 142 };
 143 
 144 static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
        /*
         * Receive-buffer DMA attributes selected by myri10ge_add_jbuf()
         * when myri10ge_mtu is below 2048.  On non-sparc builds the
         * template starts with 0x80 alignment and a 0xfff segment limit;
         * myri10ge_add_jbuf() rewrites the alignment to 4096 and the
         * segment limit to UINT64_MAX at run time if an allocation fails
         * or a buffer crosses a 4KB boundary, which is why this object
         * is not const.
         */
 145         DMA_ATTR_V0,                    /* version number. */
 146         (uint64_t)0,                    /* low address */
 147         (uint64_t)0xffffffffffffffffULL, /* high address */
 148         (uint64_t)0x7ffffff,            /* address counter max */
 149 #if defined sparc64 || defined __sparcv9
 150         (uint64_t)4096,                 /* alignment */
 151 #else
 152         (uint64_t)0x80,                 /* alignment */
 153 #endif
 154         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 155         (uint32_t)0x1,                  /* minimum transfer size */
 156         (uint64_t)0x7fffffff,           /* maximum transfer size */
 157 #if defined sparc64 || defined __sparcv9
 158         UINT64_MAX,                     /* maximum segment size */
 159 #else
 160         (uint64_t)0xfff,                /* maximum segment size */
 161 #endif
 162         1,                              /* scatter/gather list length */
 163         1,                              /* granularity */
 164         0                               /* attribute flags */
 165 };
 166 
 167 static ddi_dma_attr_t myri10ge_tx_dma_attr = {
        /*
         * DMA attributes used by myri10ge_add_tx_handle() when allocating
         * transmit DMA handles: byte alignment and a scatter/gather list
         * length of INT32_MAX.
         */
 168         DMA_ATTR_V0,                    /* version number. */
 169         (uint64_t)0,                    /* low address */
 170         (uint64_t)0xffffffffffffffffULL, /* high address */
 171         (uint64_t)0x7ffffff,            /* address counter max */
 172         (uint64_t)1,                    /* alignment */
 173         (uint_t)0x7f,                   /* burstsizes for 32b and 64b xfers */
 174         (uint32_t)0x1,                  /* minimum transfer size */
 175         (uint64_t)0x7fffffff,           /* maximum transfer size */
 176         UINT64_MAX,                     /* maximum segment size */
 177         INT32_MAX,                      /* scatter/gather list length */
 178         1,                              /* granularity */
 179         0                       /* attribute flags */
 180 };
 181 
 182 #if defined sparc64 || defined __sparcv9
/* WC selects the data-order attribute below: 0 on sparc, 1 elsewhere */
 183 #define WC 0
 184 #else
 185 #define WC 1
 186 #endif
 187 
/*
 * Device access attributes: no byte swapping, and either merging-allowed
 * (WC != 0) or strictly ordered (WC == 0) accesses.  Also passed to
 * ddi_dma_mem_alloc() by myri10ge_add_jbuf().
 */
 188 struct ddi_device_acc_attr myri10ge_dev_access_attr = {
 189         DDI_DEVICE_ATTR_V0,             /* version */
 190         DDI_NEVERSWAP_ACC,              /* endian flags */
 191 #if WC
 192         DDI_MERGING_OK_ACC              /* data order */
 193 #else
 194         DDI_STRICTORDER_ACC
 195 #endif
 196 };
 197 
 198 static void myri10ge_watchdog(void *arg);       /* defined outside this excerpt */
 199 
/*
 * myri10ge_mtu is the per-buffer size used for receive buffers (it is the
 * length passed to ddi_dma_mem_alloc() in myri10ge_add_jbuf()).  It is the
 * frame size plus MXGEFW_PAD plus room for a VLAN tag; MYRICOM_PRIV builds
 * default to the large frame size, other builds to ETHERMAX.
 */
 200 #ifdef MYRICOM_PRIV
 201 int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
 202 #define MYRI10GE_DEFAULT_GLD_MTU        MYRI10GE_MAX_GLD_MTU
 203 #else
 204 int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
 205 #define MYRI10GE_DEFAULT_GLD_MTU        MYRI10GE_MIN_GLD_MTU
 206 #endif
/* NOTE(review): presumably initial/maximum big-buffer pool sizes -- used outside this excerpt, confirm */
 207 int myri10ge_bigbufs_initial = 1024;
 208 int myri10ge_bigbufs_max = 4096;
 209 
 210 
 211 caddr_t
 212 myri10ge_dma_alloc(dev_info_t *dip, size_t len,
 213     ddi_dma_attr_t *attr, ddi_device_acc_attr_t  *accattr,
 214     uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
 215     int warn, int (*wait)(caddr_t))
 216 {
 217         caddr_t  kaddr;
 218         size_t real_length;
 219         ddi_dma_cookie_t cookie;
 220         uint_t count;
 221         int err;
 222 
 223         err = ddi_dma_alloc_handle(dip, attr, wait,
 224             NULL, &dma->handle);
 225         if (err != DDI_SUCCESS) {
 226                 if (warn)
 227                         cmn_err(CE_WARN,
 228                             "myri10ge: ddi_dma_alloc_handle failed\n");
 229                 goto abort_with_nothing;
 230         }
 231 
 232         err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
 233             wait, NULL, &kaddr, &real_length,
 234             &dma->acc_handle);
 235         if (err != DDI_SUCCESS) {
 236                 if (warn)
 237                         cmn_err(CE_WARN,
 238                             "myri10ge: ddi_dma_mem_alloc failed\n");
 239                 goto abort_with_handle;
 240         }
 241 
 242         err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
 243             bind_flags, wait, NULL, &cookie, &count);
 244 
 245         if (err != DDI_SUCCESS) {
 246                 if (warn)
 247                         cmn_err(CE_WARN,
 248                             "myri10ge: ddi_dma_addr_bind_handle failed\n");
 249                 goto abort_with_mem;
 250         }
 251 
 252         if (count != 1) {
 253                 if (warn)
 254                         cmn_err(CE_WARN,
 255                             "myri10ge: got too many dma segments ");
 256                 goto abort_with_bind;
 257         }
 258         dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
 259         dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
 260         return (kaddr);
 261 
 262 abort_with_bind:
 263         (void) ddi_dma_unbind_handle(dma->handle);
 264 
 265 abort_with_mem:
 266         ddi_dma_mem_free(&dma->acc_handle);
 267 
 268 abort_with_handle:
 269         ddi_dma_free_handle(&dma->handle);
 270 abort_with_nothing:
 271         if (warn) {
 272                 cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n  ");
 273                 cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
 274                     (void*) dip, len, (void*) attr);
 275                 cmn_err(CE_WARN,
 276                     "args: ddi_device_acc_attr=%p  alloc_flags=0x%x\n",
 277                     (void*) accattr, alloc_flags);
 278                 cmn_err(CE_WARN, "args: bind_flags=0x%x  dmastuff=%p",
 279                     bind_flags, (void*) dma);
 280         }
 281         return (NULL);
 282 
 283 }
 284 
/*
 * Release the resources held in *dma: unbind the DMA handle, free the
 * DMA memory through its access handle, then free the DMA handle
 * itself.  The return value of the unbind is deliberately ignored.
 */
 285 void
 286 myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
 287 {
 288         (void) ddi_dma_unbind_handle(dma->handle);
 289         ddi_dma_mem_free(&dma->acc_handle);
 290         ddi_dma_free_handle(&dma->handle);
 291 }
 292 
 293 static inline void
 294 myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
 295 {
 296         register volatile uint32_t *to32;
 297         size_t i;
 298 
 299         to32 = (volatile uint32_t *) to;
 300         for (i = (size / 4); i; i--) {
 301                 *to32 = *from32;
 302                 to32++;
 303                 from32++;
 304         }
 305 }
 306 
 307 #if defined(_LP64)
 308 static inline void
 309 myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
 310 {
 311         register volatile uint64_t *to64;
 312         size_t i;
 313 
 314         to64 = (volatile uint64_t *) to;
 315         for (i = (size / 8); i; i--) {
 316                 *to64 = *from64;
 317                 to64++;
 318                 from64++;
 319         }
 320 }
 321 #endif
 322 
 323 /*
 324  * This routine copies memory from the host to the NIC.
 325  * The "size" argument must always be a multiple of
 326  * the size of long (4 or 8 bytes), and to/from must also
 327  * be naturally aligned.
 328  */
 329 static inline void
 330 myri10ge_pio_copy(void *to, void *from, size_t size)
 331 {
 332 #if !defined(_LP64)
 333         ASSERT((size % 4) == 0);
 334         myri10ge_pio_copy32(to, (uint32_t *)from, size);
 335 #else
 336         ASSERT((size % 8) == 0);
 337         myri10ge_pio_copy64(to, (uint64_t *)from, size);
 338 #endif
 339 }
 340 
 341 
 342 /*
 343  * Due to various bugs in Solaris (especially bug 6186772 where the
 344  * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 345  * than two elements), and the design bug where hardware checksums are
 346  * ignored on mblk chains with more than 2 elements, we need to
 347  * allocate private pool of physically contiguous receive buffers.
 348  */
 349 
 350 static void
 351 myri10ge_jpool_init(struct myri10ge_slice_state *ss)
 352 {
 353         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 354 
 355         bzero(jpool, sizeof (*jpool));
 356         mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
 357             ss->mgp->icookie);
 358         jpool->head = NULL;
 359 }
 360 
 361 static void
 362 myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
 363 {
 364         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 365 
 366         if (jpool->head != NULL) {
 367                 cmn_err(CE_WARN,
 368                     "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
 369                     ss->mgp->name);
 370         }
 371         mutex_destroy(&jpool->mtx);
 372 }
 373 
 374 
 375 /*
 376  * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 377  * at most 32 bytes at a time, so as to avoid involving the software
 378  * pio handler in the nic.   We re-write the first segment's low
 379  * DMA address to mark it valid only after we write the entire chunk
 380  * in a burst
 381  */
 382 static inline void
 383 myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
 384 {
 385         src->addr_low |= BE_32(1);
 386         myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
 387         mb();
 388         myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
 389         mb();
 390         src->addr_low &= ~(BE_32(1));
 391         dst->addr_low = src->addr_low;
 392         mb();
 393 }
 394 
 395 static void
 396 myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
 397 {
 398         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 399         struct myri10ge_jpool_entry *jtail, *j, *jfree;
 400         volatile uintptr_t *putp;
 401         uintptr_t put;
 402         int i;
 403 
 404         /* find tail */
 405         jtail = NULL;
 406         if (jpool->head != NULL) {
 407                 j = jpool->head;
 408                 while (j->next != NULL)
 409                         j = j->next;
 410                 jtail = j;
 411         }
 412 
 413         /*
 414          * iterate over all per-CPU caches, and add contents into
 415          * jpool
 416          */
 417         for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
 418                 /* take per-CPU free list */
 419                 putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
 420                 if (*putp == NULL)
 421                         continue;
 422                 put = atomic_swap_ulong(putp, 0);
 423                 jfree = (struct myri10ge_jpool_entry *)put;
 424 
 425                 /* append to pool */
 426                 if (jtail == NULL) {
 427                         jpool->head = jfree;
 428                 } else {
 429                         jtail->next = jfree;
 430                 }
 431                 j = jfree;
 432                 while (j->next != NULL)
 433                         j = j->next;
 434                 jtail = j;
 435         }
 436 }
 437 
 438 /*
 439  * Transfers buffers from the free pool to the nic
 440  * Must be called holding the jpool mutex.
 441  */
 442 
 443 static inline void
 444 myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
 445 {
 446         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 447         struct myri10ge_jpool_entry *j;
 448         myri10ge_rx_ring_t *rx;
 449         int i, idx, limit;
 450 
 451         rx = &ss->rx_big;
 452         limit = ss->j_rx_cnt + (rx->mask + 1);
 453 
 454         for (i = rx->cnt; i != limit; i++) {
 455                 idx = i & (rx->mask);
 456                 j = jpool->head;
 457                 if (j == NULL) {
 458                         myri10ge_pull_jpool(ss);
 459                         j = jpool->head;
 460                         if (j == NULL) {
 461                                 break;
 462                         }
 463                 }
 464                 jpool->head = j->next;
 465                 rx->info[idx].j = j;
 466                 rx->shadow[idx].addr_low = j->dma.low;
 467                 rx->shadow[idx].addr_high = j->dma.high;
 468                 /* copy 4 descriptors (32-bytes) to the mcp at a time */
 469                 if ((idx & 7) == 7) {
 470                         myri10ge_submit_8rx(&rx->lanai[idx - 7],
 471                             &rx->shadow[idx - 7]);
 472                 }
 473         }
 474         rx->cnt = i;
 475 }
 476 
 477 /*
 478  * Transfer buffers from the nic to the free pool.
 479  * Should be called holding the jpool mutex
 480  */
 481 
 482 static inline void
 483 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
 484 {
 485         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 486         struct myri10ge_jpool_entry *j;
 487         myri10ge_rx_ring_t *rx;
 488         int i;
 489 
 490         mutex_enter(&jpool->mtx);
 491         rx = &ss->rx_big;
 492 
 493         for (i = 0; i < rx->mask + 1; i++) {
 494                 j = rx->info[i].j;
 495                 rx->info[i].j = NULL;
 496                 if (j == NULL)
 497                         continue;
 498                 j->next = jpool->head;
 499                 jpool->head = j;
 500         }
 501         mutex_exit(&jpool->mtx);
 502 
 503 }
 504 
 505 
 506 /*
 507  * Free routine which is called when the mblk allocated via
 508  * esballoc() is freed.   Here we return the jumbo buffer
 509  * to the free pool, and possibly pass some jumbo buffers
 510  * to the nic
 511  */
 512 
 513 static void
 514 myri10ge_jfree_rtn(void *arg)
 515 {
 516         struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
 517         struct myri10ge_jpool_stuff *jpool;
 518         volatile uintptr_t *putp;
 519         uintptr_t old, new;
 520 
 521         jpool = &j->ss->jpool;
 522 
 523         /* prepend buffer locklessly to per-CPU freelist */
 524         putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
 525         new = (uintptr_t)j;
 526         do {
 527                 old = *putp;
 528                 j->next = (void *)old;
 529         } while (atomic_cas_ulong(putp, old, new) != old);
 530 }
 531 
/*
 * Destroy one jumbo pool entry: unbind its DMA handle, free its DMA
 * memory and handle, then free the entry structure itself.  The entry
 * must not be referenced after this call.
 */
 532 static void
 533 myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
 534 {
 535         (void) ddi_dma_unbind_handle(j->dma_handle);
 536         ddi_dma_mem_free(&j->acc_handle);
 537         ddi_dma_free_handle(&j->dma_handle);
 538         kmem_free(j, sizeof (*j));
 539 }
 540 
 541 
 542 /*
 543  * Allocates one physically contiguous descriptor
 544  * and add it to the jumbo buffer pool.
 545  */
 546 
 547 static int
 548 myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
 549 {
 550         struct myri10ge_jpool_entry *j;
 551         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 552         ddi_dma_attr_t *rx_dma_attr;
 553         size_t real_length;
 554         ddi_dma_cookie_t cookie;
 555         uint_t count;
 556         int err;
 557 
 558         if (myri10ge_mtu < 2048)
 559                 rx_dma_attr = &myri10ge_rx_std_dma_attr;
 560         else
 561                 rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;
 562 
 563 again:
 564         j = (struct myri10ge_jpool_entry *)
 565             kmem_alloc(sizeof (*j), KM_SLEEP);
 566         err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
 567             DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
 568         if (err != DDI_SUCCESS)
 569                 goto abort_with_j;
 570 
 571         err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
 572             &myri10ge_dev_access_attr,  DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
 573             NULL, &j->buf, &real_length, &j->acc_handle);
 574         if (err != DDI_SUCCESS)
 575                 goto abort_with_handle;
 576 
 577         err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
 578             real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
 579             NULL, &cookie, &count);
 580         if (err != DDI_SUCCESS)
 581                 goto abort_with_mem;
 582 
 583         /*
 584          * Make certain std MTU buffers do not cross a 4KB boundary:
 585          *
 586          * Setting dma_attr_align=4096 will do this, but the system
 587          * will only allocate 1 RX buffer per 4KB page, rather than 2.
 588          * Setting dma_attr_granular=4096 *seems* to work around this,
 589          * but I'm paranoid about future systems no longer honoring
 590          * this, so fall back to the safe, but memory wasting way if a
 591          * buffer crosses a 4KB boundary.
 592          */
 593 
 594         if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
 595             rx_dma_attr->dma_attr_align != 4096) {
 596                 uint32_t start, end;
 597 
 598                 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
 599                 end = start + myri10ge_mtu;
 600                 if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
 601                         printf("std buffer crossed a 4KB boundary!\n");
 602                         myri10ge_remove_jbuf(j);
 603                         rx_dma_attr->dma_attr_align = 4096;
 604                         rx_dma_attr->dma_attr_seg = UINT64_MAX;
 605                         goto again;
 606                 }
 607         }
 608 
 609         j->dma.low =
 610             htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
 611         j->dma.high =
 612             htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
 613         j->ss = ss;
 614 
 615 
 616         j->free_func.free_func = myri10ge_jfree_rtn;
 617         j->free_func.free_arg = (char *)j;
 618         mutex_enter(&jpool->mtx);
 619         j->next = jpool->head;
 620         jpool->head = j;
 621         jpool->num_alloc++;
 622         mutex_exit(&jpool->mtx);
 623         return (0);
 624 
 625 abort_with_mem:
 626         ddi_dma_mem_free(&j->acc_handle);
 627 
 628 abort_with_handle:
 629         ddi_dma_free_handle(&j->dma_handle);
 630 
 631 abort_with_j:
 632         kmem_free(j, sizeof (*j));
 633 
 634         /*
 635          * If an allocation failed, perhaps it failed because it could
 636          * not satisfy granularity requirement.  Disable that, and
 637          * try agin.
 638          */
 639         if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
 640             rx_dma_attr->dma_attr_align != 4096) {
 641                         cmn_err(CE_NOTE,
 642                             "!alloc failed, reverting to gran=1\n");
 643                         rx_dma_attr->dma_attr_align = 4096;
 644                         rx_dma_attr->dma_attr_seg = UINT64_MAX;
 645                         goto again;
 646         }
 647         return (err);
 648 }
 649 
 650 static int
 651 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
 652 {
 653         int i;
 654         struct myri10ge_jpool_entry *j;
 655 
 656         mutex_enter(&jpool->mtx);
 657         j = jpool->head;
 658         i = 0;
 659         while (j != NULL) {
 660                 i++;
 661                 j = j->next;
 662         }
 663         mutex_exit(&jpool->mtx);
 664         return (i);
 665 }
 666 
 667 static int
 668 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
 669 {
 670         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 671         int allocated = 0;
 672         int err;
 673         int needed;
 674 
 675         /*
 676          * if total is set, user wants "num" jbufs in the pool,
 677          * otherwise the user wants to "num" additional jbufs
 678          * added to the pool
 679          */
 680         if (total && jpool->num_alloc) {
 681                 allocated = myri10ge_jfree_cnt(jpool);
 682                 needed = num - allocated;
 683         } else {
 684                 needed = num;
 685         }
 686 
 687         while (needed > 0) {
 688                 needed--;
 689                 err = myri10ge_add_jbuf(ss);
 690                 if (err == 0) {
 691                         allocated++;
 692                 }
 693         }
 694         return (allocated);
 695 }
 696 
 697 static void
 698 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
 699 {
 700         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 701         struct myri10ge_jpool_entry *j;
 702 
 703         mutex_enter(&jpool->mtx);
 704         myri10ge_pull_jpool(ss);
 705         while (jpool->head != NULL) {
 706                 jpool->num_alloc--;
 707                 j = jpool->head;
 708                 jpool->head = j->next;
 709                 myri10ge_remove_jbuf(j);
 710         }
 711         mutex_exit(&jpool->mtx);
 712 }
 713 
 714 static void
 715 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
 716 {
 717         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 718         struct myri10ge_jpool_entry *j = NULL;
 719         caddr_t ptr;
 720         uint32_t dma_low, dma_high;
 721         int idx, len;
 722         unsigned int alloc_size;
 723 
 724         dma_low = dma_high = len = 0;
 725         alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
 726         ptr = NULL;
 727         for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
 728                 /* Allocate a jumbo frame and carve it into small frames */
 729                 if (len < alloc_size) {
 730                         mutex_enter(&jpool->mtx);
 731                         /* remove jumbo from freelist */
 732                         j = jpool->head;
 733                         jpool->head = j->next;
 734                         /* place it onto small list */
 735                         j->next = ss->small_jpool;
 736                         ss->small_jpool = j;
 737                         mutex_exit(&jpool->mtx);
 738                         len = myri10ge_mtu;
 739                         dma_low = ntohl(j->dma.low);
 740                         dma_high = ntohl(j->dma.high);
 741                         ptr = j->buf;
 742                 }
 743                 ss->rx_small.info[idx].ptr = ptr;
 744                 ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
 745                 ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
 746                 len -= alloc_size;
 747                 ptr += alloc_size;
 748                 dma_low += alloc_size;
 749         }
 750 }
 751 
 752 /*
 753  * Return the jumbo bufs we carved up for small to the jumbo pool
 754  */
 755 
 756 static void
 757 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
 758 {
 759         struct myri10ge_jpool_stuff *jpool = &ss->jpool;
 760         struct myri10ge_jpool_entry *j = NULL;
 761 
 762         mutex_enter(&jpool->mtx);
 763         while (ss->small_jpool != NULL) {
 764                 j = ss->small_jpool;
 765                 ss->small_jpool = j->next;
 766                 j->next = jpool->head;
 767                 jpool->head = j;
 768         }
 769         mutex_exit(&jpool->mtx);
 770         ss->jbufs_for_smalls = 0;
 771 }
 772 
 773 static int
 774 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
 775 {
 776         myri10ge_tx_ring_t *tx = &ss->tx;
 777         struct myri10ge_priv *mgp = ss->mgp;
 778         struct myri10ge_tx_dma_handle *handle;
 779         int err;
 780 
 781         handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
 782         err = ddi_dma_alloc_handle(mgp->dip,
 783             &myri10ge_tx_dma_attr,
 784             DDI_DMA_SLEEP, NULL,
 785             &handle->h);
 786         if (err) {
 787                 static int limit = 0;
 788                 if (limit == 0)
 789                         cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n",
 790                             mgp->name);
 791                 limit++;
 792                 kmem_free(handle, sizeof (*handle));
 793                 return (err);
 794         }
 795         mutex_enter(&tx->handle_lock);
 796         MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
 797         handle->next = tx->free_tx_handles;
 798         tx->free_tx_handles = handle;
 799         mutex_exit(&tx->handle_lock);
 800         return (DDI_SUCCESS);
 801 }
 802 
 803 static void
 804 myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
 805 {
 806         myri10ge_tx_ring_t *tx = &ss->tx;
 807         struct myri10ge_tx_dma_handle *handle;
 808         mutex_enter(&tx->handle_lock);
 809 
 810         handle = tx->free_tx_handles;
 811         while (handle != NULL) {
 812                 tx->free_tx_handles = handle->next;
 813                 ddi_dma_free_handle(&handle->h);
 814                 kmem_free(handle, sizeof (*handle));
 815                 handle = tx->free_tx_handles;
 816                 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
 817         }
 818         mutex_exit(&tx->handle_lock);
 819         if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
 820                 cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
 821                     ss->mgp->name,
 822                     (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
 823         }
 824 }
 825 
 826 static void
 827 myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
 828     struct myri10ge_tx_dma_handle_head *list)
 829 {
 830         mutex_enter(&tx->handle_lock);
 831         list->tail->next = tx->free_tx_handles;
 832         tx->free_tx_handles = list->head;
 833         mutex_exit(&tx->handle_lock);
 834 }
 835 
 836 static void
 837 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
 838     struct myri10ge_tx_dma_handle *handle)
 839 {
 840         struct myri10ge_tx_dma_handle_head list;
 841 
 842         if (handle == NULL)
 843                 return;
 844         list.head = handle;
 845         list.tail = handle;
 846         while (handle != NULL) {
 847                 list.tail = handle;
 848                 handle = handle->next;
 849         }
 850         myri10ge_free_tx_handles(tx, &list);
 851 }
 852 
 853 static int
 854 myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
 855     struct myri10ge_tx_dma_handle **ret)
 856 {
 857         myri10ge_tx_ring_t *tx = &ss->tx;
 858         struct myri10ge_tx_dma_handle *handle;
 859         int err, i;
 860 
 861         mutex_enter(&tx->handle_lock);
 862         for (i = 0; i < count; i++) {
 863                 handle = tx->free_tx_handles;
 864                 while (handle == NULL) {
 865                         mutex_exit(&tx->handle_lock);
 866                         err = myri10ge_add_tx_handle(ss);
 867                         if (err != DDI_SUCCESS) {
 868                                 goto abort_with_handles;
 869                         }
 870                         mutex_enter(&tx->handle_lock);
 871                         handle = tx->free_tx_handles;
 872                 }
 873                 tx->free_tx_handles = handle->next;
 874                 handle->next = *ret;
 875                 *ret = handle;
 876         }
 877         mutex_exit(&tx->handle_lock);
 878         return (DDI_SUCCESS);
 879 
 880 abort_with_handles:
 881         myri10ge_free_tx_handle_slist(tx, *ret);
 882         return (err);
 883 }
 884 
 885 
 886 /*
 887  * Frees DMA resources associated with the send ring
 888  */
 889 static void
 890 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
 891 {
 892         myri10ge_tx_ring_t *tx;
 893         struct myri10ge_tx_dma_handle_head handles;
 894         size_t bytes;
 895         int idx;
 896 
 897         tx = &ss->tx;
 898         handles.head = NULL;
 899         handles.tail = NULL;
 900         for (idx = 0; idx < ss->tx.mask + 1; idx++) {
 901                 if (tx->info[idx].m) {
 902                         (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
 903                         handles.head = tx->info[idx].handle;
 904                         if (handles.tail == NULL)
 905                                 handles.tail = tx->info[idx].handle;
 906                         freeb(tx->info[idx].m);
 907                         tx->info[idx].m = 0;
 908                         tx->info[idx].handle = 0;
 909                 }
 910                 tx->cp[idx].va = NULL;
 911                 myri10ge_dma_free(&tx->cp[idx].dma);
 912         }
 913         bytes = sizeof (*tx->cp) * (tx->mask + 1);
 914         kmem_free(tx->cp, bytes);
 915         tx->cp = NULL;
 916         if (handles.head != NULL)
 917                 myri10ge_free_tx_handles(tx, &handles);
 918         myri10ge_remove_tx_handles(ss);
 919 }
 920 
 921 /*
 922  * Allocates DMA handles associated with the send ring
 923  */
 924 static inline int
 925 myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
 926 {
 927         struct myri10ge_tx_dma_handle *handles;
 928         int h;
 929         size_t bytes;
 930 
 931         bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
 932         ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
 933         if (ss->tx.cp == NULL) {
 934                 cmn_err(CE_WARN,
 935                     "%s: Failed to allocate tx copyblock storage\n",
 936                     ss->mgp->name);
 937                 return (DDI_FAILURE);
 938         }
 939 
 940 
 941         /* allocate the TX copyblocks */
 942         for (h = 0; h < ss->tx.mask + 1; h++) {
 943                 ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
 944                     4096, &myri10ge_rx_jumbo_dma_attr,
 945                     &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
 946                     DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
 947                     DDI_DMA_DONTWAIT);
 948                 if (ss->tx.cp[h].va == NULL) {
 949                         cmn_err(CE_WARN, "%s: Failed to allocate tx "
 950                             "copyblock %d\n", ss->mgp->name, h);
 951                         goto abort_with_copyblocks;
 952                 }
 953         }
 954         /* pre-allocate transmit handles */
 955         handles = NULL;
 956         (void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
 957             &handles);
 958         if (handles != NULL)
 959                 myri10ge_free_tx_handle_slist(&ss->tx, handles);
 960 
 961         return (DDI_SUCCESS);
 962 
 963 abort_with_copyblocks:
 964         while (h > 0)  {
 965                 h--;
 966                 myri10ge_dma_free(&ss->tx.cp[h].dma);
 967         }
 968 
 969         bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
 970         kmem_free(ss->tx.cp, bytes);
 971         ss->tx.cp = NULL;
 972         return (DDI_FAILURE);
 973 }
 974 
 975 /*
 976  * The eeprom strings on the lanaiX have the format
 977  * SN=x\0
 978  * MAC=x:x:x:x:x:x\0
 979  * PT:ddd mmm xx xx:xx:xx xx\0
 980  * PV:ddd mmm xx xx:xx:xx xx\0
 981  */
 982 static int
 983 myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
 984 {
 985 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
 986 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') :     \
 987                 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') :  \
 988                 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
 989 
 990         char *ptr, *limit;
 991         int i, hv, lv;
 992 
 993         ptr = mgp->eeprom_strings;
 994         limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
 995 
 996         while (*ptr != '\0' && ptr < limit) {
 997                 if (memcmp(ptr, "MAC=", 4) == 0) {
 998                         ptr += 4;
 999                         if (myri10ge_verbose)
1000                                 printf("%s: mac address = %s\n", mgp->name,
1001                                     ptr);
1002                         mgp->mac_addr_string = ptr;
1003                         for (i = 0; i < 6; i++) {
1004                                 if ((ptr + 2) > limit)
1005                                         goto abort;
1006 
1007                                 if (*(ptr+1) == ':') {
1008                                         hv = 0;
1009                                         lv = myri10ge_digit(*ptr); ptr++;
1010                                 } else {
1011                                         hv = myri10ge_digit(*ptr); ptr++;
1012                                         lv = myri10ge_digit(*ptr); ptr++;
1013                                 }
1014                                 mgp->mac_addr[i] = (hv << 4) | lv;
1015                                 ptr++;
1016                         }
1017                 }
1018                 if (memcmp((const void *)ptr, "SN=", 3) == 0) {
1019                         ptr += 3;
1020                         mgp->sn_str = (char *)ptr;
1021                 }
1022                 if (memcmp((const void *)ptr, "PC=", 3) == 0) {
1023                         ptr += 3;
1024                         mgp->pc_str = (char *)ptr;
1025                 }
1026                 MYRI10GE_NEXT_STRING(ptr);
1027         }
1028 
1029         return (0);
1030 
1031 abort:
1032         cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
1033         return (ENXIO);
1034 }
1035 
1036 
1037 /*
1038  * Determine the register set containing the PCI resource we
1039  * want to map: the memory-mappable part of the interface. We do
1040  * this by scanning the DDI "reg" property of the interface,
1041  * which is an array of mx_ddi_reg_set structures.
1042  */
1043 static int
1044 myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
1045     unsigned long *busno, unsigned long *devno,
1046     unsigned long *funcno)
1047 {
1048 
1049 #define REGISTER_NUMBER(ip)     (ip[0] >>  0 & 0xff)
1050 #define FUNCTION_NUMBER(ip)     (ip[0] >>  8 & 0x07)
1051 #define DEVICE_NUMBER(ip)       (ip[0] >> 11 & 0x1f)
1052 #define BUS_NUMBER(ip)          (ip[0] >> 16 & 0xff)
1053 #define ADDRESS_SPACE(ip)       (ip[0] >> 24 & 0x03)
1054 #define PCI_ADDR_HIGH(ip)       (ip[1])
1055 #define PCI_ADDR_LOW(ip)        (ip[2])
1056 #define PCI_SPAN_HIGH(ip)       (ip[3])
1057 #define PCI_SPAN_LOW(ip)        (ip[4])
1058 
1059 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
1060 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
1061 
1062         int *data, i, *rs;
1063         uint32_t nelementsp;
1064 
1065 #ifdef MYRI10GE_REGSET_VERBOSE
1066         char *address_space_name[] = { "Configuration Space",
1067                                         "I/O Space",
1068                                         "32-bit Memory Space",
1069                                         "64-bit Memory Space"
1070         };
1071 #endif
1072 
1073         if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1074             "reg", &data, &nelementsp) != DDI_SUCCESS) {
1075                 printf("Could not determine register set.\n");
1076                 return (ENXIO);
1077         }
1078 
1079 #ifdef MYRI10GE_REGSET_VERBOSE
1080         printf("There are %d register sets.\n", nelementsp / 5);
1081 #endif
1082         if (!nelementsp) {
1083                 printf("Didn't find any \"reg\" properties.\n");
1084                 ddi_prop_free(data);
1085                 return (ENODEV);
1086         }
1087 
1088         /* Scan for the register number. */
1089         rs = &data[0];
1090         *busno = BUS_NUMBER(rs);
1091         *devno = DEVICE_NUMBER(rs);
1092         *funcno = FUNCTION_NUMBER(rs);
1093 
1094 #ifdef MYRI10GE_REGSET_VERBOSE
1095         printf("*** Scanning for register number.\n");
1096 #endif
1097         for (i = 0; i < nelementsp / 5; i++) {
1098                 rs = &data[5 * i];
1099 #ifdef MYRI10GE_REGSET_VERBOSE
1100                 printf("Examining register set %d:\n", i);
1101                 printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
1102                 printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
1103                 printf("  Device number   = %d.\n", DEVICE_NUMBER(rs));
1104                 printf("  Bus number      = %d.\n", BUS_NUMBER(rs));
1105                 printf("  Address space   = %d (%s ).\n", ADDRESS_SPACE(rs),
1106                     address_space_name[ADDRESS_SPACE(rs)]);
1107                 printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
1108                     PCI_ADDR_LOW(rs));
1109                 printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
1110                     PCI_SPAN_LOW(rs));
1111 #endif
1112                 /* We are looking for a memory property. */
1113 
1114                 if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
1115                     ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
1116                         *reg_set = i;
1117 
1118 #ifdef MYRI10GE_REGSET_VERBOSE
1119                         printf("%s uses register set %d.\n",
1120                             address_space_name[ADDRESS_SPACE(rs)], *reg_set);
1121 #endif
1122 
1123                         *span = (PCI_SPAN_LOW(rs));
1124 #ifdef MYRI10GE_REGSET_VERBOSE
1125                         printf("Board span is 0x%x\n", *span);
1126 #endif
1127                         break;
1128                 }
1129         }
1130 
1131         ddi_prop_free(data);
1132 
1133         /* If no match, fail. */
1134         if (i >= nelementsp / 5) {
1135                 return (EIO);
1136         }
1137 
1138         return (0);
1139 }
1140 
1141 
1142 static int
1143 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
1144 {
1145         void *inflate_buffer;
1146         int rv, status;
1147         size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
1148         size_t destlen;
1149         mcp_gen_header_t *hdr;
1150         unsigned hdr_offset, i;
1151 
1152 
1153         *limit = 0; /* -Wuninitialized */
1154         status = 0;
1155 
1156         inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
1157         if (!inflate_buffer) {
1158                 cmn_err(CE_WARN,
1159                     "%s: Could not allocate buffer to inflate mcp\n",
1160                     mgp->name);
1161                 return (ENOMEM);
1162         }
1163 
1164         destlen = sram_size;
1165         rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
1166             mgp->eth_z8e_length);
1167 
1168         if (rv != Z_OK) {
1169                 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
1170                     mgp->name, z_strerror(rv));
1171                 status = ENXIO;
1172                 goto abort;
1173         }
1174 
1175         *limit = (uint32_t)destlen;
1176 
1177         hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
1178             MCP_HEADER_PTR_OFFSET));
1179         hdr = (void *)((char *)inflate_buffer + hdr_offset);
1180         if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
1181                 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
1182                     ntohl(hdr->mcp_type));
1183                 status = EIO;
1184                 goto abort;
1185         }
1186 
1187         /* save firmware version for kstat */
1188         (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
1189         if (myri10ge_verbose)
1190                 printf("%s: firmware id: %s\n", mgp->name, hdr->version);
1191 
1192         /* Copy the inflated firmware to NIC SRAM. */
1193         for (i = 0; i < *limit; i += 256) {
1194                 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
1195                     (char *)inflate_buffer + i,
1196                     min(256U, (unsigned)(*limit - i)));
1197                 mb();
1198                 (void) *(int *)(void *)mgp->sram;
1199                 mb();
1200         }
1201 
1202 abort:
1203         kmem_free(inflate_buffer, sram_size);
1204 
1205         return (status);
1206 
1207 }
1208 
1209 
1210 int
1211 myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
1212                 myri10ge_cmd_t *data)
1213 {
1214         mcp_cmd_t *buf;
1215         char buf_bytes[sizeof (*buf) + 8];
1216         volatile mcp_cmd_response_t *response = mgp->cmd;
1217         volatile char *cmd_addr =
1218             (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
1219         int sleep_total = 0;
1220 
1221         /* ensure buf is aligned to 8 bytes */
1222         buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1223 
1224         buf->data0 = htonl(data->data0);
1225         buf->data1 = htonl(data->data1);
1226         buf->data2 = htonl(data->data2);
1227         buf->cmd = htonl(cmd);
1228         buf->response_addr.low = mgp->cmd_dma.low;
1229         buf->response_addr.high = mgp->cmd_dma.high;
1230         mutex_enter(&mgp->cmd_lock);
1231         response->result = 0xffffffff;
1232         mb();
1233 
1234         myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));
1235 
1236         /* wait up to 20ms */
1237         for (sleep_total = 0; sleep_total < 20; sleep_total++) {
1238                 mb();
1239                 if (response->result != 0xffffffff) {
1240                         if (response->result == 0) {
1241                                 data->data0 = ntohl(response->data);
1242                                 mutex_exit(&mgp->cmd_lock);
1243                                 return (0);
1244                         } else if (ntohl(response->result)
1245                             == MXGEFW_CMD_UNKNOWN) {
1246                                 mutex_exit(&mgp->cmd_lock);
1247                                 return (ENOSYS);
1248                         } else if (ntohl(response->result)
1249                             == MXGEFW_CMD_ERROR_UNALIGNED) {
1250                                 mutex_exit(&mgp->cmd_lock);
1251                                 return (E2BIG);
1252                         } else {
1253                                 cmn_err(CE_WARN,
1254                                     "%s: command %d failed, result = %d\n",
1255                                     mgp->name, cmd, ntohl(response->result));
1256                                 mutex_exit(&mgp->cmd_lock);
1257                                 return (ENXIO);
1258                         }
1259                 }
1260                 drv_usecwait(1000);
1261         }
1262         mutex_exit(&mgp->cmd_lock);
1263         cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
1264             mgp->name, cmd, ntohl(response->result));
1265         return (EAGAIN);
1266 }
1267 
1268 /*
1269  * Enable or disable periodic RDMAs from the host to make certain
1270  * chipsets resend dropped PCIe messages
1271  */
1272 
1273 static void
1274 myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
1275 {
1276         char buf_bytes[72];
1277         volatile uint32_t *confirm;
1278         volatile char *submit;
1279         uint32_t *buf;
1280         int i;
1281 
1282         buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1283 
1284         /* clear confirmation addr */
1285         confirm = (volatile uint32_t *)mgp->cmd;
1286         *confirm = 0;
1287         mb();
1288 
1289         /*
1290          * send an rdma command to the PCIe engine, and wait for the
1291          * response in the confirmation address.  The firmware should
1292          *  write a -1 there to indicate it is alive and well
1293          */
1294 
1295         buf[0] = mgp->cmd_dma.high;          /* confirm addr MSW */
1296         buf[1] = mgp->cmd_dma.low;           /* confirm addr LSW */
1297         buf[2] = htonl(0xffffffff);             /* confirm data */
1298         buf[3] = htonl(mgp->cmd_dma.high);   /* dummy addr MSW */
1299         buf[4] = htonl(mgp->cmd_dma.low);    /* dummy addr LSW */
1300         buf[5] = htonl(enable);                 /* enable? */
1301 
1302 
1303         submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);
1304 
1305         myri10ge_pio_copy((char *)submit, buf, 64);
1306         mb();
1307         drv_usecwait(1000);
1308         mb();
1309         i = 0;
1310         while (*confirm != 0xffffffff && i < 20) {
1311                 drv_usecwait(1000);
1312                 i++;
1313         }
1314         if (*confirm != 0xffffffff) {
1315                 cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
1316                     mgp->name,
1317                     (enable ? "enable" : "disable"), (void*) confirm, *confirm);
1318         }
1319 }
1320 
1321 static int
1322 myri10ge_load_firmware(struct myri10ge_priv *mgp)
1323 {
1324         myri10ge_cmd_t cmd;
1325         volatile uint32_t *confirm;
1326         volatile char *submit;
1327         char buf_bytes[72];
1328         uint32_t *buf, size;
1329         int status, i;
1330 
1331         buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1332 
1333         status = myri10ge_load_firmware_from_zlib(mgp, &size);
1334         if (status) {
1335                 cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
1336                 return (status);
1337         }
1338 
1339         /* clear confirmation addr */
1340         confirm = (volatile uint32_t *)mgp->cmd;
1341         *confirm = 0;
1342         mb();
1343 
1344         /*
1345          * send a reload command to the bootstrap MCP, and wait for the
1346          * response in the confirmation address.  The firmware should
1347          * write a -1 there to indicate it is alive and well
1348          */
1349 
1350         buf[0] = mgp->cmd_dma.high;  /* confirm addr MSW */
1351         buf[1] = mgp->cmd_dma.low;   /* confirm addr LSW */
1352         buf[2] = htonl(0xffffffff);     /* confirm data */
1353 
1354         /*
1355          * FIX: All newest firmware should un-protect the bottom of
1356          * the sram before handoff. However, the very first interfaces
1357          * do not. Therefore the handoff copy must skip the first 8 bytes
1358          */
1359         buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
1360         buf[4] = htonl(size - 8);       /* length of code */
1361         buf[5] = htonl(8);              /* where to copy to */
1362         buf[6] = htonl(0);              /* where to jump to */
1363 
1364         submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);
1365 
1366         myri10ge_pio_copy((char *)submit, buf, 64);
1367         mb();
1368         drv_usecwait(1000);
1369         mb();
1370         i = 0;
1371         while (*confirm != 0xffffffff && i < 1000) {
1372                 drv_usecwait(1000);
1373                 i++;
1374         }
1375         if (*confirm != 0xffffffff) {
1376                 cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
1377                     mgp->name, (void *) confirm, *confirm);
1378 
1379                 return (ENXIO);
1380         }
1381         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1382         if (status != 0) {
1383                 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
1384                     mgp->name);
1385                 return (ENXIO);
1386         }
1387 
1388         mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
1389         myri10ge_dummy_rdma(mgp, 1);
1390         return (0);
1391 }
1392 
1393 static int
1394 myri10ge_m_unicst(void *arg, const uint8_t *addr)
1395 {
1396         struct myri10ge_priv *mgp = arg;
1397         myri10ge_cmd_t cmd;
1398         int status;
1399 
1400         cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1401             | (addr[2] << 8) | addr[3]);
1402 
1403         cmd.data1 = ((addr[4] << 8) | (addr[5]));
1404 
1405         status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
1406         if (status == 0 && (addr != mgp->mac_addr))
1407                 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));
1408 
1409         return (status);
1410 }
1411 
1412 static int
1413 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
1414 {
1415         myri10ge_cmd_t cmd;
1416         int status;
1417 
1418         if (pause)
1419                 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
1420                     &cmd);
1421         else
1422                 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
1423                     &cmd);
1424 
1425         if (status) {
1426                 cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
1427                     mgp->name);
1428                 return (ENXIO);
1429         }
1430         mgp->pause = pause;
1431         return (0);
1432 }
1433 
1434 static void
1435 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
1436 {
1437         myri10ge_cmd_t cmd;
1438         int status;
1439 
1440         if (promisc)
1441                 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
1442         else
1443                 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);
1444 
1445         if (status) {
1446                 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
1447                     mgp->name);
1448         }
1449 }
1450 
1451 static int
1452 myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
1453 {
1454         myri10ge_cmd_t cmd;
1455         int status;
1456         uint32_t len;
1457         void *dmabench;
1458         struct myri10ge_dma_stuff dmabench_dma;
1459         char *test = " ";
1460 
1461         /*
1462          * Run a small DMA test.
1463          * The magic multipliers to the length tell the firmware
1464          * tp do DMA read, write, or read+write tests.  The
1465          * results are returned in cmd.data0.  The upper 16
1466          * bits or the return is the number of transfers completed.
1467          * The lower 16 bits is the time in 0.5us ticks that the
1468          * transfers took to complete
1469          */
1470 
1471         len = mgp->tx_boundary;
1472 
1473         dmabench = myri10ge_dma_alloc(mgp->dip, len,
1474             &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
1475             DDI_DMA_STREAMING,  DDI_DMA_RDWR|DDI_DMA_STREAMING,
1476             &dmabench_dma, 1, DDI_DMA_DONTWAIT);
1477         mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
1478         if (dmabench == NULL) {
1479                 cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
1480                 return (ENOMEM);
1481         }
1482 
1483         cmd.data0 = ntohl(dmabench_dma.low);
1484         cmd.data1 = ntohl(dmabench_dma.high);
1485         cmd.data2 = len * 0x10000;
1486         status = myri10ge_send_cmd(mgp, test_type, &cmd);
1487         if (status != 0) {
1488                 test = "read";
1489                 goto abort;
1490         }
1491         mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1492 
1493         cmd.data0 = ntohl(dmabench_dma.low);
1494         cmd.data1 = ntohl(dmabench_dma.high);
1495         cmd.data2 = len * 0x1;
1496         status = myri10ge_send_cmd(mgp, test_type, &cmd);
1497         if (status != 0) {
1498                 test = "write";
1499                 goto abort;
1500         }
1501         mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1502 
1503         cmd.data0 = ntohl(dmabench_dma.low);
1504         cmd.data1 = ntohl(dmabench_dma.high);
1505         cmd.data2 = len * 0x10001;
1506         status = myri10ge_send_cmd(mgp, test_type, &cmd);
1507         if (status != 0) {
1508                 test = "read/write";
1509                 goto abort;
1510         }
1511         mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
1512             (cmd.data0 & 0xffff);
1513 
1514 
1515 abort:
1516         myri10ge_dma_free(&dmabench_dma);
1517         if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
1518                 cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
1519                     test);
1520         return (status);
1521 }
1522 
1523 static int
1524 myri10ge_reset(struct myri10ge_priv *mgp)
1525 {
1526         myri10ge_cmd_t cmd;
1527         struct myri10ge_nic_stat *ethstat;
1528         struct myri10ge_slice_state *ss;
1529         int i, status;
1530         size_t bytes;
1531 
1532         /* send a reset command to the card to see if it is alive */
1533         (void) memset(&cmd, 0, sizeof (cmd));
1534         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
1535         if (status != 0) {
1536                 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
1537                 return (ENXIO);
1538         }
1539 
1540         /* Now exchange information about interrupts  */
1541 
1542         bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
1543         cmd.data0 = (uint32_t)bytes;
1544         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1545 
1546         /*
1547          * Even though we already know how many slices are supported
1548          * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
1549          * has magic side effects, and must be called after a reset.
1550          * It must be called prior to calling any RSS related cmds,
1551          * including assigning an interrupt queue for anything but
1552          * slice 0.  It must also be called *after*
1553          * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1554          * the firmware to compute offsets.
1555          */
1556 
1557         if (mgp->num_slices > 1) {
1558 
1559                 /* ask the maximum number of slices it supports */
1560                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1561                     &cmd);
1562                 if (status != 0) {
1563                         cmn_err(CE_WARN,
1564                             "%s: failed to get number of slices\n",
1565                             mgp->name);
1566                         return (status);
1567                 }
1568 
1569                 /*
1570                  * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1571                  * to setting up the interrupt queue DMA
1572                  */
1573 
1574                 cmd.data0 = mgp->num_slices;
1575                 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
1576                     MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1577                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1578                     &cmd);
1579                 if (status != 0) {
1580                         cmn_err(CE_WARN,
1581                             "%s: failed to set number of slices\n",
1582                             mgp->name);
1583                         return (status);
1584                 }
1585         }
1586         for (i = 0; i < mgp->num_slices; i++) {
1587                 ss = &mgp->ss[i];
1588                 cmd.data0 = ntohl(ss->rx_done.dma.low);
1589                 cmd.data1 = ntohl(ss->rx_done.dma.high);
1590                 cmd.data2 = i;
1591                 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
1592                     &cmd);
1593         };
1594 
1595         status |= myri10ge_send_cmd(mgp,  MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1596         for (i = 0; i < mgp->num_slices; i++) {
1597                 ss = &mgp->ss[i];
1598                 ss->irq_claim = (volatile unsigned int *)
1599                     (void *)(mgp->sram + cmd.data0 + 8 * i);
1600         }
1601 
1602         if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
1603                 status |= myri10ge_send_cmd(mgp,
1604                     MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1605                 mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1606         }
1607 
1608         status |= myri10ge_send_cmd(mgp,
1609             MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1610         mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1611 
1612         if (status != 0) {
1613                 cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
1614                     mgp->name);
1615                 return (status);
1616         }
1617 
1618         *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
1619         (void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);
1620 
1621         /* reset mcp/driver shared state back to 0 */
1622 
1623         for (i = 0; i < mgp->num_slices; i++) {
1624                 ss = &mgp->ss[i];
1625                 bytes = mgp->max_intr_slots *
1626                     sizeof (*mgp->ss[0].rx_done.entry);
1627                 (void) memset(ss->rx_done.entry, 0, bytes);
1628                 ss->tx.req = 0;
1629                 ss->tx.done = 0;
1630                 ss->tx.pkt_done = 0;
1631                 ss->rx_big.cnt = 0;
1632                 ss->rx_small.cnt = 0;
1633                 ss->rx_done.idx = 0;
1634                 ss->rx_done.cnt = 0;
1635                 ss->rx_token = 0;
1636                 ss->tx.watchdog_done = 0;
1637                 ss->tx.watchdog_req = 0;
1638                 ss->tx.active = 0;
1639                 ss->tx.activate = 0;
1640         }
1641         mgp->watchdog_rx_pause = 0;
1642         if (mgp->ksp_stat != NULL) {
1643                 ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
1644                 ethstat->link_changes.value.ul = 0;
1645         }
1646         status = myri10ge_m_unicst(mgp, mgp->mac_addr);
1647         myri10ge_change_promisc(mgp, 0);
1648         (void) myri10ge_change_pause(mgp, mgp->pause);
1649         return (status);
1650 }
1651 
1652 static int
1653 myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
1654 {
1655         myri10ge_cmd_t cmd;
1656         int i, b, s, t, j;
1657         int status;
1658         uint32_t k[8];
1659         uint32_t tmp;
1660         uint8_t *key;
1661 
1662         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
1663             &cmd);
1664         if (status != 0) {
1665                 cmn_err(CE_WARN, "%s: failed to get rss key\n",
1666                     mgp->name);
1667                 return (EIO);
1668         }
1669         myri10ge_pio_copy32(mgp->rss_key,
1670             (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
1671             sizeof (mgp->rss_key));
1672 
1673         mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
1674             KM_SLEEP);
1675         key = (uint8_t *)mgp->rss_key;
1676         t = 0;
1677         for (b = 0; b < 12; b++) {
1678                 for (s = 0; s < 8; s++) {
1679                         /* Bits: b*8+s, ..., b*8+s+31 */
1680                         k[s] = 0;
1681                         for (j = 0; j < 32; j++) {
1682                                 int bit = b*8+s+j;
1683                                 bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
1684                                 k[s] |= bit << (31 - j);
1685                         }
1686                 }
1687 
1688                 for (i = 0; i <= 0xff; i++) {
1689                         tmp = 0;
1690                         if (i & (1 << 7)) { tmp ^= k[0]; }
1691                         if (i & (1 << 6)) { tmp ^= k[1]; }
1692                         if (i & (1 << 5)) { tmp ^= k[2]; }
1693                         if (i & (1 << 4)) { tmp ^= k[3]; }
1694                         if (i & (1 << 3)) { tmp ^= k[4]; }
1695                         if (i & (1 << 2)) { tmp ^= k[5]; }
1696                         if (i & (1 << 1)) { tmp ^= k[6]; }
1697                         if (i & (1 << 0)) { tmp ^= k[7]; }
1698                         mgp->toeplitz_hash_table[t++] = tmp;
1699                 }
1700         }
1701         return (0);
1702 }
1703 
1704 static inline struct myri10ge_slice_state *
1705 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1706 {
1707         struct tcphdr *hdr;
1708         uint32_t saddr, daddr;
1709         uint32_t hash, slice;
1710         uint32_t *table = mgp->toeplitz_hash_table;
1711         uint16_t src, dst;
1712 
1713         /*
1714          * Note hashing order is reversed from how it is done
1715          * in the NIC, so as to generate the same hash value
1716          * for the connection to try to keep connections CPU local
1717          */
1718 
1719         /* hash on IPv4 src/dst address */
1720         saddr = ntohl(ip->ip_src.s_addr);
1721         daddr = ntohl(ip->ip_dst.s_addr);
1722         hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
1723         hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
1724         hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
1725         hash ^= table[(256 * 3) + ((daddr) & 0xff)];
1726         hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
1727         hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
1728         hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
1729         hash ^= table[(256 * 7) + ((saddr) & 0xff)];
1730         /* hash on TCP port, if required */
1731         if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
1732             ip->ip_p == IPPROTO_TCP) {
1733                 hdr = (struct tcphdr *)(void *)
1734                     (((uint8_t *)ip) +  (ip->ip_hl << 2));
1735                 src = ntohs(hdr->th_sport);
1736                 dst = ntohs(hdr->th_dport);
1737 
1738                 hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
1739                 hash ^= table[(256 * 9) + ((dst) & 0xff)];
1740                 hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
1741                 hash ^= table[(256 * 11) + ((src) & 0xff)];
1742         }
1743         slice = (mgp->num_slices - 1) & hash;
1744         return (&mgp->ss[slice]);
1745 
1746 }
1747 
1748 static inline struct myri10ge_slice_state *
1749 myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1750 {
1751         struct tcphdr *hdr;
1752         uint32_t slice, hash_val;
1753 
1754 
1755         if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
1756                 return (&mgp->ss[0]);
1757         }
1758         hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) +  (ip->ip_hl << 2));
1759 
1760         /*
1761          * Use the second byte of the *destination* address for
1762          * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
1763          */
1764         hash_val = ntohs(hdr->th_dport) & 0xff;
1765         if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
1766                 hash_val += ntohs(hdr->th_sport) & 0xff;
1767 
1768         slice = (mgp->num_slices - 1) & hash_val;
1769         return (&mgp->ss[slice]);
1770 }
1771 
1772 static inline struct myri10ge_slice_state *
1773 myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
1774 {
1775         unsigned int slice = 0;
1776         struct ether_header *eh;
1777         struct ether_vlan_header *vh;
1778         struct ip *ip;
1779         int ehl, ihl;
1780 
1781         if (mgp->num_slices == 1)
1782                 return (&mgp->ss[0]);
1783 
1784         if (myri10ge_tx_hash == 0) {
1785                 slice = CPU->cpu_id & (mgp->num_slices - 1);
1786                 return (&mgp->ss[slice]);
1787         }
1788 
1789         /*
1790          *  ensure it is a TCP or UDP over IPv4 packet, and that the
1791          *  headers are in the 1st mblk.  Otherwise, punt
1792          */
1793         ehl = sizeof (*eh);
1794         ihl = sizeof (*ip);
1795         if ((MBLKL(mp)) <  (ehl + ihl + 8))
1796                 return (&mgp->ss[0]);
1797         eh = (struct ether_header *)(void *)mp->b_rptr;
1798         ip = (struct ip *)(void *)(eh + 1);
1799         if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
1800                 if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
1801                         return (&mgp->ss[0]);
1802                 vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
1803                 if (vh->ether_type != BE_16(ETHERTYPE_IP))
1804                         return (&mgp->ss[0]);
1805                 ehl += 4;
1806                 ip = (struct ip *)(void *)(vh + 1);
1807         }
1808         ihl = ip->ip_hl << 2;
1809         if (MBLKL(mp) <  (ehl + ihl + 8))
1810                 return (&mgp->ss[0]);
1811         switch (myri10ge_rss_hash) {
1812         case MXGEFW_RSS_HASH_TYPE_IPV4:
1813                 /* fallthru */
1814         case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
1815                 /* fallthru */
1816         case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
1817                 return (myri10ge_toeplitz_send_hash(mgp, ip));
1818         case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
1819                 /* fallthru */
1820         case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
1821                 return (myri10ge_simple_send_hash(mgp, ip));
1822         default:
1823                 break;
1824         }
1825         return (&mgp->ss[0]);
1826 }
1827 
1828 static int
1829 myri10ge_setup_slice(struct myri10ge_slice_state *ss)
1830 {
1831         struct myri10ge_priv *mgp = ss->mgp;
1832         myri10ge_cmd_t cmd;
1833         int tx_ring_size, rx_ring_size;
1834         int tx_ring_entries, rx_ring_entries;
1835         int slice, status;
1836         int allocated, idx;
1837         size_t bytes;
1838 
1839         slice = ss - mgp->ss;
1840         cmd.data0 = slice;
1841         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
1842         tx_ring_size = cmd.data0;
1843         cmd.data0 = slice;
1844         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1845         if (status != 0)
1846                 return (status);
1847         rx_ring_size = cmd.data0;
1848 
1849         tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
1850         rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
1851         ss->tx.mask = tx_ring_entries - 1;
1852         ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
1853 
1854         /* get the lanai pointers to the send and receive rings */
1855 
1856         cmd.data0 = slice;
1857         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
1858         ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
1859         if (mgp->num_slices > 1) {
1860                 ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
1861                 ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
1862                     64 * slice;
1863         } else {
1864                 ss->tx.go = NULL;
1865                 ss->tx.stop = NULL;
1866         }
1867 
1868         cmd.data0 = slice;
1869         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
1870         ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
1871             (void *)(mgp->sram + cmd.data0);
1872 
1873         cmd.data0 = slice;
1874         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
1875         ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
1876             (mgp->sram + cmd.data0);
1877 
1878         if (status != 0) {
1879                 cmn_err(CE_WARN,
1880                     "%s: failed to get ring sizes or locations\n", mgp->name);
1881                 return (status);
1882         }
1883 
1884         status = ENOMEM;
1885         bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
1886         ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
1887         if (ss->rx_small.shadow == NULL)
1888                 goto abort;
1889         (void) memset(ss->rx_small.shadow, 0, bytes);
1890 
1891         bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
1892         ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
1893         if (ss->rx_big.shadow == NULL)
1894                 goto abort_with_rx_small_shadow;
1895         (void) memset(ss->rx_big.shadow, 0, bytes);
1896 
1897         /* allocate the host info rings */
1898 
1899         bytes = tx_ring_entries * sizeof (*ss->tx.info);
1900         ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
1901         if (ss->tx.info == NULL)
1902                 goto abort_with_rx_big_shadow;
1903         (void) memset(ss->tx.info, 0, bytes);
1904 
1905         bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
1906         ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
1907         if (ss->rx_small.info == NULL)
1908                 goto abort_with_tx_info;
1909         (void) memset(ss->rx_small.info, 0, bytes);
1910 
1911         bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
1912         ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
1913         if (ss->rx_big.info == NULL)
1914                 goto abort_with_rx_small_info;
1915         (void) memset(ss->rx_big.info, 0, bytes);
1916 
1917         ss->tx.stall = ss->tx.sched = 0;
1918         ss->tx.stall_early = ss->tx.stall_late = 0;
1919 
1920         ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
1921             (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));
1922 
1923         allocated = myri10ge_add_jbufs(ss,
1924             myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
1925         if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
1926                 cmn_err(CE_WARN,
1927                     "%s: Could not allocate enough receive buffers (%d/%d)\n",
1928                     mgp->name, allocated,
1929                     myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
1930                 goto abort_with_jumbos;
1931         }
1932 
1933         myri10ge_carve_up_jbufs_into_small_ring(ss);
1934         ss->j_rx_cnt = 0;
1935 
1936         mutex_enter(&ss->jpool.mtx);
1937         if (allocated < rx_ring_entries)
1938                 ss->jpool.low_water = allocated / 4;
1939         else
1940                 ss->jpool.low_water = rx_ring_entries / 2;
1941 
1942         /*
1943          * invalidate the big receive ring in case we do not
1944          * allocate sufficient jumbos to fill it
1945          */
1946         (void) memset(ss->rx_big.shadow, 1,
1947             (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
1948         for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
1949                 myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
1950                     &ss->rx_big.shadow[idx - 7]);
1951                 mb();
1952         }
1953 
1954 
1955         myri10ge_restock_jumbos(ss);
1956 
1957         for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
1958                 myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
1959                     &ss->rx_small.shadow[idx - 7]);
1960                 mb();
1961         }
1962         ss->rx_small.cnt = ss->rx_small.mask + 1;
1963 
1964         mutex_exit(&ss->jpool.mtx);
1965 
1966         status = myri10ge_prepare_tx_ring(ss);
1967 
1968         if (status != 0)
1969                 goto abort_with_small_jbufs;
1970 
1971         cmd.data0 = ntohl(ss->fw_stats_dma.low);
1972         cmd.data1 = ntohl(ss->fw_stats_dma.high);
1973         cmd.data2 = sizeof (mcp_irq_data_t);
1974         cmd.data2 |= (slice << 16);
1975         bzero(ss->fw_stats, sizeof (*ss->fw_stats));
1976         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
1977         if (status == ENOSYS) {
1978                 cmd.data0 = ntohl(ss->fw_stats_dma.low) +
1979                     offsetof(mcp_irq_data_t, send_done_count);
1980                 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1981                 status = myri10ge_send_cmd(mgp,
1982                     MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
1983         }
1984         if (status) {
1985                 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
1986                 goto abort_with_tx;
1987         }
1988 
1989         return (0);
1990 
1991 abort_with_tx:
1992         myri10ge_unprepare_tx_ring(ss);
1993 
1994 abort_with_small_jbufs:
1995         myri10ge_release_small_jbufs(ss);
1996 
1997 abort_with_jumbos:
1998         if (allocated != 0) {
1999                 mutex_enter(&ss->jpool.mtx);
2000                 ss->jpool.low_water = 0;
2001                 mutex_exit(&ss->jpool.mtx);
2002                 myri10ge_unstock_jumbos(ss);
2003                 myri10ge_remove_jbufs(ss);
2004         }
2005 
2006         bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2007         kmem_free(ss->rx_big.info, bytes);
2008 
2009 abort_with_rx_small_info:
2010         bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2011         kmem_free(ss->rx_small.info, bytes);
2012 
2013 abort_with_tx_info:
2014         bytes = tx_ring_entries * sizeof (*ss->tx.info);
2015         kmem_free(ss->tx.info, bytes);
2016 
2017 abort_with_rx_big_shadow:
2018         bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2019         kmem_free(ss->rx_big.shadow, bytes);
2020 
2021 abort_with_rx_small_shadow:
2022         bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2023         kmem_free(ss->rx_small.shadow, bytes);
2024 abort:
2025         return (status);
2026 
2027 }
2028 
2029 static void
2030 myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
2031 {
2032         int tx_ring_entries, rx_ring_entries;
2033         size_t bytes;
2034 
2035         /* ignore slices that have not been fully setup */
2036         if (ss->tx.cp == NULL)
2037                 return;
2038         /* Free the TX copy buffers */
2039         myri10ge_unprepare_tx_ring(ss);
2040 
2041         /* stop passing returned buffers to firmware */
2042 
2043         mutex_enter(&ss->jpool.mtx);
2044         ss->jpool.low_water = 0;
2045         mutex_exit(&ss->jpool.mtx);
2046         myri10ge_release_small_jbufs(ss);
2047 
2048         /* Release the free jumbo frame pool */
2049         myri10ge_unstock_jumbos(ss);
2050         myri10ge_remove_jbufs(ss);
2051 
2052         rx_ring_entries = ss->rx_big.mask + 1;
2053         tx_ring_entries = ss->tx.mask + 1;
2054 
2055         bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2056         kmem_free(ss->rx_big.info, bytes);
2057 
2058         bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2059         kmem_free(ss->rx_small.info, bytes);
2060 
2061         bytes = tx_ring_entries * sizeof (*ss->tx.info);
2062         kmem_free(ss->tx.info, bytes);
2063 
2064         bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2065         kmem_free(ss->rx_big.shadow, bytes);
2066 
2067         bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2068         kmem_free(ss->rx_small.shadow, bytes);
2069 
2070 }
2071 static int
2072 myri10ge_start_locked(struct myri10ge_priv *mgp)
2073 {
2074         myri10ge_cmd_t cmd;
2075         int status, big_pow2, i;
2076         volatile uint8_t *itable;
2077 
2078         status = DDI_SUCCESS;
2079         /* Allocate DMA resources and receive buffers */
2080 
2081         status = myri10ge_reset(mgp);
2082         if (status != 0) {
2083                 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
2084                 return (DDI_FAILURE);
2085         }
2086 
2087         if (mgp->num_slices > 1) {
2088                 cmd.data0 = mgp->num_slices;
2089                 cmd.data1 = 1; /* use MSI-X */
2090                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2091                     &cmd);
2092                 if (status != 0) {
2093                         cmn_err(CE_WARN,
2094                             "%s: failed to set number of slices\n",
2095                             mgp->name);
2096                         goto abort_with_nothing;
2097                 }
2098                 /* setup the indirection table */
2099                 cmd.data0 = mgp->num_slices;
2100                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2101                     &cmd);
2102 
2103                 status |= myri10ge_send_cmd(mgp,
2104                     MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
2105                 if (status != 0) {
2106                         cmn_err(CE_WARN,
2107                             "%s: failed to setup rss tables\n", mgp->name);
2108                 }
2109 
2110                 /* just enable an identity mapping */
2111                 itable = mgp->sram + cmd.data0;
2112                 for (i = 0; i < mgp->num_slices; i++)
2113                         itable[i] = (uint8_t)i;
2114 
2115                 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
2116                         status = myri10ge_init_toeplitz(mgp);
2117                         if (status != 0) {
2118                                 cmn_err(CE_WARN, "%s: failed to setup "
2119                                     "toeplitz tx hash table", mgp->name);
2120                                 goto abort_with_nothing;
2121                         }
2122                 }
2123                 cmd.data0 = 1;
2124                 cmd.data1 = myri10ge_rss_hash;
2125                 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2126                     &cmd);
2127                 if (status != 0) {
2128                         cmn_err(CE_WARN,
2129                             "%s: failed to enable slices\n", mgp->name);
2130                         goto abort_with_toeplitz;
2131                 }
2132         }
2133 
2134         for (i = 0; i < mgp->num_slices; i++) {
2135                 status = myri10ge_setup_slice(&mgp->ss[i]);
2136                 if (status != 0)
2137                         goto abort_with_slices;
2138         }
2139 
2140         /*
2141          * Tell the MCP how many buffers he has, and to
2142          *  bring the ethernet interface up
2143          *
2144          * Firmware needs the big buff size as a power of 2.  Lie and
2145          * tell him the buffer is larger, because we only use 1
2146          * buffer/pkt, and the mtu will prevent overruns
2147          */
2148         big_pow2 = myri10ge_mtu + MXGEFW_PAD;
2149         while (!ISP2(big_pow2))
2150                 big_pow2++;
2151 
2152         /* now give firmware buffers sizes, and MTU */
2153         cmd.data0 = myri10ge_mtu;
2154         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
2155         cmd.data0 = myri10ge_small_bytes;
2156         status |=
2157             myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
2158         cmd.data0 = big_pow2;
2159         status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
2160         if (status) {
2161                 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
2162                 goto abort_with_slices;
2163         }
2164 
2165 
2166         cmd.data0 = 1;
2167         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
2168         if (status) {
2169                 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
2170                     mgp->name, status);
2171         } else {
2172                 mgp->features |= MYRI10GE_TSO;
2173         }
2174 
2175         mgp->link_state = -1;
2176         mgp->rdma_tags_available = 15;
2177         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
2178         if (status) {
2179                 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
2180                 goto abort_with_slices;
2181         }
2182         mgp->running = MYRI10GE_ETH_RUNNING;
2183         return (DDI_SUCCESS);
2184 
2185 abort_with_slices:
2186         for (i = 0; i < mgp->num_slices; i++)
2187                 myri10ge_teardown_slice(&mgp->ss[i]);
2188 
2189         mgp->running = MYRI10GE_ETH_STOPPED;
2190 
2191 abort_with_toeplitz:
2192         if (mgp->toeplitz_hash_table != NULL) {
2193                 kmem_free(mgp->toeplitz_hash_table,
2194                     sizeof (uint32_t) * 12 * 256);
2195                 mgp->toeplitz_hash_table = NULL;
2196         }
2197 
2198 abort_with_nothing:
2199         return (DDI_FAILURE);
2200 }
2201 
2202 static void
2203 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2204 {
2205         int status, old_down_cnt;
2206         myri10ge_cmd_t cmd;
2207         int wait_time = 10;
2208         int i, polling;
2209 
2210         old_down_cnt = mgp->down_cnt;
2211         mb();
2212         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2213         if (status) {
2214                 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2215         }
2216 
2217         while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2218                 delay(drv_sectohz(1));
2219                 wait_time--;
2220                 if (wait_time == 0)
2221                         break;
2222         }
2223 again:
2224         if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2225                 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2226                 for (i = 0; i < mgp->num_slices; i++) {
2227                         /*
2228                          * take and release the rx lock to ensure
2229                          * that no interrupt thread is blocked
2230                          * elsewhere in the stack, preventing
2231                          * completion
2232                          */
2233 
2234                         mutex_enter(&mgp->ss[i].rx_lock);
2235                         printf("%s: slice %d rx irq idle\n",
2236                             mgp->name, i);
2237                         mutex_exit(&mgp->ss[i].rx_lock);
2238 
2239                         /* verify that the poll handler is inactive */
2240                         mutex_enter(&mgp->ss->poll_lock);
2241                         polling = mgp->ss->rx_polling;
2242                         mutex_exit(&mgp->ss->poll_lock);
2243                         if (polling) {
2244                                 printf("%s: slice %d is polling\n",
2245                                     mgp->name, i);
2246                                 delay(drv_sectohz(1));
2247                                 goto again;
2248                         }
2249                 }
2250                 delay(drv_sectohz(1));
2251                 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2252                         cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2253                 }
2254         }
2255 
2256         for (i = 0; i < mgp->num_slices; i++)
2257                 myri10ge_teardown_slice(&mgp->ss[i]);
2258 
2259         if (mgp->toeplitz_hash_table != NULL) {
2260                 kmem_free(mgp->toeplitz_hash_table,
2261                     sizeof (uint32_t) * 12 * 256);
2262                 mgp->toeplitz_hash_table = NULL;
2263         }
2264         mgp->running = MYRI10GE_ETH_STOPPED;
2265 }
2266 
2267 static int
2268 myri10ge_m_start(void *arg)
2269 {
2270         struct myri10ge_priv *mgp = arg;
2271         int status;
2272 
2273         mutex_enter(&mgp->intrlock);
2274 
2275         if (mgp->running != MYRI10GE_ETH_STOPPED) {
2276                 mutex_exit(&mgp->intrlock);
2277                 return (DDI_FAILURE);
2278         }
2279         status = myri10ge_start_locked(mgp);
2280         mutex_exit(&mgp->intrlock);
2281 
2282         if (status != DDI_SUCCESS)
2283                 return (status);
2284 
2285         /* start the watchdog timer */
2286         mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2287             mgp->timer_ticks);
2288         return (DDI_SUCCESS);
2289 
2290 }
2291 
2292 static void
2293 myri10ge_m_stop(void *arg)
2294 {
2295         struct myri10ge_priv *mgp = arg;
2296 
2297         mutex_enter(&mgp->intrlock);
2298         /* if the device not running give up */
2299         if (mgp->running != MYRI10GE_ETH_RUNNING) {
2300                 mutex_exit(&mgp->intrlock);
2301                 return;
2302         }
2303 
2304         mgp->running = MYRI10GE_ETH_STOPPING;
2305         mutex_exit(&mgp->intrlock);
2306         (void) untimeout(mgp->timer_id);
2307         mutex_enter(&mgp->intrlock);
2308         myri10ge_stop_locked(mgp);
2309         mutex_exit(&mgp->intrlock);
2310 
2311 }
2312 
2313 static inline void
2314 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
2315 {
2316         struct ether_header *eh;
2317         struct ip *ip;
2318         struct ip6_hdr *ip6;
2319         uint32_t start, stuff, end, partial, hdrlen;
2320 
2321 
2322         csum = ntohs((uint16_t)csum);
2323         eh = (struct ether_header *)(void *)mp->b_rptr;
2324         hdrlen = sizeof (*eh);
2325         if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2326                 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2327                     myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2328                         s->brdcstrcv++;
2329                 else
2330                         s->multircv++;
2331         }
2332 
2333         if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
2334                 /*
2335                  * fix checksum by subtracting 4 bytes after what the
2336                  * firmware thought was the end of the ether hdr
2337                  */
2338                 partial = *(uint32_t *)
2339                     (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
2340                 csum += ~partial;
2341                 csum +=  (csum < ~partial);
2342                 csum = (csum >> 16) + (csum & 0xFFFF);
2343                 csum = (csum >> 16) + (csum & 0xFFFF);
2344                 hdrlen += VLAN_TAGSZ;
2345         }
2346 
2347         if (eh->ether_type ==  BE_16(ETHERTYPE_IP)) {
2348                 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
2349                 start = ip->ip_hl << 2;
2350 
2351                 if (ip->ip_p == IPPROTO_TCP)
2352                         stuff = start + offsetof(struct tcphdr, th_sum);
2353                 else if (ip->ip_p == IPPROTO_UDP)
2354                         stuff = start + offsetof(struct udphdr, uh_sum);
2355                 else
2356                         return;
2357                 end = ntohs(ip->ip_len);
2358         } else if (eh->ether_type ==  BE_16(ETHERTYPE_IPV6)) {
2359                 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
2360                 start = sizeof (*ip6);
2361                 if (ip6->ip6_nxt == IPPROTO_TCP) {
2362                         stuff = start + offsetof(struct tcphdr, th_sum);
2363                 } else if (ip6->ip6_nxt == IPPROTO_UDP)
2364                         stuff = start + offsetof(struct udphdr, uh_sum);
2365                 else
2366                         return;
2367                 end = start + ntohs(ip6->ip6_plen);
2368                 /*
2369                  * IPv6 headers do not contain a checksum, and hence
2370                  * do not checksum to zero, so they don't "fall out"
2371                  * of the partial checksum calculation like IPv4
2372                  * headers do.  We need to fix the partial checksum by
2373                  * subtracting the checksum of the IPv6 header.
2374                  */
2375 
2376                 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
2377                 csum += ~partial;
2378                 csum +=  (csum < ~partial);
2379                 csum = (csum >> 16) + (csum & 0xFFFF);
2380                 csum = (csum >> 16) + (csum & 0xFFFF);
2381         } else {
2382                 return;
2383         }
2384 
2385         if (MBLKL(mp) > hdrlen + end) {
2386                 /* padded frame, so hw csum may be invalid */
2387                 return;
2388         }
2389 
2390         mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
2391 }
2392 
2393 static mblk_t *
2394 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
2395     uint32_t csum)
2396 {
2397         mblk_t *mp;
2398         myri10ge_rx_ring_t *rx;
2399         int idx;
2400 
2401         rx = &ss->rx_small;
2402         idx = rx->cnt & rx->mask;
2403         ss->rx_small.cnt++;
2404 
2405         /* allocate a new buffer to pass up the stack */
2406         mp = allocb(len + MXGEFW_PAD, 0);
2407         if (mp == NULL) {
2408                 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
2409                 goto abort;
2410         }
2411         bcopy(ss->rx_small.info[idx].ptr,
2412             (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2413         mp->b_wptr += len + MXGEFW_PAD;
2414         mp->b_rptr += MXGEFW_PAD;
2415 
2416         ss->rx_stats.ibytes += len;
2417         ss->rx_stats.ipackets += 1;
2418         myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2419 
2420 abort:
2421         if ((idx & 7) == 7) {
2422                 myri10ge_submit_8rx(&rx->lanai[idx - 7],
2423                     &rx->shadow[idx - 7]);
2424         }
2425 
2426         return (mp);
2427 }
2428 
2429 
/*
 * Handle one completed receive that landed in the big (jumbo) receive
 * ring of slice ss.
 *
 * len is the frame length; the frame is preceded in the receive buffer
 * by MXGEFW_PAD bytes, which are skipped before the mblk is returned.
 * csum is passed through to myri10ge_rx_csum().
 *
 * Normally the jumbo buffer itself is loaned upstream with desballoc().
 * If, after a restock attempt, fewer than 16 buffers are still owned by
 * the NIC, the frame is instead copied into a fresh allocb() buffer and
 * the jumbo buffer is released with myri10ge_jfree_rtn().
 *
 * Returns the mblk describing the frame, or NULL when the ring slot has
 * no buffer attached or when no mblk could be allocated (the frame is
 * dropped in both cases).
 */
static mblk_t *
myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
    uint32_t csum)
{
        struct myri10ge_jpool_stuff *jpool;
        struct myri10ge_jpool_entry *j;
        mblk_t *mp;
        int idx, num_owned_by_mcp;

        jpool = &ss->jpool;
        idx = ss->j_rx_cnt & ss->rx_big.mask;
        j = ss->rx_big.info[idx].j;

        /* nothing attached to this slot: report it and give up */
        if (j == NULL) {
                printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
                    ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
                return (NULL);
        }


        /* detach the buffer from the ring slot and consume the slot */
        ss->rx_big.info[idx].j = NULL;
        ss->j_rx_cnt++;


        /*
         * Check to see if we are low on rx buffers.
         * Note that we must leave at least 8 free so there are
         * enough to free in a single 64-byte write.
         */
        num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
        if (num_owned_by_mcp < jpool->low_water) {
                /* restocking is done with the jumbo pool mutex held */
                mutex_enter(&jpool->mtx);
                myri10ge_restock_jumbos(ss);
                mutex_exit(&jpool->mtx);
                num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
                /* if we are still low, then we have to copy */
                if (num_owned_by_mcp < 16) {
                        MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
                        /* allocate a new buffer to pass up the stack */
                        mp = allocb(len + MXGEFW_PAD, 0);
                        if (mp == NULL) {
                                goto abort;
                        }
                        /* copy the pad as well; set_len skips over it */
                        bcopy(j->buf,
                            (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
                        myri10ge_jfree_rtn(j);
                        /* push buffer back to NIC */
                        mutex_enter(&jpool->mtx);
                        myri10ge_restock_jumbos(ss);
                        mutex_exit(&jpool->mtx);
                        goto set_len;
                }
        }

        /* loan our buffer to the stack */
        mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
        if (mp == NULL) {
                goto abort;
        }

set_len:
        /* expose only the frame: skip the leading pad, then len bytes */
        mp->b_rptr += MXGEFW_PAD;
        mp->b_wptr = ((unsigned char *) mp->b_rptr + len);

        ss->rx_stats.ibytes += len;
        ss->rx_stats.ipackets += 1;
        myri10ge_rx_csum(mp, &ss->rx_stats, csum);

        return (mp);

abort:
        /*
         * No mblk available: release the jumbo buffer and count the
         * drop.  The ring slot has already been consumed above.
         */
        myri10ge_jfree_rtn(j);
        MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
        return (NULL);
}
2505 
/*
 * Free all transmit buffers up until the specified index.
 *
 * Ring slots are reclaimed one at a time, starting at tx->done, until
 * tx->pkt_done equals mcp_index (the caller passes the send-done count
 * reported by the firmware).  For every reclaimed slot that has an mblk
 * attached, the DMA handle is unbound and collected on a local list and
 * the mblk is freed; the collected handles are released in one call to
 * myri10ge_free_tx_handles() at the end.  Per-packet statistics stored
 * in a slot are folded into tx->stats, and that is also where
 * tx->pkt_done advances.
 */
static inline void
myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
{
        myri10ge_tx_ring_t *tx;
        struct myri10ge_tx_dma_handle_head handles;
        int idx;
        int limit = 0;

        tx = &ss->tx;
        handles.head = NULL;
        handles.tail = NULL;
        while (tx->pkt_done != (int)mcp_index) {
                idx = tx->done & tx->mask;

                /*
                 * mblk & DMA handle attached only to first slot
                 * per buffer in the packet
                 */

                if (tx->info[idx].m) {
                        (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
                        /* push the handle on the front of the local list */
                        tx->info[idx].handle->next = handles.head;
                        handles.head = tx->info[idx].handle;
                        if (handles.tail == NULL)
                                handles.tail = tx->info[idx].handle;
                        /* free just this mblk; see the note above */
                        freeb(tx->info[idx].m);
                        tx->info[idx].m = 0;
                        tx->info[idx].handle = 0;
                }
                /* a non-zero opackets marks a slot that carries pkt stats */
                if (tx->info[idx].ostat.opackets != 0) {
                        tx->stats.multixmt += tx->info[idx].ostat.multixmt;
                        tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
                        tx->stats.obytes += tx->info[idx].ostat.obytes;
                        tx->stats.opackets += tx->info[idx].ostat.opackets;
                        tx->info[idx].stat.un.all = 0;
                        tx->pkt_done++;
                }

                tx->done++;
                /*
                 * if we stalled the queue, wake it.  But Wait until
                 * we have at least 1/2 our slots free.
                 */
                if ((tx->req - tx->done) < (tx->mask >> 1) &&
                    tx->stall != tx->sched) {
                        mutex_enter(&ss->tx.lock);
                        tx->sched = tx->stall;
                        mutex_exit(&ss->tx.lock);
                        mac_tx_ring_update(ss->mgp->mh, tx->rh);
                }

                /* limit potential for livelock */
                if (unlikely(++limit >  2 * tx->mask))
                        break;
        }
        if (tx->req == tx->done && tx->stop != NULL) {
                /*
                 * Nic has sent all pending requests, allow him
                 * to stop polling this queue
                 */
                mutex_enter(&tx->lock);
                /* re-check under the lock before touching tx->stop */
                if (tx->req == tx->done && tx->active) {
                        *(int *)(void *)tx->stop = 1;
                        tx->active = 0;
                        mb();
                }
                mutex_exit(&tx->lock);
        }
        /* release every DMA handle gathered during the loop */
        if (handles.head != NULL)
                myri10ge_free_tx_handles(tx, &handles);
}
2580 
2581 static void
2582 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2583 {
2584         mbl->head = NULL;
2585         mbl->tail = &mbl->head;
2586         mbl->cnt = 0;
2587 }
2588 
2589 /*ARGSUSED*/
2590 void
2591 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2592     struct myri10ge_mblk_list *mbl, mblk_t *mp)
2593 {
2594         *(mbl->tail) = mp;
2595         mbl->tail = &mp->b_next;
2596         mp->b_next = NULL;
2597         mbl->cnt++;
2598 }
2599 
2600 
2601 static inline void
2602 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
2603     struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
2604 {
2605         myri10ge_rx_done_t *rx_done = &ss->rx_done;
2606         struct myri10ge_priv *mgp = ss->mgp;
2607         mblk_t *mp;
2608         struct lro_entry *lro;
2609         uint16_t length;
2610         uint16_t checksum;
2611 
2612 
2613         while (rx_done->entry[rx_done->idx].length != 0) {
2614                 if (unlikely (*stop)) {
2615                         break;
2616                 }
2617                 length = ntohs(rx_done->entry[rx_done->idx].length);
2618                 length &= (~MXGEFW_RSS_HASH_MASK);
2619 
2620                 /* limit potential for livelock */
2621                 limit -= length;
2622                 if (unlikely(limit < 0))
2623                         break;
2624 
2625                 rx_done->entry[rx_done->idx].length = 0;
2626                 checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
2627                 if (length <= myri10ge_small_bytes)
2628                         mp = myri10ge_rx_done_small(ss, length, checksum);
2629                 else
2630                         mp = myri10ge_rx_done_big(ss, length, checksum);
2631                 if (mp != NULL) {
2632                         if (!myri10ge_lro ||
2633                             0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
2634                                 myri10ge_mbl_append(ss, mbl, mp);
2635                 }
2636                 rx_done->cnt++;
2637                 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
2638         }
2639         while (ss->lro_active != NULL) {
2640                 lro = ss->lro_active;
2641                 ss->lro_active = lro->next;
2642                 myri10ge_lro_flush(ss, lro, mbl);
2643         }
2644 }
2645 
2646 static void
2647 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2648 {
2649         uint64_t gen;
2650         struct myri10ge_mblk_list mbl;
2651 
2652         myri10ge_mbl_init(&mbl);
2653         if (mutex_tryenter(&ss->rx_lock) == 0)
2654                 return;
2655         gen = ss->rx_gen_num;
2656         myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2657             &ss->rx_polling);
2658         if (mbl.head != NULL)
2659                 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2660         mutex_exit(&ss->rx_lock);
2661 
2662 }
2663 
2664 static mblk_t *
2665 myri10ge_poll_rx(void *arg, int bytes)
2666 {
2667         struct myri10ge_slice_state *ss = arg;
2668         struct myri10ge_mblk_list mbl;
2669         boolean_t dummy = B_FALSE;
2670 
2671         if (bytes == 0)
2672                 return (NULL);
2673 
2674         myri10ge_mbl_init(&mbl);
2675         mutex_enter(&ss->rx_lock);
2676         if (ss->rx_polling)
2677                 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2678         else
2679                 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2680                     ss->mgp->ss), ss->rx_token, ss->rx_polling);
2681         mutex_exit(&ss->rx_lock);
2682         return (mbl.head);
2683 }
2684 
/*
 * Interrupt handler for one slice.  arg0 is the slice state; arg1 is
 * not used.
 *
 * The interrupt is left unclaimed when the firmware stats block is not
 * marked valid.  Otherwise the handler:
 *   - runs receive processing when the low bit of the valid byte is set,
 *   - clears stats->valid (and, for fixed interrupts, writes the
 *     irq_deassert register),
 *   - reaps transmit completions until stats->valid reads back zero,
 *   - reports link state changes and RDMA tag changes when the firmware
 *     flags updated stats,
 *   - writes the irq_claim words and returns DDI_INTR_CLAIMED.
 */
/*ARGSUSED*/
static uint_t
myri10ge_intr(caddr_t arg0, caddr_t arg1)
{
        struct myri10ge_slice_state *ss =
            (struct myri10ge_slice_state *)(void *)arg0;
        struct myri10ge_priv *mgp = ss->mgp;
        mcp_irq_data_t *stats = ss->fw_stats;
        myri10ge_tx_ring_t *tx = &ss->tx;
        uint32_t send_done_count;
        uint8_t valid;


        /* make sure the DMA has finished */
        if (!stats->valid) {
                return (DDI_INTR_UNCLAIMED);
        }
        /* keep a private copy; stats->valid is cleared below */
        valid = stats->valid;

        /* low bit indicates receives are present */
        if (valid & 1)
                myri10ge_intr_rx(ss);

        if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
                /* lower legacy IRQ  */
                *mgp->irq_deassert = 0;
                if (!myri10ge_deassert_wait)
                        /* don't wait for conf. that irq is low */
                        stats->valid = 0;
                mb();
        } else {
                /* no need to wait for conf. that irq is low */
                stats->valid = 0;
        }

        /*
         * Loop until stats->valid is observed as zero.  The volatile
         * read forces a fresh load each time round.
         * NOTE(review): when myri10ge_deassert_wait is set on a fixed
         * interrupt, valid is presumably cleared by the NIC once the
         * IRQ is low -- confirm against the firmware interface.
         */
        do {
                /* check for transmit completes and receives */
                send_done_count = ntohl(stats->send_done_count);
                if (send_done_count != tx->pkt_done)
                        myri10ge_tx_done(ss, (int)send_done_count);
        } while (*((volatile uint8_t *) &stats->valid));

        if (stats->stats_updated) {
                if (mgp->link_state != stats->link_up || stats->link_down) {
                        mgp->link_state = stats->link_up;
                        /* any reported link-down event forces state down */
                        if (stats->link_down) {
                                mgp->down_cnt += stats->link_down;
                                mgp->link_state = 0;
                        }
                        if (mgp->link_state) {
                                if (myri10ge_verbose)
                                        printf("%s: link up\n", mgp->name);
                                mac_link_update(mgp->mh, LINK_STATE_UP);
                        } else {
                                if (myri10ge_verbose)
                                        printf("%s: link down\n", mgp->name);
                                mac_link_update(mgp->mh, LINK_STATE_DOWN);
                        }
                        MYRI10GE_NIC_STAT_INC(link_changes);
                }
                /* a change in the tag count is logged as an RDMA timeout */
                if (mgp->rdma_tags_available !=
                    ntohl(ss->fw_stats->rdma_tags_available)) {
                        mgp->rdma_tags_available =
                            ntohl(ss->fw_stats->rdma_tags_available);
                        cmn_err(CE_NOTE, "%s: RDMA timed out! "
                            "%d tags left\n", mgp->name,
                            mgp->rdma_tags_available);
                }
        }

        mb();
        /* check to see if we have rx token to pass back */
        if (valid & 0x1) {
                mutex_enter(&ss->poll_lock);
                if (ss->rx_polling) {
                        /* polling is active: hold on to the token */
                        ss->rx_token = 1;
                } else {
                        *ss->irq_claim = BE_32(3);
                        ss->rx_token = 0;
                }
                mutex_exit(&ss->poll_lock);
        }
        /* the second claim word is always written */
        *(ss->irq_claim + 1) = BE_32(3);
        return (DDI_INTR_CLAIMED);
}
2770 
2771 /*
2772  * Add or remove a multicast address.  This is called with our
2773  * macinfo's lock held by GLD, so we do not need to worry about
2774  * our own locking here.
2775  */
2776 static int
2777 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2778 {
2779         myri10ge_cmd_t cmd;
2780         struct myri10ge_priv *mgp = arg;
2781         int status, join_leave;
2782 
2783         if (add)
2784                 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2785         else
2786                 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2787         (void) memcpy(&cmd.data0, multicastaddr, 4);
2788         (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2789         cmd.data0 = htonl(cmd.data0);
2790         cmd.data1 = htonl(cmd.data1);
2791         status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2792         if (status == 0)
2793                 return (0);
2794 
2795         cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2796             mgp->name);
2797         return (status);
2798 }
2799 
2800 
2801 static int
2802 myri10ge_m_promisc(void *arg, boolean_t on)
2803 {
2804         struct myri10ge_priv *mgp = arg;
2805 
2806         myri10ge_change_promisc(mgp, on);
2807         return (0);
2808 }
2809 
2810 /*
2811  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
2812  *  backwards one at a time and handle ring wraps
2813  */
2814 
2815 static inline void
2816 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
2817     mcp_kreq_ether_send_t *src, int cnt)
2818 {
2819         int idx, starting_slot;
2820         starting_slot = tx->req;
2821         while (cnt > 1) {
2822                 cnt--;
2823                 idx = (starting_slot + cnt) & tx->mask;
2824                 myri10ge_pio_copy(&tx->lanai[idx],
2825                     &src[cnt], sizeof (*src));
2826                 mb();
2827         }
2828 }
2829 
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 *
 * tx is the send ring, src the array of cnt requests to submit.  The
 * first request's flags are cleared in src while the chain is written
 * and restored afterwards; tx->req is advanced by cnt.  If the ring is
 * not marked active and tx->go is set, the NIC is told to start
 * polling the ring.
 */

static inline void
myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
    int cnt)
{
        int idx, i;
        uint32_t *src_ints, *dst_ints;
        mcp_kreq_ether_send_t *srcp, *dstp, *dst;
        uint8_t last_flags;

        idx = tx->req & tx->mask;

        /* hide the first request's flags until the whole chain is out */
        last_flags = src->flags;
        src->flags = 0;
        mb();
        dst = dstp = &tx->lanai[idx];
        srcp = src;

        if ((idx + cnt) < tx->mask) {
                /* no ring wrap: copy forwards, two requests at a time */
                for (i = 0; i < (cnt - 1); i += 2) {
                        myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
                        mb(); /* force write every 32 bytes */
                        srcp += 2;
                        dstp += 2;
                }
        } else {
                /*
                 * submit all but the first request, and ensure
                 *  that it is submitted below
                 */
                myri10ge_submit_req_backwards(tx, src, cnt);
                i = 0;
        }
        if (i < cnt) {
                /*
                 * submit the first request (after the forward loop
                 * with an odd cnt, srcp/dstp have advanced and this
                 * is instead the one remaining, final request)
                 */
                myri10ge_pio_copy(dstp, srcp, sizeof (*src));
                mb(); /* barrier before setting valid flag */
        }

        /* re-write the last 32-bits with the valid flags */
        src->flags |= last_flags;
        src_ints = (uint32_t *)src;
        src_ints += 3;
        dst_ints = (uint32_t *)dst;
        dst_ints += 3;
        *dst_ints =  *src_ints;
        tx->req += cnt;
        mb();
        /* notify NIC to poll this tx ring */
        if (!tx->active && tx->go != NULL) {
                *(int *)(void *)tx->go = 1;
                tx->active = 1;
                tx->activate++;
                mb();
        }
}
2892 
2893 /* ARGSUSED */
2894 static inline void
2895 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2896 {
2897         uint32_t lso_flag;
2898         mac_lso_get(mp, mss, &lso_flag);
2899         (*flags) |= lso_flag;
2900 }
2901 
2902 
2903 /* like pullupmsg, except preserve hcksum/LSO attributes */
2904 static int
2905 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2906 {
2907         uint32_t start, stuff, tx_offload_flags, mss;
2908         int ok;
2909 
2910         mss = 0;
2911         mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2912         myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2913 
2914         ok = pullupmsg(mp, -1);
2915         if (!ok) {
2916                 printf("pullupmsg failed");
2917                 return (DDI_FAILURE);
2918         }
2919         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2920         mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags);
2921         if (tx_offload_flags & HW_LSO)
2922                 DB_LSOMSS(mp) = (uint16_t)mss;
2923         lso_info_set(mp, mss, tx_offload_flags);
2924         return (DDI_SUCCESS);
2925 }
2926 
2927 static inline void
2928 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2929     int opackets, int obytes)
2930 {
2931         s->un.all = 0;
2932         if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2933                 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2934                     myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2935                         s->un.s.brdcstxmt = 1;
2936                 else
2937                         s->un.s.multixmt = 1;
2938         }
2939         s->un.s.opackets = (uint16_t)opackets;
2940         s->un.s.obytes = obytes;
2941 }
2942 
2943 static int
2944 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
2945     mcp_kreq_ether_send_t *req)
2946 {
2947         myri10ge_tx_ring_t *tx = &ss->tx;
2948         caddr_t ptr;
2949         struct myri10ge_tx_copybuf *cp;
2950         mblk_t *bp;
2951         int idx, mblen, avail;
2952         uint16_t len;
2953 
2954         mutex_enter(&tx->lock);
2955         avail = tx->mask - (tx->req - tx->done);
2956         if (avail <= 1) {
2957                 mutex_exit(&tx->lock);
2958                 return (EBUSY);
2959         }
2960         idx = tx->req & tx->mask;
2961         cp = &tx->cp[idx];
2962         ptr = cp->va;
2963         for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
2964                 mblen = MBLKL(bp);
2965                 bcopy(bp->b_rptr, ptr, mblen);
2966                 ptr += mblen;
2967                 len += mblen;
2968         }
2969         /* ensure runts are padded to 60 bytes */
2970         if (len < 60) {
2971                 bzero(ptr, 64 - len);
2972                 len = 60;
2973         }
2974         req->addr_low = cp->dma.low;
2975         req->addr_high = cp->dma.high;
2976         req->length = htons(len);
2977         req->pad = 0;
2978         req->rdma_count = 1;
2979         myri10ge_tx_stat(&tx->info[idx].stat,
2980             (struct ether_header *)(void *)cp->va, 1, len);
2981         (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
2982         myri10ge_submit_req(&ss->tx, req, 1);
2983         mutex_exit(&tx->lock);
2984         freemsg(mp);
2985         return (DDI_SUCCESS);
2986 }
2987 
2988 
2989 static void
2990 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2991     struct myri10ge_tx_buffer_state *tx_info,
2992     int count)
2993 {
2994         int i, idx;
2995 
2996         idx = 0; /* gcc -Wuninitialized */
2997         /* store unmapping and bp info for tx irq handler */
2998         for (i = 0; i < count; i++) {
2999                 idx = (tx->req + i) & tx->mask;
3000                 tx->info[idx].m = tx_info[i].m;
3001                 tx->info[idx].handle = tx_info[i].handle;
3002         }
3003         tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
3004 
3005         /* submit the frame to the nic */
3006         myri10ge_submit_req(tx, req_list, count);
3007 
3008 
3009 }
3010 
3011 
3012 
3013 static void
3014 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3015 {
3016         mblk_t *bp;
3017         int seglen;
3018         uint_t count;
3019 
3020         bp = mp;
3021 
3022         while (off > 0) {
3023                 seglen = MBLKL(bp);
3024                 if (off < seglen)
3025                         break;
3026                 off -= seglen;
3027                 bp = bp->b_cont;
3028         }
3029         while (len > 0) {
3030                 seglen = MBLKL(bp);
3031                 count = min(seglen - off, len);
3032                 bcopy(bp->b_rptr + off, buf, count);
3033                 len -= count;
3034                 buf += count;
3035                 off = 0;
3036                 bp = bp->b_cont;
3037         }
3038 }
3039 
3040 static int
3041 myri10ge_ether_parse_header(mblk_t *mp)
3042 {
3043         struct ether_header eh_copy;
3044         struct ether_header *eh;
3045         int eth_hdr_len, seglen;
3046 
3047         seglen = MBLKL(mp);
3048         eth_hdr_len = sizeof (*eh);
3049         if (seglen < eth_hdr_len) {
3050                 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3051                 eh = &eh_copy;
3052         } else {
3053                 eh = (struct ether_header *)(void *)mp->b_rptr;
3054         }
3055         if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3056                 eth_hdr_len += 4;
3057         }
3058 
3059         return (eth_hdr_len);
3060 }
3061 
/*
 * Parse the IP and TCP headers of an LSO packet.
 *
 * mp is the packet and off the offset of the IP header within it.  If
 * the headers are not fully contained in the first mblk they are
 * gathered into a local buffer for inspection.  The IP header checksum
 * is forced to zero in the packet itself (see below).
 *
 * Returns the total header length: off plus the IP and TCP header
 * lengths taken from ip_hl and th_off.
 */
static int
myri10ge_lso_parse_header(mblk_t *mp, int off)
{
        char buf[128];
        int seglen, sum_off;
        struct ip *ip;
        struct tcphdr *tcp;

        seglen = MBLKL(mp);
        /* get at the fixed IP header first so that ip_hl can be read */
        if (seglen < off + sizeof (*ip)) {
                myri10ge_copydata(mp, off, sizeof (*ip), buf);
                ip = (struct ip *)(void *)buf;
        } else {
                ip = (struct ip *)(void *)(mp->b_rptr + off);
        }
        /*
         * Now make sure the full IP header plus the fixed TCP header
         * is contiguous, copying again if the first mblk is too short.
         */
        if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
                myri10ge_copydata(mp, off,
                    (ip->ip_hl << 2) + sizeof (*tcp), buf);
                ip = (struct ip *)(void *)buf;
        }
        tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));

        /*
         * NIC expects ip_sum to be zero.  Recent changes to
         * OpenSolaris leave the correct ip checksum there, rather
         * than the required zero, so we need to zero it.  Otherwise,
         * the NIC will produce bad checksums when sending LSO packets.
         */
        if (ip->ip_sum != 0) {
                if (((char *)ip) != buf) {
                        /* ip points into mblk, so just zero it */
                        ip->ip_sum = 0;
                } else {
                        /*
                         * ip points into a copy, so walk the chain
                         * to find the ip_csum, then zero it
                         */
                        sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
                        /* find the mblk holding the first checksum byte */
                        while (sum_off > (int)(MBLKL(mp) - 1)) {
                                sum_off -= MBLKL(mp);
                                mp = mp->b_cont;
                        }
                        mp->b_rptr[sum_off] = 0;
                        /* the second byte may live in a later mblk */
                        sum_off++;
                        while (sum_off > MBLKL(mp) - 1) {
                                sum_off -= MBLKL(mp);
                                mp = mp->b_cont;
                        }
                        mp->b_rptr[sum_off] = 0;
                }
        }
        /* ip_hl and th_off are in 32-bit words; << 2 converts to bytes */
        return (off + ((ip->ip_hl + tcp->th_off) << 2));
}
3115 
3116 static int
3117 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
3118     mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
3119     uint16_t mss, uint8_t cksum_offset)
3120 {
3121         myri10ge_tx_ring_t *tx = &ss->tx;
3122         struct myri10ge_priv *mgp = ss->mgp;
3123         mblk_t *bp;
3124         mcp_kreq_ether_send_t *req;
3125         struct myri10ge_tx_copybuf *cp;
3126         caddr_t rptr, ptr;
3127         int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
3128         int resid, avail, idx, hdr_size_tmp, tx_boundary;
3129         int rdma_count;
3130         uint32_t seglen, len, boundary, low, high_swapped;
3131         uint16_t pseudo_hdr_offset = htons(mss);
3132         uint8_t flags;
3133 
3134         tx_boundary = mgp->tx_boundary;
3135         hdr_size_tmp = hdr_size;
3136         resid = tx_boundary;
3137         count = 1;
3138         mutex_enter(&tx->lock);
3139 
3140         /* check to see if the slots are really there */
3141         avail = tx->mask - (tx->req - tx->done);
3142         if (unlikely(avail <=  MYRI10GE_MAX_SEND_DESC_TSO)) {
3143                 atomic_inc_32(&tx->stall);
3144                 mutex_exit(&tx->lock);
3145                 return (EBUSY);
3146         }
3147 
3148         /* copy */
3149         cum_len = -hdr_size;
3150         count = 0;
3151         req = req_list;
3152         idx = tx->mask & tx->req;
3153         cp = &tx->cp[idx];
3154         low = ntohl(cp->dma.low);
3155         ptr = cp->va;
3156         cp->len = 0;
3157         if (mss) {
3158                 int payload = pkt_size - hdr_size;
3159                 uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
3160                 tx->info[idx].ostat.opackets = opackets;
3161                 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
3162                     + pkt_size;
3163         }
3164         hdr_size_tmp = hdr_size;
3165         mss_resid = mss;
3166         flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3167         tx_req = tx->req;
3168         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3169                 mblen = MBLKL(bp);
3170                 rptr = (caddr_t)bp->b_rptr;
3171                 len = min(hdr_size_tmp, mblen);
3172                 if (len) {
3173                         bcopy(rptr, ptr, len);
3174                         rptr += len;
3175                         ptr += len;
3176                         resid -= len;
3177                         mblen -= len;
3178                         hdr_size_tmp -= len;
3179                         cp->len += len;
3180                         if (hdr_size_tmp)
3181                                 continue;
3182                         if (resid < mss) {
3183                                 tx_req++;
3184                                 idx = tx->mask & tx_req;
3185                                 cp = &tx->cp[idx];
3186                                 low = ntohl(cp->dma.low);
3187                                 ptr = cp->va;
3188                                 resid = tx_boundary;
3189                         }
3190                 }
3191                 while (mblen) {
3192                         len = min(mss_resid, mblen);
3193                         bcopy(rptr, ptr, len);
3194                         mss_resid -= len;
3195                         resid -= len;
3196                         mblen -= len;
3197                         rptr += len;
3198                         ptr += len;
3199                         cp->len += len;
3200                         if (mss_resid == 0) {
3201                                 mss_resid = mss;
3202                                 if (resid < mss) {
3203                                         tx_req++;
3204                                         idx = tx->mask & tx_req;
3205                                         cp = &tx->cp[idx];
3206                                         cp->len = 0;
3207                                         low = ntohl(cp->dma.low);
3208                                         ptr = cp->va;
3209                                         resid = tx_boundary;
3210                                 }
3211                         }
3212                 }
3213         }
3214 
3215         req = req_list;
3216         pkt_size_tmp = pkt_size;
3217         count = 0;
3218         rdma_count = 0;
3219         tx_req = tx->req;
3220         while (pkt_size_tmp) {
3221                 idx = tx->mask & tx_req;
3222                 cp = &tx->cp[idx];
3223                 high_swapped = cp->dma.high;
3224                 low = ntohl(cp->dma.low);
3225                 len = cp->len;
3226                 if (len == 0) {
3227                         printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
3228                             pkt_size_tmp, pkt_size);
3229                         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3230                                 mblen = MBLKL(bp);
3231                                 printf("mblen:%d\n", mblen);
3232                         }
3233                         pkt_size_tmp = pkt_size;
3234                         tx_req = tx->req;
3235                         while (pkt_size_tmp > 0) {
3236                                 idx = tx->mask & tx_req;
3237                                 cp = &tx->cp[idx];
3238                                 printf("cp->len = %d\n", cp->len);
3239                                 pkt_size_tmp -= cp->len;
3240                                 tx_req++;
3241                         }
3242                         printf("dropped\n");
3243                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3244                         goto done;
3245                 }
3246                 pkt_size_tmp -= len;
3247                 while (len) {
3248                         while (len) {
3249                                 uint8_t flags_next;
3250                                 int cum_len_next;
3251 
3252                                 boundary = (low + mgp->tx_boundary) &
3253                                     ~(mgp->tx_boundary - 1);
3254                                 seglen = boundary - low;
3255                                 if (seglen > len)
3256                                         seglen = len;
3257 
3258                                 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3259                                 cum_len_next = cum_len + seglen;
3260                                 (req-rdma_count)->rdma_count = rdma_count + 1;
3261                                 if (likely(cum_len >= 0)) {
3262                                         /* payload */
3263                                         int next_is_first, chop;
3264 
3265                                         chop = (cum_len_next > mss);
3266                                         cum_len_next = cum_len_next % mss;
3267                                         next_is_first = (cum_len_next == 0);
3268                                         flags |= chop *
3269                                             MXGEFW_FLAGS_TSO_CHOP;
3270                                         flags_next |= next_is_first *
3271                                             MXGEFW_FLAGS_FIRST;
3272                                         rdma_count |= -(chop | next_is_first);
3273                                         rdma_count += chop & !next_is_first;
3274                                 } else if (likely(cum_len_next >= 0)) {
3275                                         /* header ends */
3276                                         int small;
3277 
3278                                         rdma_count = -1;
3279                                         cum_len_next = 0;
3280                                         seglen = -cum_len;
3281                                         small = (mss <= MXGEFW_SEND_SMALL_SIZE);
3282                                         flags_next = MXGEFW_FLAGS_TSO_PLD |
3283                                             MXGEFW_FLAGS_FIRST |
3284                                             (small * MXGEFW_FLAGS_SMALL);
3285                                 }
3286                                 req->addr_high = high_swapped;
3287                                 req->addr_low = htonl(low);
3288                                 req->pseudo_hdr_offset = pseudo_hdr_offset;
3289                                 req->pad = 0; /* complete solid 16-byte block */
3290                                 req->rdma_count = 1;
3291                                 req->cksum_offset = cksum_offset;
3292                                 req->length = htons(seglen);
3293                                 req->flags = flags | ((cum_len & 1) *
3294                                     MXGEFW_FLAGS_ALIGN_ODD);
3295                                 if (cksum_offset > seglen)
3296                                         cksum_offset -= seglen;
3297                                 else
3298                                         cksum_offset = 0;
3299                                 low += seglen;
3300                                 len -= seglen;
3301                                 cum_len = cum_len_next;
3302                                 req++;
3303                                 req->flags = 0;
3304                                 flags = flags_next;
3305                                 count++;
3306                                 rdma_count++;
3307                         }
3308                 }
3309                 tx_req++;
3310         }
3311         (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3312         do {
3313                 req--;
3314                 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3315         } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3316             MXGEFW_FLAGS_FIRST)));
3317 
3318         myri10ge_submit_req(tx, req_list, count);
3319 done:
3320         mutex_exit(&tx->lock);
3321         freemsg(mp);
3322         return (DDI_SUCCESS);
3323 }
3324 
3325 /*
3326  * Try to send the chain of buffers described by the mp.  We must not
3327  * encapsulate more than eth->tx.req - eth->tx.done, or
3328  * MXGEFW_MAX_SEND_DESC, whichever is more.
3329  */
3330 
3331 static int
3332 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3333     mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3334 {
3335         struct myri10ge_priv *mgp = ss->mgp;
3336         myri10ge_tx_ring_t *tx = &ss->tx;
3337         mcp_kreq_ether_send_t *req;
3338         struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3339         mblk_t  *bp;
3340         ddi_dma_cookie_t cookie;
3341         int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3342             rdma_count, cum_len, lso_hdr_size;
3343         uint32_t start, stuff, tx_offload_flags;
3344         uint32_t seglen, len, mss, boundary, low, high_swapped;
3345         uint_t ncookies;
3346         uint16_t pseudo_hdr_offset;
3347         uint8_t flags, cksum_offset, odd_flag;
3348         int pkt_size;
3349         int lso_copy = myri10ge_lso_copy;
3350         try_pullup = 1;
3351 
3352 again:
3353         /* Setup checksum offloading, if needed */
3354         mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3355         myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3356         if (tx_offload_flags & HW_LSO) {
3357                 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3358                 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3359                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3360                         freemsg(mp);
3361                         return (DDI_SUCCESS);
3362                 }
3363         } else {
3364                 max_segs = MXGEFW_MAX_SEND_DESC;
3365                 mss = 0;
3366         }
3367         req = req_list;
3368         cksum_offset = 0;
3369         pseudo_hdr_offset = 0;
3370 
3371         /* leave an extra slot keep the ring from wrapping */
3372         avail = tx->mask - (tx->req - tx->done);
3373 
3374         /*
3375          * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3376          * message will need to be pulled up in order to fit.
3377          * Otherwise, we are low on transmit descriptors, it is
3378          * probably better to stall and try again rather than pullup a
3379          * message to fit.
3380          */
3381 
3382         if (avail < max_segs) {
3383                 err = EBUSY;
3384                 atomic_inc_32(&tx->stall_early);
3385                 goto stall;
3386         }
3387 
3388         /* find out how long the frame is and how many segments it is */
3389         count = 0;
3390         odd_flag = 0;
3391         pkt_size = 0;
3392         flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3393         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3394                 dblk_t *dbp;
3395                 mblen = MBLKL(bp);
3396                 if (mblen == 0) {
3397                         /*
3398                          * we can't simply skip over 0-length mblks
3399                          * because the hardware can't deal with them,
3400                          * and we could leak them.
3401                          */
3402                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3403                         err = EIO;
3404                         goto pullup;
3405                 }
3406                 /*
3407                  * There's no advantage to copying most gesballoc
3408                  * attached blocks, so disable lso copy in that case
3409                  */
3410                 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3411                         if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3412                                 lso_copy = 0;
3413                         }
3414                 }
3415                 pkt_size += mblen;
3416                 count++;
3417         }
3418 
3419         /* Try to pull up excessivly long chains */
3420         if (count >= max_segs) {
3421                 err = myri10ge_pullup(ss, mp);
3422                 if (likely(err == DDI_SUCCESS)) {
3423                         count = 1;
3424                 } else {
3425                         if (count <  MYRI10GE_MAX_SEND_DESC_TSO) {
3426                                 /*
3427                                  * just let the h/w send it, it will be
3428                                  * inefficient, but us better than dropping
3429                                  */
3430                                 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3431                         } else {
3432                                 /* drop it */
3433                                 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3434                                 freemsg(mp);
3435                                 return (0);
3436                         }
3437                 }
3438         }
3439 
3440         cum_len = 0;
3441         maclen = myri10ge_ether_parse_header(mp);
3442 
3443         if (tx_offload_flags & HCK_PARTIALCKSUM) {
3444 
3445                 cksum_offset = start + maclen;
3446                 pseudo_hdr_offset = htons(stuff + maclen);
3447                 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3448                 flags |= MXGEFW_FLAGS_CKSUM;
3449         }
3450 
3451         lso_hdr_size = 0; /* -Wunitinialized */
3452         if (mss) { /* LSO */
3453                 /* this removes any CKSUM flag from before */
3454                 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3455                 /*
3456                  * parse the headers and set cum_len to a negative
3457                  * value to reflect the offset of the TCP payload
3458                  */
3459                 lso_hdr_size =  myri10ge_lso_parse_header(mp, maclen);
3460                 cum_len = -lso_hdr_size;
3461                 if ((mss < mgp->tx_boundary) && lso_copy) {
3462                         err = myri10ge_tx_tso_copy(ss, mp, req_list,
3463                             lso_hdr_size, pkt_size, mss, cksum_offset);
3464                         return (err);
3465                 }
3466 
3467                 /*
3468                  * for TSO, pseudo_hdr_offset holds mss.  The firmware
3469                  * figures out where to put the checksum by parsing
3470                  * the header.
3471                  */
3472 
3473                 pseudo_hdr_offset = htons(mss);
3474         } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
3475                 flags |= MXGEFW_FLAGS_SMALL;
3476                 if (pkt_size < myri10ge_tx_copylen) {
3477                         req->cksum_offset = cksum_offset;
3478                         req->pseudo_hdr_offset = pseudo_hdr_offset;
3479                         req->flags = flags;
3480                         err = myri10ge_tx_copy(ss, mp, req);
3481                         return (err);
3482                 }
3483                 cum_len = 0;
3484         }
3485 
3486         /* pull one DMA handle for each bp from our freelist */
3487         handles = NULL;
3488         err = myri10ge_alloc_tx_handles(ss, count, &handles);
3489         if (err != DDI_SUCCESS) {
3490                 err = DDI_FAILURE;
3491                 goto stall;
3492         }
3493         count = 0;
3494         rdma_count = 0;
3495         for (bp = mp; bp != NULL; bp = bp->b_cont) {
3496                 mblen = MBLKL(bp);
3497                 dma_handle = handles;
3498                 handles = handles->next;
3499 
3500                 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
3501                     (caddr_t)bp->b_rptr, mblen,
3502                     DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
3503                     &cookie, &ncookies);
3504                 if (unlikely(rv != DDI_DMA_MAPPED)) {
3505                         err = EIO;
3506                         try_pullup = 0;
3507                         dma_handle->next = handles;
3508                         handles = dma_handle;
3509                         goto abort_with_handles;
3510                 }
3511 
3512                 /* reserve the slot */
3513                 tx_info[count].m = bp;
3514                 tx_info[count].handle = dma_handle;
3515 
3516                 for (; ; ) {
3517                         low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
3518                         high_swapped =
3519                             htonl(MYRI10GE_HIGHPART_TO_U32(
3520                             cookie.dmac_laddress));
3521                         len = (uint32_t)cookie.dmac_size;
3522                         while (len) {
3523                                 uint8_t flags_next;
3524                                 int cum_len_next;
3525 
3526                                 boundary = (low + mgp->tx_boundary) &
3527                                     ~(mgp->tx_boundary - 1);
3528                                 seglen = boundary - low;
3529                                 if (seglen > len)
3530                                         seglen = len;
3531 
3532                                 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3533                                 cum_len_next = cum_len + seglen;
3534                                 if (mss) {
3535                                         (req-rdma_count)->rdma_count =
3536                                             rdma_count + 1;
3537                                         if (likely(cum_len >= 0)) {
3538                                                 /* payload */
3539                                                 int next_is_first, chop;
3540 
3541                                                 chop = (cum_len_next > mss);
3542                                                 cum_len_next =
3543                                                     cum_len_next % mss;
3544                                                 next_is_first =
3545                                                     (cum_len_next == 0);
3546                                                 flags |= chop *
3547                                                     MXGEFW_FLAGS_TSO_CHOP;
3548                                                 flags_next |= next_is_first *
3549                                                     MXGEFW_FLAGS_FIRST;
3550                                                 rdma_count |=
3551                                                     -(chop | next_is_first);
3552                                                 rdma_count +=
3553                                                     chop & !next_is_first;
3554                                         } else if (likely(cum_len_next >= 0)) {
3555                                                 /* header ends */
3556                                                 int small;
3557 
3558                                                 rdma_count = -1;
3559                                                 cum_len_next = 0;
3560                                                 seglen = -cum_len;
3561                                                 small = (mss <=
3562                                                     MXGEFW_SEND_SMALL_SIZE);
3563                                                 flags_next =
3564                                                     MXGEFW_FLAGS_TSO_PLD
3565                                                     | MXGEFW_FLAGS_FIRST
3566                                                     | (small *
3567                                                     MXGEFW_FLAGS_SMALL);
3568                                         }
3569                                 }
3570                                 req->addr_high = high_swapped;
3571                                 req->addr_low = htonl(low);
3572                                 req->pseudo_hdr_offset = pseudo_hdr_offset;
3573                                 req->pad = 0; /* complete solid 16-byte block */
3574                                 req->rdma_count = 1;
3575                                 req->cksum_offset = cksum_offset;
3576                                 req->length = htons(seglen);
3577                                 req->flags = flags | ((cum_len & 1) * odd_flag);
3578                                 if (cksum_offset > seglen)
3579                                         cksum_offset -= seglen;
3580                                 else
3581                                         cksum_offset = 0;
3582                                 low += seglen;
3583                                 len -= seglen;
3584                                 cum_len = cum_len_next;
3585                                 count++;
3586                                 rdma_count++;
3587                                 /*  make sure all the segments will fit */
3588                                 if (unlikely(count >= max_segs)) {
3589                                         MYRI10GE_ATOMIC_SLICE_STAT_INC(
3590                                             xmit_lowbuf);
3591                                         /* may try a pullup */
3592                                         err = EBUSY;
3593                                         if (try_pullup)
3594                                                 try_pullup = 2;
3595                                         goto abort_with_handles;
3596                                 }
3597                                 req++;
3598                                 req->flags = 0;
3599                                 flags = flags_next;
3600                                 tx_info[count].m = 0;
3601                         }
3602                         ncookies--;
3603                         if (ncookies == 0)
3604                                 break;
3605                         ddi_dma_nextcookie(dma_handle->h, &cookie);
3606                 }
3607         }
3608         (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3609 
3610         if (mss) {
3611                 do {
3612                         req--;
3613                         req->flags |= MXGEFW_FLAGS_TSO_LAST;
3614                 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3615                     MXGEFW_FLAGS_FIRST)));
3616         }
3617 
3618         /* calculate tx stats */
3619         if (mss) {
3620                 uint16_t opackets;
3621                 int payload;
3622 
3623                 payload = pkt_size - lso_hdr_size;
3624                 opackets = (payload / mss) + ((payload % mss) != 0);
3625                 tx_info[0].stat.un.all = 0;
3626                 tx_info[0].ostat.opackets = opackets;
3627                 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
3628                     + pkt_size;
3629         } else {
3630                 myri10ge_tx_stat(&tx_info[0].stat,
3631                     (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
3632         }
3633         mutex_enter(&tx->lock);
3634 
3635         /* check to see if the slots are really there */
3636         avail = tx->mask - (tx->req - tx->done);
3637         if (unlikely(avail <= count)) {
3638                 mutex_exit(&tx->lock);
3639                 err = 0;
3640                 goto late_stall;
3641         }
3642 
3643         myri10ge_send_locked(tx, req_list, tx_info, count);
3644         mutex_exit(&tx->lock);
3645         return (DDI_SUCCESS);
3646 
3647 late_stall:
3648         try_pullup = 0;
3649         atomic_inc_32(&tx->stall_late);
3650 
3651 abort_with_handles:
3652         /* unbind and free handles from previous mblks */
3653         for (i = 0; i < count; i++) {
3654                 bp = tx_info[i].m;
3655                 tx_info[i].m = 0;
3656                 if (bp) {
3657                         dma_handle = tx_info[i].handle;
3658                         (void) ddi_dma_unbind_handle(dma_handle->h);
3659                         dma_handle->next = handles;
3660                         handles = dma_handle;
3661                         tx_info[i].handle = NULL;
3662                         tx_info[i].m = NULL;
3663                 }
3664         }
3665         myri10ge_free_tx_handle_slist(tx, handles);
3666 pullup:
3667         if (try_pullup) {
3668                 err = myri10ge_pullup(ss, mp);
3669                 if (err != DDI_SUCCESS && try_pullup == 2) {
3670                         /* drop */
3671                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3672                         freemsg(mp);
3673                         return (0);
3674                 }
3675                 try_pullup = 0;
3676                 goto again;
3677         }
3678 
3679 stall:
3680         if (err != 0) {
3681                 if (err == EBUSY) {
3682                         atomic_inc_32(&tx->stall);
3683                 } else {
3684                         MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3685                 }
3686         }
3687         return (err);
3688 }
3689 
3690 static mblk_t *
3691 myri10ge_send_wrapper(void *arg, mblk_t *mp)
3692 {
3693         struct myri10ge_slice_state *ss = arg;
3694         int err = 0;
3695         mcp_kreq_ether_send_t *req_list;
3696 #if defined(__i386)
3697         /*
3698          * We need about 2.5KB of scratch space to handle transmits.
3699          * i86pc has only 8KB of kernel stack space, so we malloc the
3700          * scratch space there rather than keeping it on the stack.
3701          */
3702         size_t req_size, tx_info_size;
3703         struct myri10ge_tx_buffer_state *tx_info;
3704         caddr_t req_bytes;
3705 
3706         req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3707             + 8;
3708         req_bytes = kmem_alloc(req_size, KM_SLEEP);
3709         tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1);
3710         tx_info = kmem_alloc(tx_info_size, KM_SLEEP);
3711 #else
3712         char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3713             + 8];
3714         struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
3715 #endif
3716 
3717         /* ensure req_list entries are aligned to 8 bytes */
3718         req_list = (struct mcp_kreq_ether_send *)
3719             (((unsigned long)req_bytes + 7UL) & ~7UL);
3720 
3721         err = myri10ge_send(ss, mp, req_list, tx_info);
3722 
3723 #if defined(__i386)
3724         kmem_free(tx_info, tx_info_size);
3725         kmem_free(req_bytes, req_size);
3726 #endif
3727         if (err)
3728                 return (mp);
3729         else
3730                 return (NULL);
3731 }
3732 
3733 static int
3734 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3735 {
3736         struct myri10ge_priv *mgp = arg;
3737         int err;
3738 
3739         if (mac_addr == NULL)
3740                 return (EINVAL);
3741 
3742         mutex_enter(&mgp->intrlock);
3743         if (mgp->macaddr_cnt) {
3744                 mutex_exit(&mgp->intrlock);
3745                 return (ENOSPC);
3746         }
3747         err = myri10ge_m_unicst(mgp, mac_addr);
3748         if (!err)
3749                 mgp->macaddr_cnt++;
3750 
3751         mutex_exit(&mgp->intrlock);
3752         if (err)
3753                 return (err);
3754 
3755         bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3756         return (0);
3757 }
3758 
3759 /*ARGSUSED*/
3760 static int
3761 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3762 {
3763         struct myri10ge_priv *mgp = arg;
3764 
3765         mutex_enter(&mgp->intrlock);
3766         mgp->macaddr_cnt--;
3767         mutex_exit(&mgp->intrlock);
3768 
3769         return (0);
3770 }
3771 
3772 /*ARGSUSED*/
3773 static void
3774 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3775     mac_group_info_t *infop, mac_group_handle_t gh)
3776 {
3777         struct myri10ge_priv *mgp = arg;
3778 
3779         if (rtype != MAC_RING_TYPE_RX)
3780                 return;
3781 
3782         infop->mgi_driver = (mac_group_driver_t)mgp;
3783         infop->mgi_start = NULL;
3784         infop->mgi_stop = NULL;
3785         infop->mgi_addmac = myri10ge_addmac;
3786         infop->mgi_remmac = myri10ge_remmac;
3787         infop->mgi_count = mgp->num_slices;
3788 }
3789 
3790 static int
3791 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3792 {
3793         struct myri10ge_slice_state *ss;
3794 
3795         ss = (struct myri10ge_slice_state *)rh;
3796         mutex_enter(&ss->rx_lock);
3797         ss->rx_gen_num = mr_gen_num;
3798         mutex_exit(&ss->rx_lock);
3799         return (0);
3800 }
3801 
3802 /*
3803  * Retrieve a value for one of the statistics for a particular rx ring
3804  */
3805 int
3806 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3807 {
3808         struct myri10ge_slice_state *ss;
3809 
3810         ss = (struct myri10ge_slice_state *)rh;
3811         switch (stat) {
3812         case MAC_STAT_RBYTES:
3813                 *val = ss->rx_stats.ibytes;
3814                 break;
3815 
3816         case MAC_STAT_IPACKETS:
3817                 *val = ss->rx_stats.ipackets;
3818                 break;
3819 
3820         default:
3821                 *val = 0;
3822                 return (ENOTSUP);
3823         }
3824 
3825         return (0);
3826 }
3827 
3828 /*
3829  * Retrieve a value for one of the statistics for a particular tx ring
3830  */
3831 int
3832 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3833 {
3834         struct myri10ge_slice_state *ss;
3835 
3836         ss = (struct myri10ge_slice_state *)rh;
3837         switch (stat) {
3838         case MAC_STAT_OBYTES:
3839                 *val = ss->tx.stats.obytes;
3840                 break;
3841 
3842         case MAC_STAT_OPACKETS:
3843                 *val = ss->tx.stats.opackets;
3844                 break;
3845 
3846         default:
3847                 *val = 0;
3848                 return (ENOTSUP);
3849         }
3850 
3851         return (0);
3852 }
3853 
3854 static int
3855 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
3856 {
3857         struct myri10ge_slice_state *ss;
3858 
3859         ss = (struct myri10ge_slice_state *)intrh;
3860         mutex_enter(&ss->poll_lock);
3861         ss->rx_polling = B_TRUE;
3862         mutex_exit(&ss->poll_lock);
3863         return (0);
3864 }
3865 
3866 static int
3867 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
3868 {
3869         struct myri10ge_slice_state *ss;
3870 
3871         ss = (struct myri10ge_slice_state *)intrh;
3872         mutex_enter(&ss->poll_lock);
3873         ss->rx_polling = B_FALSE;
3874         if (ss->rx_token) {
3875                 *ss->irq_claim = BE_32(3);
3876                 ss->rx_token = 0;
3877         }
3878         mutex_exit(&ss->poll_lock);
3879         return (0);
3880 }
3881 
3882 /*ARGSUSED*/
3883 static void
3884 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
3885     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
3886 {
3887         struct myri10ge_priv *mgp = arg;
3888         struct myri10ge_slice_state *ss;
3889         mac_intr_t *mintr = &infop->mri_intr;
3890 
3891         ASSERT((unsigned int)ring_index < mgp->num_slices);
3892 
3893         ss = &mgp->ss[ring_index];
3894         switch (rtype) {
3895         case MAC_RING_TYPE_RX:
3896                 ss->rx_rh = rh;
3897                 infop->mri_driver = (mac_ring_driver_t)ss;
3898                 infop->mri_start = myri10ge_ring_start;
3899                 infop->mri_stop = NULL;
3900                 infop->mri_poll = myri10ge_poll_rx;
3901                 infop->mri_stat = myri10ge_rx_ring_stat;
3902                 mintr->mi_handle = (mac_intr_handle_t)ss;
3903                 mintr->mi_enable = myri10ge_rx_ring_intr_enable;
3904                 mintr->mi_disable = myri10ge_rx_ring_intr_disable;
3905                 break;
3906         case MAC_RING_TYPE_TX:
3907                 ss->tx.rh = rh;
3908                 infop->mri_driver = (mac_ring_driver_t)ss;
3909                 infop->mri_start = NULL;
3910                 infop->mri_stop = NULL;
3911                 infop->mri_tx = myri10ge_send_wrapper;
3912                 infop->mri_stat = myri10ge_tx_ring_stat;
3913                 break;
3914         default:
3915                 break;
3916         }
3917 }
3918 
3919 static void
3920 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
3921 {
3922         if (mgp->ksp_stat == NULL)
3923                 return;
3924 
3925         kstat_delete(mgp->ksp_stat);
3926         mgp->ksp_stat = NULL;
3927 }
3928 
3929 static void
3930 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
3931 {
3932         if (ss->ksp_stat == NULL)
3933                 return;
3934 
3935         kstat_delete(ss->ksp_stat);
3936         ss->ksp_stat = NULL;
3937 }
3938 
3939 static void
3940 myri10ge_info_destroy(struct myri10ge_priv *mgp)
3941 {
3942         if (mgp->ksp_info == NULL)
3943                 return;
3944 
3945         kstat_delete(mgp->ksp_info);
3946         mgp->ksp_info = NULL;
3947 }
3948 
3949 static int
3950 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
3951 {
3952         struct myri10ge_nic_stat *ethstat;
3953         struct myri10ge_priv *mgp;
3954         mcp_irq_data_t *fw_stats;
3955 
3956 
3957         if (rw == KSTAT_WRITE)
3958                 return (EACCES);
3959 
3960         ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
3961         mgp = (struct myri10ge_priv *)ksp->ks_private;
3962         fw_stats = mgp->ss[0].fw_stats;
3963 
3964         ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
3965         ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
3966         ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
3967         if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
3968                 ethstat->dma_force_physical.value.ul = 1;
3969         else
3970                 ethstat->dma_force_physical.value.ul = 0;
3971         ethstat->lanes.value.ul = mgp->pcie_link_width;
3972         ethstat->dropped_bad_crc32.value.ul =
3973             ntohl(fw_stats->dropped_bad_crc32);
3974         ethstat->dropped_bad_phy.value.ul =
3975             ntohl(fw_stats->dropped_bad_phy);
3976         ethstat->dropped_link_error_or_filtered.value.ul =
3977             ntohl(fw_stats->dropped_link_error_or_filtered);
3978         ethstat->dropped_link_overflow.value.ul =
3979             ntohl(fw_stats->dropped_link_overflow);
3980         ethstat->dropped_multicast_filtered.value.ul =
3981             ntohl(fw_stats->dropped_multicast_filtered);
3982         ethstat->dropped_no_big_buffer.value.ul =
3983             ntohl(fw_stats->dropped_no_big_buffer);
3984         ethstat->dropped_no_small_buffer.value.ul =
3985             ntohl(fw_stats->dropped_no_small_buffer);
3986         ethstat->dropped_overrun.value.ul =
3987             ntohl(fw_stats->dropped_overrun);
3988         ethstat->dropped_pause.value.ul =
3989             ntohl(fw_stats->dropped_pause);
3990         ethstat->dropped_runt.value.ul =
3991             ntohl(fw_stats->dropped_runt);
3992         ethstat->link_up.value.ul =
3993             ntohl(fw_stats->link_up);
3994         ethstat->dropped_unicast_filtered.value.ul =
3995             ntohl(fw_stats->dropped_unicast_filtered);
3996         return (0);
3997 }
3998 
3999 static int
4000 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
4001 {
4002         struct myri10ge_slice_stat *ethstat;
4003         struct myri10ge_slice_state *ss;
4004 
4005         if (rw == KSTAT_WRITE)
4006                 return (EACCES);
4007 
4008         ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
4009         ss = (struct myri10ge_slice_state *)ksp->ks_private;
4010 
4011         ethstat->rx_big.value.ul = ss->j_rx_cnt;
4012         ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
4013         ethstat->rx_bigbuf_pool.value.ul =
4014             ss->jpool.num_alloc - ss->jbufs_for_smalls;
4015         ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
4016         ethstat->rx_small.value.ul = ss->rx_small.cnt -
4017             (ss->rx_small.mask + 1);
4018         ethstat->tx_done.value.ul = ss->tx.done;
4019         ethstat->tx_req.value.ul = ss->tx.req;
4020         ethstat->tx_activate.value.ul = ss->tx.activate;
4021         ethstat->xmit_sched.value.ul = ss->tx.sched;
4022         ethstat->xmit_stall.value.ul = ss->tx.stall;
4023         ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
4024         ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
4025         ethstat->xmit_err.value.ul =  MYRI10GE_SLICE_STAT(xmit_err);
4026         return (0);
4027 }
4028 
4029 static int
4030 myri10ge_info_kstat_update(kstat_t *ksp, int rw)
4031 {
4032         struct myri10ge_info *info;
4033         struct myri10ge_priv *mgp;
4034 
4035 
4036         if (rw == KSTAT_WRITE)
4037                 return (EACCES);
4038 
4039         info = (struct myri10ge_info *)ksp->ks_data;
4040         mgp = (struct myri10ge_priv *)ksp->ks_private;
4041         kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
4042         kstat_named_setstr(&info->firmware_version, mgp->fw_version);
4043         kstat_named_setstr(&info->firmware_name, mgp->fw_name);
4044         kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
4045         kstat_named_setstr(&info->product_code, mgp->pc_str);
4046         kstat_named_setstr(&info->serial_number, mgp->sn_str);
4047         return (0);
4048 }
4049 
/*
 * Named-kstat data for the "myri10ge_info" kstat: six string-valued
 * entries.  myri10ge_info_init() installs this one static object as the
 * ks_data of the virtual kstat it creates, and
 * myri10ge_info_kstat_update() fills in the string pointers on update.
 */
static struct myri10ge_info myri10ge_info_template = {
        { "driver_version",     KSTAT_DATA_STRING },
        { "firmware_version",   KSTAT_DATA_STRING },
        { "firmware_name",      KSTAT_DATA_STRING },
        { "interrupt_type",     KSTAT_DATA_STRING },
        { "product_code",       KSTAT_DATA_STRING },
        { "serial_number",      KSTAT_DATA_STRING },
};
/* Installed as the ks_lock of kstats that use the template above. */
static kmutex_t myri10ge_info_template_lock;
4059 
4060 
4061 static int
4062 myri10ge_info_init(struct myri10ge_priv *mgp)
4063 {
4064         struct kstat *ksp;
4065 
4066         ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4067             "myri10ge_info", "net", KSTAT_TYPE_NAMED,
4068             sizeof (myri10ge_info_template) /
4069             sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
4070         if (ksp == NULL) {
4071                 cmn_err(CE_WARN,
4072                     "%s: myri10ge_info_init: kstat_create failed", mgp->name);
4073                 return (DDI_FAILURE);
4074         }
4075         mgp->ksp_info = ksp;
4076         ksp->ks_update = myri10ge_info_kstat_update;
4077         ksp->ks_private = (void *) mgp;
4078         ksp->ks_data = &myri10ge_info_template;
4079         ksp->ks_lock = &myri10ge_info_template_lock;
4080         if (MYRI10GE_VERSION_STR != NULL)
4081                 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
4082         if (mgp->fw_version != NULL)
4083                 ksp->ks_data_size += strlen(mgp->fw_version) + 1;
4084         ksp->ks_data_size += strlen(mgp->fw_name) + 1;
4085         ksp->ks_data_size += strlen(mgp->intr_type) + 1;
4086         if (mgp->pc_str != NULL)
4087                 ksp->ks_data_size += strlen(mgp->pc_str) + 1;
4088         if (mgp->sn_str != NULL)
4089                 ksp->ks_data_size += strlen(mgp->sn_str) + 1;
4090 
4091         kstat_install(ksp);
4092         return (DDI_SUCCESS);
4093 }
4094 
4095 
4096 static int
4097 myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
4098 {
4099         struct kstat *ksp;
4100         struct myri10ge_nic_stat *ethstat;
4101 
4102         ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4103             "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
4104             sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4105         if (ksp == NULL) {
4106                 cmn_err(CE_WARN,
4107                     "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4108                 return (DDI_FAILURE);
4109         }
4110         mgp->ksp_stat = ksp;
4111         ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);
4112 
4113         kstat_named_init(&ethstat->dma_read_bw_MBs,
4114             "dma_read_bw_MBs", KSTAT_DATA_ULONG);
4115         kstat_named_init(&ethstat->dma_write_bw_MBs,
4116             "dma_write_bw_MBs", KSTAT_DATA_ULONG);
4117         kstat_named_init(&ethstat->dma_read_write_bw_MBs,
4118             "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
4119         kstat_named_init(&ethstat->dma_force_physical,
4120             "dma_force_physical", KSTAT_DATA_ULONG);
4121         kstat_named_init(&ethstat->lanes,
4122             "lanes", KSTAT_DATA_ULONG);
4123         kstat_named_init(&ethstat->dropped_bad_crc32,
4124             "dropped_bad_crc32", KSTAT_DATA_ULONG);
4125         kstat_named_init(&ethstat->dropped_bad_phy,
4126             "dropped_bad_phy", KSTAT_DATA_ULONG);
4127         kstat_named_init(&ethstat->dropped_link_error_or_filtered,
4128             "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
4129         kstat_named_init(&ethstat->dropped_link_overflow,
4130             "dropped_link_overflow", KSTAT_DATA_ULONG);
4131         kstat_named_init(&ethstat->dropped_multicast_filtered,
4132             "dropped_multicast_filtered", KSTAT_DATA_ULONG);
4133         kstat_named_init(&ethstat->dropped_no_big_buffer,
4134             "dropped_no_big_buffer", KSTAT_DATA_ULONG);
4135         kstat_named_init(&ethstat->dropped_no_small_buffer,
4136             "dropped_no_small_buffer", KSTAT_DATA_ULONG);
4137         kstat_named_init(&ethstat->dropped_overrun,
4138             "dropped_overrun", KSTAT_DATA_ULONG);
4139         kstat_named_init(&ethstat->dropped_pause,
4140             "dropped_pause", KSTAT_DATA_ULONG);
4141         kstat_named_init(&ethstat->dropped_runt,
4142             "dropped_runt", KSTAT_DATA_ULONG);
4143         kstat_named_init(&ethstat->dropped_unicast_filtered,
4144             "dropped_unicast_filtered", KSTAT_DATA_ULONG);
4145         kstat_named_init(&ethstat->dropped_runt, "dropped_runt",
4146             KSTAT_DATA_ULONG);
4147         kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG);
4148         kstat_named_init(&ethstat->link_changes, "link_changes",
4149             KSTAT_DATA_ULONG);
4150         ksp->ks_update = myri10ge_nic_stat_kstat_update;
4151         ksp->ks_private = (void *) mgp;
4152         kstat_install(ksp);
4153         return (DDI_SUCCESS);
4154 }
4155 
4156 static int
4157 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
4158 {
4159         struct myri10ge_priv *mgp = ss->mgp;
4160         struct kstat *ksp;
4161         struct myri10ge_slice_stat *ethstat;
4162         int instance;
4163 
4164         /*
4165          * fake an instance so that the same slice numbers from
4166          * different instances do not collide
4167          */
4168         instance = (ddi_get_instance(mgp->dip) * 1000) +  (int)(ss - mgp->ss);
4169         ksp = kstat_create("myri10ge", instance,
4170             "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
4171             sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4172         if (ksp == NULL) {
4173                 cmn_err(CE_WARN,
4174                     "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4175                 return (DDI_FAILURE);
4176         }
4177         ss->ksp_stat = ksp;
4178         ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
4179         kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum",
4180             KSTAT_DATA_ULONG);
4181         kstat_named_init(&ethstat->lro_flushed, "lro_flushed",
4182             KSTAT_DATA_ULONG);
4183         kstat_named_init(&ethstat->lro_queued, "lro_queued",
4184             KSTAT_DATA_ULONG);
4185         kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
4186             KSTAT_DATA_ULONG);
4187         kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool",
4188             KSTAT_DATA_ULONG);
4189         kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
4190             KSTAT_DATA_ULONG);
4191         kstat_named_init(&ethstat->rx_copy, "rx_copy",
4192             KSTAT_DATA_ULONG);
4193         kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf",
4194             KSTAT_DATA_ULONG);
4195         kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf",
4196             KSTAT_DATA_ULONG);
4197         kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len",
4198             KSTAT_DATA_ULONG);
4199         kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup",
4200             KSTAT_DATA_ULONG);
4201         kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first",
4202             KSTAT_DATA_ULONG);
4203         kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf",
4204             KSTAT_DATA_ULONG);
4205         kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags",
4206             KSTAT_DATA_ULONG);
4207         kstat_named_init(&ethstat->xmit_sched, "xmit_sched",
4208             KSTAT_DATA_ULONG);
4209         kstat_named_init(&ethstat->xmit_stall, "xmit_stall",
4210             KSTAT_DATA_ULONG);
4211         kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early",
4212             KSTAT_DATA_ULONG);
4213         kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late",
4214             KSTAT_DATA_ULONG);
4215         kstat_named_init(&ethstat->xmit_err, "xmit_err",
4216             KSTAT_DATA_ULONG);
4217         kstat_named_init(&ethstat->tx_req, "tx_req",
4218             KSTAT_DATA_ULONG);
4219         kstat_named_init(&ethstat->tx_activate, "tx_activate",
4220             KSTAT_DATA_ULONG);
4221         kstat_named_init(&ethstat->tx_done, "tx_done",
4222             KSTAT_DATA_ULONG);
4223         kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced",
4224             KSTAT_DATA_ULONG);
4225         kstat_named_init(&ethstat->rx_big, "rx_big",
4226             KSTAT_DATA_ULONG);
4227         kstat_named_init(&ethstat->rx_small, "rx_small",
4228             KSTAT_DATA_ULONG);
4229         ksp->ks_update = myri10ge_slice_stat_kstat_update;
4230         ksp->ks_private = (void *) ss;
4231         kstat_install(ksp);
4232         return (DDI_SUCCESS);
4233 }
4234 
4235 
4236 
4237 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4238 
4239 #include <vm/hat.h>
4240 #include <sys/ddi_isa.h>
4241 void *device_arena_alloc(size_t size, int vm_flag);
4242 void device_arena_free(void *vaddr, size_t size);
4243 
4244 static void
4245 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4246 {
4247         dev_info_t *parent_dip;
4248         ddi_acc_handle_t handle;
4249         unsigned long bus_number, dev_number, func_number;
4250         unsigned long cfg_pa, paddr, base, pgoffset;
4251         char            *cvaddr, *ptr;
4252         uint32_t        *ptr32;
4253         int             retval = DDI_FAILURE;
4254         int dontcare;
4255         uint16_t read_vid, read_did, vendor_id, device_id;
4256 
4257         if (!myri10ge_nvidia_ecrc_enable)
4258                 return;
4259 
4260         parent_dip = ddi_get_parent(mgp->dip);
4261         if (parent_dip == NULL) {
4262                 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
4263                 return;
4264         }
4265 
4266         if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
4267                 cmn_err(CE_WARN,
4268                     "%s: Could not access my parent's registers", mgp->name);
4269                 return;
4270         }
4271 
4272         vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
4273         device_id = pci_config_get16(handle, PCI_CONF_DEVID);
4274         pci_config_teardown(&handle);
4275 
4276         if (myri10ge_verbose) {
4277                 unsigned long   bus_number, dev_number, func_number;
4278                 int             reg_set, span;
4279                 (void) myri10ge_reg_set(parent_dip, &reg_set, &span,
4280                     &bus_number, &dev_number, &func_number);
4281                 if (myri10ge_verbose)
4282                         printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
4283                             bus_number, dev_number, func_number);
4284         }
4285 
4286         if (vendor_id !=  0x10de)
4287                 return;
4288 
4289         if (device_id != 0x005d /* CK804 */ &&
4290             (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
4291                 return;
4292         }
4293         (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
4294             &bus_number, &dev_number, &func_number);
4295 
4296         for (cfg_pa = 0xf0000000UL;
4297             retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
4298             cfg_pa -= 0x10000000UL) {
4299                 /* find the config space address for the nvidia bridge */
4300                 paddr = (cfg_pa + bus_number * 0x00100000UL +
4301                     (dev_number * 8 + func_number) * 0x00001000UL);
4302 
4303                 base = paddr & (~MMU_PAGEOFFSET);
4304                 pgoffset = paddr & MMU_PAGEOFFSET;
4305 
4306                 /* map it into the kernel */
4307                 cvaddr =  device_arena_alloc(ptob(1), VM_NOSLEEP);
4308                 if (cvaddr == NULL)
4309                         cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
4310                             mgp->name);
4311 
4312                 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
4313                     i_ddi_paddr_to_pfn(base),
4314                     PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);
4315 
4316                 ptr = cvaddr + pgoffset;
4317                 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
4318                 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
4319                 if (vendor_id ==  read_did || device_id == read_did) {
4320                         ptr32 = (uint32_t *)(void *)(ptr + 0x178);
4321                         if (myri10ge_verbose)
4322                                 printf("%s: Enabling ECRC on upstream "
4323                                     "Nvidia bridge (0x%x:0x%x) "
4324                                     "at %ld:%ld:%ld\n", mgp->name,
4325                                     read_vid, read_did, bus_number,
4326                                     dev_number, func_number);
4327                         *ptr32 |= 0x40;
4328                         retval = DDI_SUCCESS;
4329                 }
4330                 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
4331                 device_arena_free(cvaddr, ptob(1));
4332         }
4333 }
4334 
4335 #else
/*
 * No-op stand-in used when none of the x86 preprocessor symbols tested
 * by the enclosing #if are defined, so that callers need no conditional
 * compilation of their own.
 */
/*ARGSUSED*/
static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
}
4341 #endif /* i386 */
4342 
4343 
4344 /*
4345  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4346  * when the PCI-E Completion packets are aligned on an 8-byte
4347  * boundary.  Some PCI-E chip sets always align Completion packets; on
4348  * the ones that do not, the alignment can be enforced by enabling
4349  * ECRC generation (if supported).
4350  *
4351  * When PCI-E Completion packets are not aligned, it is actually more
4352  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4353  *
4354  * If the driver can neither enable ECRC nor verify that it has
4355  * already been enabled, then it must use a firmware image which works
4356  * around unaligned completion packets (ethp_z8e.dat), and it should
4357  * also ensure that it never gives the device a Read-DMA which is
4358  * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
4359  * enabled, then the driver should use the aligned (eth_z8e.dat)
4360  * firmware image, and set tx.boundary to 4KB.
4361  */
4362 
4363 
4364 static int
4365 myri10ge_firmware_probe(struct myri10ge_priv *mgp)
4366 {
4367         int status;
4368 
4369         mgp->tx_boundary = 4096;
4370         /*
4371          * Verify the max read request size was set to 4KB
4372          * before trying the test with 4KB.
4373          */
4374         if (mgp->max_read_request_4k == 0)
4375                 mgp->tx_boundary = 2048;
4376         /*
4377          * load the optimized firmware which assumes aligned PCIe
4378          * completions in order to see if it works on this host.
4379          */
4380 
4381         mgp->fw_name = "rss_eth_z8e";
4382         mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4383         mgp->eth_z8e_length = rss_eth_z8e_length;
4384 
4385         status = myri10ge_load_firmware(mgp);
4386         if (status != 0) {
4387                 return (status);
4388         }
4389         /*
4390          * Enable ECRC if possible
4391          */
4392         myri10ge_enable_nvidia_ecrc(mgp);
4393 
4394         /*
4395          * Run a DMA test which watches for unaligned completions and
4396          * aborts on the first one seen.
4397          */
4398         status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
4399         if (status == 0)
4400                 return (0); /* keep the aligned firmware */
4401 
4402         if (status != E2BIG)
4403                 cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
4404                     mgp->name, status);
4405         if (status == ENOSYS)
4406                 cmn_err(CE_WARN, "%s: Falling back to ethp! "
4407                     "Please install up to date fw\n", mgp->name);
4408         return (status);
4409 }
4410 
4411 static int
4412 myri10ge_select_firmware(struct myri10ge_priv *mgp)
4413 {
4414         int aligned;
4415 
4416         aligned = 0;
4417 
4418         if (myri10ge_force_firmware == 1) {
4419                 if (myri10ge_verbose)
4420                         printf("%s: Assuming aligned completions (forced)\n",
4421                             mgp->name);
4422                 aligned = 1;
4423                 goto done;
4424         }
4425 
4426         if (myri10ge_force_firmware == 2) {
4427                 if (myri10ge_verbose)
4428                         printf("%s: Assuming unaligned completions (forced)\n",
4429                             mgp->name);
4430                 aligned = 0;
4431                 goto done;
4432         }
4433 
4434         /* If the width is less than 8, we may used the aligned firmware */
4435         if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
4436                 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
4437                     mgp->name, mgp->pcie_link_width);
4438                 aligned = 1;
4439                 goto done;
4440         }
4441 
4442         if (0 == myri10ge_firmware_probe(mgp))
4443                 return (0);  /* keep optimized firmware */
4444 
4445 done:
4446         if (aligned) {
4447                 mgp->fw_name = "rss_eth_z8e";
4448                 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4449                 mgp->eth_z8e_length = rss_eth_z8e_length;
4450                 mgp->tx_boundary = 4096;
4451         } else {
4452                 mgp->fw_name = "rss_ethp_z8e";
4453                 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
4454                 mgp->eth_z8e_length = rss_ethp_z8e_length;
4455                 mgp->tx_boundary = 2048;
4456         }
4457 
4458         return (myri10ge_load_firmware(mgp));
4459 }
4460 
4461 static int
4462 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
4463 {
4464         dev_info_t *devinfo = mgp->dip;
4465         int count, avail, actual, intr_types;
4466         int x, y, rc, inum = 0;
4467 
4468 
4469         rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4470         if (rc != DDI_SUCCESS) {
4471                 cmn_err(CE_WARN,
4472                     "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name,
4473                     rc);
4474                 return (DDI_FAILURE);
4475         }
4476 
4477         if (!myri10ge_use_msi)
4478                 intr_types &= ~DDI_INTR_TYPE_MSI;
4479         if (!myri10ge_use_msix)
4480                 intr_types &= ~DDI_INTR_TYPE_MSIX;
4481 
4482         if (intr_types & DDI_INTR_TYPE_MSIX) {
4483                 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
4484                 mgp->intr_type = "MSI-X";
4485         } else if (intr_types & DDI_INTR_TYPE_MSI) {
4486                 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
4487                 mgp->intr_type = "MSI";
4488         } else {
4489                 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
4490                 mgp->intr_type = "Legacy";
4491         }
4492         /* Get number of interrupts */
4493         rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
4494         if ((rc != DDI_SUCCESS) || (count == 0)) {
4495                 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4496                     "count: %d", mgp->name, rc, count);
4497 
4498                 return (DDI_FAILURE);
4499         }
4500 
4501         /* Get number of available interrupts */
4502         rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
4503         if ((rc != DDI_SUCCESS) || (avail == 0)) {
4504                 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
4505                     "rc: %d, avail: %d\n", mgp->name, rc, avail);
4506                 return (DDI_FAILURE);
4507         }
4508         if (avail < count) {
4509                 cmn_err(CE_NOTE,
4510                     "!%s: nintrs() returned %d, navail returned %d",
4511                     mgp->name, count, avail);
4512                 count = avail;
4513         }
4514 
4515         if (count < mgp->num_slices)
4516                 return (DDI_FAILURE);
4517 
4518         if (count > mgp->num_slices)
4519                 count = mgp->num_slices;
4520 
4521         /* Allocate memory for MSI interrupts */
4522         mgp->intr_size = count * sizeof (ddi_intr_handle_t);
4523         mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);
4524 
4525         rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
4526             count, &actual, DDI_INTR_ALLOC_NORMAL);
4527 
4528         if ((rc != DDI_SUCCESS) || (actual == 0)) {
4529                 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
4530                     mgp->name, rc);
4531 
4532                 kmem_free(mgp->htable, mgp->intr_size);
4533                 mgp->htable = NULL;
4534                 return (DDI_FAILURE);
4535         }
4536 
4537         if ((actual < count) && myri10ge_verbose) {
4538                 cmn_err(CE_NOTE, "%s: got %d/%d slices",
4539                     mgp->name, actual, count);
4540         }
4541 
4542         mgp->intr_cnt = actual;
4543 
4544         /*
4545          * Get priority for first irq, assume remaining are all the same
4546          */
4547         if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
4548             != DDI_SUCCESS) {
4549                 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);
4550 
4551                 /* Free already allocated intr */
4552                 for (y = 0; y < actual; y++) {
4553                         (void) ddi_intr_free(mgp->htable[y]);
4554                 }
4555 
4556                 kmem_free(mgp->htable, mgp->intr_size);
4557                 mgp->htable = NULL;
4558                 return (DDI_FAILURE);
4559         }
4560 
4561         mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;
4562 
4563         if (!add_handler)
4564                 return (DDI_SUCCESS);
4565 
4566         /* Call ddi_intr_add_handler() */
4567         for (x = 0; x < actual; x++) {
4568                 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
4569                     (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
4570                         cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
4571                             mgp->name);
4572 
4573                         /* Free already allocated intr */
4574                         for (y = 0; y < actual; y++) {
4575                                 (void) ddi_intr_free(mgp->htable[y]);
4576                         }
4577 
4578                         kmem_free(mgp->htable, mgp->intr_size);
4579                         mgp->htable = NULL;
4580                         return (DDI_FAILURE);
4581                 }
4582         }
4583 
4584         (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
4585         if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4586                 /* Call ddi_intr_block_enable() for MSI */
4587                 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
4588         } else {
4589                 /* Call ddi_intr_enable() for MSI non block enable */
4590                 for (x = 0; x < mgp->intr_cnt; x++) {
4591                         (void) ddi_intr_enable(mgp->htable[x]);
4592                 }
4593         }
4594 
4595         return (DDI_SUCCESS);
4596 }
4597 
4598 static void
4599 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
4600 {
4601         int x, err;
4602 
4603         /* Disable all interrupts */
4604         if (handler_installed) {
4605                 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4606                         /* Call ddi_intr_block_disable() */
4607                         (void) ddi_intr_block_disable(mgp->htable,
4608                             mgp->intr_cnt);
4609                 } else {
4610                         for (x = 0; x < mgp->intr_cnt; x++) {
4611                                 (void) ddi_intr_disable(mgp->htable[x]);
4612                         }
4613                 }
4614         }
4615 
4616         for (x = 0; x < mgp->intr_cnt; x++) {
4617                 if (handler_installed) {
4618                 /* Call ddi_intr_remove_handler() */
4619                         err = ddi_intr_remove_handler(mgp->htable[x]);
4620                         if (err != DDI_SUCCESS) {
4621                                 cmn_err(CE_WARN,
4622                                     "%s: ddi_intr_remove_handler for"
4623                                     "vec %d returned %d\n", mgp->name,
4624                                     x, err);
4625                         }
4626                 }
4627                 err = ddi_intr_free(mgp->htable[x]);
4628                 if (err != DDI_SUCCESS) {
4629                         cmn_err(CE_WARN,
4630                             "%s: ddi_intr_free for vec %d returned %d\n",
4631                             mgp->name, x, err);
4632                 }
4633         }
4634         kmem_free(mgp->htable, mgp->intr_size);
4635         mgp->htable = NULL;
4636 }
4637 
4638 static void
4639 myri10ge_test_physical(dev_info_t *dip)
4640 {
4641         ddi_dma_handle_t        handle;
4642         struct myri10ge_dma_stuff dma;
4643         void *addr;
4644         int err;
4645 
4646         /* test #1, sufficient for older sparc systems */
4647         myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
4648         err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
4649             DDI_DMA_DONTWAIT, NULL, &handle);
4650         if (err == DDI_DMA_BADATTR)
4651                 goto fail;
4652         ddi_dma_free_handle(&handle);
4653 
4654         /* test #2, required on Olympis where the bind is what fails */
4655         addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
4656             &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
4657             DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
4658         if (addr == NULL)
4659                 goto fail;
4660         myri10ge_dma_free(&dma);
4661         return;
4662 
4663 fail:
4664         if (myri10ge_verbose)
4665                 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4666                     "using IOMMU\n", ddi_get_instance(dip));
4667 
4668         myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
4669 }
4670 
4671 static void
4672 myri10ge_get_props(dev_info_t *dip)
4673 {
4674 
4675         myri10ge_flow_control =  ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4676             "myri10ge_flow_control", myri10ge_flow_control);
4677 
4678         myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4679             "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
4680 
4681 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4682         myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4683             "myri10ge_nvidia_ecrc_enable", 1);
4684 #endif
4685 
4686 
4687         myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4688             "myri10ge_use_msi", myri10ge_use_msi);
4689 
4690         myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4691             "myri10ge_deassert_wait",  myri10ge_deassert_wait);
4692 
4693         myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4694             "myri10ge_verbose", myri10ge_verbose);
4695 
4696         myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4697             "myri10ge_tx_copylen", myri10ge_tx_copylen);
4698 
4699         if (myri10ge_tx_copylen < 60) {
4700                 cmn_err(CE_WARN,
4701                     "myri10ge_tx_copylen must be >= 60 bytes\n");
4702                 myri10ge_tx_copylen = 60;
4703         }
4704 
4705         myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4706             "myri10ge_mtu_override", myri10ge_mtu_override);
4707 
4708         if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU &&
4709             myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU)
4710                 myri10ge_mtu = myri10ge_mtu_override +
4711                     sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
4712         else if (myri10ge_mtu_override != 0) {
4713                 cmn_err(CE_WARN,
4714                     "myri10ge_mtu_override must be between 1500 and "
4715                     "9000 bytes\n");
4716         }
4717 
4718         myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4719             "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
4720         myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4721             "myri10ge_bigbufs_max", myri10ge_bigbufs_max);
4722 
4723         myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4724             "myri10ge_watchdog_reset", myri10ge_watchdog_reset);
4725 
4726         if (myri10ge_bigbufs_initial < 128) {
4727                 cmn_err(CE_WARN,
4728                     "myri10ge_bigbufs_initial be at least 128\n");
4729                 myri10ge_bigbufs_initial = 128;
4730         }
4731         if (myri10ge_bigbufs_max < 128) {
4732                 cmn_err(CE_WARN,
4733                     "myri10ge_bigbufs_max be at least 128\n");
4734                 myri10ge_bigbufs_max = 128;
4735         }
4736 
4737         if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
4738                 cmn_err(CE_WARN,
4739                     "myri10ge_bigbufs_max must be >=  "
4740                     "myri10ge_bigbufs_initial\n");
4741                 myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
4742         }
4743 
4744         myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4745             "myri10ge_force_firmware", myri10ge_force_firmware);
4746 
4747         myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4748             "myri10ge_max_slices", myri10ge_max_slices);
4749 
4750         myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4751             "myri10ge_use_msix", myri10ge_use_msix);
4752 
4753         myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4754             "myri10ge_rss_hash", myri10ge_rss_hash);
4755 
4756         if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
4757             myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
4758                 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n",
4759                     myri10ge_rss_hash);
4760                 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4761         }
4762         myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4763             "myri10ge_lro", myri10ge_lro);
4764         myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4765             "myri10ge_lro_cnt", myri10ge_lro_cnt);
4766         myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4767             "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
4768         myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4769             "myri10ge_tx_hash", myri10ge_tx_hash);
4770         myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4771             "myri10ge_use_lso", myri10ge_use_lso);
4772         myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4773             "myri10ge_lso_copy", myri10ge_lso_copy);
4774         myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4775             "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
4776         myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4777             "myri10ge_small_bytes", myri10ge_small_bytes);
4778         if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) {
4779                 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n",
4780                     myri10ge_small_bytes);
4781                 cmn_err(CE_WARN, "must be aligned on 128b bndry -2\n");
4782                 myri10ge_small_bytes += 128;
4783                 myri10ge_small_bytes &= ~(128 -1);
4784                 myri10ge_small_bytes -= MXGEFW_PAD;
4785                 cmn_err(CE_WARN, "rounded up to %d\n",
4786                     myri10ge_small_bytes);
4787 
4788                 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4789         }
4790 }
4791 
/*
 * Fallback definition used only when no included header already provides
 * PCI_EXP_LNKSTA.
 * NOTE(review): 18 (0x12) is presumably the byte offset of the Link Status
 * register inside the PCI Express capability structure -- confirm against
 * the PCIe specification before relying on it.
 */
#ifndef PCI_EXP_LNKSTA
#define PCI_EXP_LNKSTA 18
#endif
4795 
4796 static int
4797 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
4798 {
4799         uint16_t        status;
4800         uint8_t         ptr;
4801 
4802         /* check to see if we have capabilities */
4803         status = pci_config_get16(handle, PCI_CONF_STAT);
4804         if (!(status & PCI_STAT_CAP)) {
4805                 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
4806                 return (ENXIO);
4807         }
4808 
4809         ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);
4810 
4811         /* Walk the capabilities list, looking for a PCI Express cap */
4812         while (ptr != PCI_CAP_NEXT_PTR_NULL) {
4813                 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
4814                         break;
4815                 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
4816         }
4817         if (ptr < 64) {
4818                 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
4819                 return (ENXIO);
4820         }
4821         *capptr = ptr;
4822         return (0);
4823 }
4824 
4825 static int
4826 myri10ge_set_max_readreq(ddi_acc_handle_t handle)
4827 {
4828         int err;
4829         uint16_t        val;
4830         uint8_t         ptr;
4831 
4832         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4833         if (err != 0) {
4834                 cmn_err(CE_WARN, "could not find PCIe cap\n");
4835                 return (ENXIO);
4836         }
4837 
4838         /* set max read req to 4096 */
4839         val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4840         val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
4841             PCIE_DEVCTL_MAX_READ_REQ_4096;
4842         pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
4843         val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4844         if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
4845             PCIE_DEVCTL_MAX_READ_REQ_4096) {
4846                 cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
4847                 return (EINVAL);
4848         }
4849         return (0);
4850 }
4851 
4852 static int
4853 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
4854 {
4855         int err;
4856         uint16_t        val;
4857         uint8_t         ptr;
4858 
4859         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4860         if (err != 0) {
4861                 cmn_err(CE_WARN, "could not set max read req\n");
4862                 return (ENXIO);
4863         }
4864 
4865         /* read link width */
4866         val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
4867         val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
4868         *link = (val >> 4);
4869         return (0);
4870 }
4871 
4872 static int
4873 myri10ge_reset_nic(struct myri10ge_priv *mgp)
4874 {
4875         ddi_acc_handle_t handle = mgp->cfg_hdl;
4876         uint32_t reboot;
4877         uint16_t cmd;
4878         int err;
4879 
4880         cmd = pci_config_get16(handle, PCI_CONF_COMM);
4881         if ((cmd & PCI_COMM_ME) == 0) {
4882                 /*
4883                  * Bus master DMA disabled?  Check to see if the card
4884                  * rebooted due to a parity error For now, just report
4885                  * it
4886                  */
4887 
4888                 /* enter read32 mode */
4889                 pci_config_put8(handle, mgp->vso + 0x10, 0x3);
4890                 /* read REBOOT_STATUS (0xfffffff0) */
4891                 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
4892                 reboot = pci_config_get16(handle, mgp->vso + 0x14);
4893                 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
4894                 return (0);
4895         }
4896         if (!myri10ge_watchdog_reset) {
4897                 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
4898                 return (1);
4899         }
4900 
4901         myri10ge_stop_locked(mgp);
4902         err = myri10ge_start_locked(mgp);
4903         if (err == DDI_FAILURE) {
4904                 return (0);
4905         }
4906         mac_tx_update(mgp->mh);
4907         return (1);
4908 }
4909 
4910 static inline int
4911 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
4912 {
4913         if (tx->sched != tx->stall &&
4914             tx->done == tx->watchdog_done &&
4915             tx->watchdog_req != tx->watchdog_done)
4916                 return (1);
4917         return (0);
4918 }
4919 
4920 static void
4921 myri10ge_watchdog(void *arg)
4922 {
4923         struct myri10ge_priv *mgp;
4924         struct myri10ge_slice_state *ss;
4925         myri10ge_tx_ring_t *tx;
4926         int nic_ok = 1;
4927         int slices_stalled, rx_pause, i;
4928         int add_rx;
4929 
4930         mgp = arg;
4931         mutex_enter(&mgp->intrlock);
4932         if (mgp->running != MYRI10GE_ETH_RUNNING) {
4933                 cmn_err(CE_WARN,
4934                     "%s not running, not rearming watchdog (%d)\n",
4935                     mgp->name, mgp->running);
4936                 mutex_exit(&mgp->intrlock);
4937                 return;
4938         }
4939 
4940         rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);
4941 
4942         /*
4943          * make sure nic is stalled before we reset the nic, so as to
4944          * ensure we don't rip the transmit data structures out from
4945          * under a pending transmit
4946          */
4947 
4948         for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
4949                 tx = &mgp->ss[i].tx;
4950                 slices_stalled = myri10ge_ring_stalled(tx);
4951                 if (slices_stalled)
4952                         break;
4953         }
4954 
4955         if (slices_stalled) {
4956                 if (mgp->watchdog_rx_pause == rx_pause) {
4957                         cmn_err(CE_WARN,
4958                             "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
4959                             mgp->name, i, tx->sched, tx->stall,
4960                             tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
4961                             (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
4962                         nic_ok = myri10ge_reset_nic(mgp);
4963                 } else {
4964                         cmn_err(CE_WARN,
4965                             "%s Flow controlled, check link partner\n",
4966                             mgp->name);
4967                 }
4968         }
4969 
4970         if (!nic_ok) {
4971                 cmn_err(CE_WARN,
4972                     "%s Nic dead, not rearming watchdog\n", mgp->name);
4973                 mutex_exit(&mgp->intrlock);
4974                 return;
4975         }
4976         for (i = 0; i < mgp->num_slices; i++) {
4977                 ss = &mgp->ss[i];
4978                 tx = &ss->tx;
4979                 tx->watchdog_done = tx->done;
4980                 tx->watchdog_req = tx->req;
4981                 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
4982                         ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
4983                         add_rx =
4984                             min(ss->jpool.num_alloc,
4985                             myri10ge_bigbufs_max -
4986                             (ss->jpool.num_alloc -
4987                             ss->jbufs_for_smalls));
4988                         if (add_rx != 0) {
4989                                 (void) myri10ge_add_jbufs(ss, add_rx, 0);
4990                                 /* now feed them to the firmware */
4991                                 mutex_enter(&ss->jpool.mtx);
4992                                 myri10ge_restock_jumbos(ss);
4993                                 mutex_exit(&ss->jpool.mtx);
4994                         }
4995                 }
4996         }
4997         mgp->watchdog_rx_pause = rx_pause;
4998 
4999         mgp->timer_id = timeout(myri10ge_watchdog, mgp,
5000             mgp->timer_ticks);
5001         mutex_exit(&mgp->intrlock);
5002 }
5003 
5004 /*ARGSUSED*/
5005 static int
5006 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5007 
5008 {
5009         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5010         (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
5011         return (0);
5012 }
5013 
5014 /*ARGSUSED*/
5015 static int
5016 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
5017     caddr_t cp, cred_t *credp)
5018 
5019 {
5020         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5021         char *end;
5022         size_t new_value;
5023 
5024         new_value = mi_strtol(value, &end, 10);
5025         if (end == value)
5026                 return (EINVAL);
5027 
5028         mutex_enter(&myri10ge_param_lock);
5029         mgp->intr_coal_delay = (int)new_value;
5030         *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
5031         mutex_exit(&myri10ge_param_lock);
5032         return (0);
5033 }
5034 
5035 /*ARGSUSED*/
5036 static int
5037 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5038 
5039 {
5040         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5041         (void) mi_mpprintf(mp, "%d", mgp->pause);
5042         return (0);
5043 }
5044 
5045 /*ARGSUSED*/
5046 static int
5047 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
5048                         caddr_t cp, cred_t *credp)
5049 
5050 {
5051         struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5052         char *end;
5053         size_t new_value;
5054         int err = 0;
5055 
5056         new_value = mi_strtol(value, &end, 10);
5057         if (end == value)
5058                 return (EINVAL);
5059         if (new_value != 0)
5060                 new_value = 1;
5061 
5062         mutex_enter(&myri10ge_param_lock);
5063         if (new_value != mgp->pause)
5064                 err = myri10ge_change_pause(mgp, new_value);
5065         mutex_exit(&myri10ge_param_lock);
5066         return (err);
5067 }
5068 
5069 /*ARGSUSED*/
5070 static int
5071 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5072 
5073 {
5074         (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
5075         return (0);
5076 }
5077 
5078 /*ARGSUSED*/
5079 static int
5080 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
5081     caddr_t cp, cred_t *credp)
5082 
5083 {
5084         char *end;
5085         size_t new_value;
5086 
5087         new_value = mi_strtol(value, &end, 10);
5088         if (end == value)
5089                 return (EINVAL);
5090         *(int *)(void *)cp = new_value;
5091 
5092         return (0);
5093 }
5094 
5095 static void
5096 myri10ge_ndd_init(struct myri10ge_priv *mgp)
5097 {
5098         mgp->nd_head = NULL;
5099 
5100         (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
5101             myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
5102         (void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
5103             myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
5104         (void) nd_load(&mgp->nd_head, "myri10ge_verbose",
5105             myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
5106         (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
5107             myri10ge_get_int, myri10ge_set_int,
5108             (caddr_t)&myri10ge_deassert_wait);
5109         (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
5110             myri10ge_get_int, myri10ge_set_int,
5111             (caddr_t)&myri10ge_bigbufs_max);
5112         (void) nd_load(&mgp->nd_head, "myri10ge_lro",
5113             myri10ge_get_int, myri10ge_set_int,
5114             (caddr_t)&myri10ge_lro);
5115         (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
5116             myri10ge_get_int, myri10ge_set_int,
5117             (caddr_t)&myri10ge_lro_max_aggr);
5118         (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
5119             myri10ge_get_int, myri10ge_set_int,
5120             (caddr_t)&myri10ge_tx_hash);
5121         (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
5122             myri10ge_get_int, myri10ge_set_int,
5123             (caddr_t)&myri10ge_lso_copy);
5124 }
5125 
/*
 * Release the ndd parameter list rooted at mgp->nd_head (the list that
 * myri10ge_ndd_init() populates through nd_load()).
 */
static void
myri10ge_ndd_fini(struct myri10ge_priv *mgp)
{
        nd_free(&mgp->nd_head);
}
5131 
5132 static void
5133 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
5134 {
5135         struct iocblk *iocp;
5136         struct myri10ge_priv *mgp = arg;
5137         int cmd, ok, err;
5138 
5139         iocp = (struct iocblk *)(void *)mp->b_rptr;
5140         cmd = iocp->ioc_cmd;
5141 
5142         ok = 0;
5143         err = 0;
5144 
5145         switch (cmd) {
5146         case ND_GET:
5147         case ND_SET:
5148                 ok = nd_getset(wq, mgp->nd_head, mp);
5149                 break;
5150         default:
5151                 break;
5152         }
5153         if (!ok)
5154                 err = EINVAL;
5155         else
5156                 err = iocp->ioc_error;
5157 
5158         if (!err)
5159                 miocack(wq, mp, iocp->ioc_count, err);
5160         else
5161                 miocnak(wq, mp, 0, err);
5162 }
5163 
/*
 * Head of the list of driver instances, linked through ->next.
 * myri10ge_get_instance() walks it while holding myri10ge_param_lock.
 */
static struct myri10ge_priv *mgp_list;
5165 
5166 struct myri10ge_priv *
5167 myri10ge_get_instance(uint_t unit)
5168 {
5169         struct myri10ge_priv *mgp;
5170 
5171         mutex_enter(&myri10ge_param_lock);
5172         for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
5173                 if (unit == ddi_get_instance(mgp->dip)) {
5174                         mgp->refcnt++;
5175                         break;
5176                 }
5177         }
5178         mutex_exit(&myri10ge_param_lock);
5179         return (mgp);
5180 }
5181 
5182 void
5183 myri10ge_put_instance(struct myri10ge_priv *mgp)
5184 {
5185         mutex_enter(&myri10ge_param_lock);
5186         mgp->refcnt--;
5187         mutex_exit(&myri10ge_param_lock);
5188 }
5189 
5190 static boolean_t
5191 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
5192 {
5193         struct myri10ge_priv *mgp = arg;
5194         uint32_t *cap_hcksum;
5195         mac_capab_lso_t *cap_lso;
5196         mac_capab_rings_t *cap_rings;
5197 
5198         switch (cap) {
5199         case MAC_CAPAB_HCKSUM:
5200                 cap_hcksum = cap_data;
5201                 *cap_hcksum = HCKSUM_INET_PARTIAL;
5202                 break;
5203         case MAC_CAPAB_RINGS:
5204                 cap_rings = cap_data;
5205                 switch (cap_rings->mr_type) {
5206                 case MAC_RING_TYPE_RX:
5207                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5208                         cap_rings->mr_rnum = mgp->num_slices;
5209                         cap_rings->mr_gnum = 1;
5210                         cap_rings->mr_rget = myri10ge_fill_ring;
5211                         cap_rings->mr_gget = myri10ge_fill_group;
5212                         break;
5213                 case MAC_RING_TYPE_TX:
5214                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5215                         cap_rings->mr_rnum = mgp->num_slices;
5216                         cap_rings->mr_gnum = 0;
5217                         cap_rings->mr_rget = myri10ge_fill_ring;
5218                         cap_rings->mr_gget = NULL;
5219                         break;
5220                 default:
5221                         return (B_FALSE);
5222                 }
5223                 break;
5224         case MAC_CAPAB_LSO:
5225                 cap_lso = cap_data;
5226                 if (!myri10ge_use_lso)
5227                         return (B_FALSE);
5228                 if (!(mgp->features & MYRI10GE_TSO))
5229                         return (B_FALSE);
5230                 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
5231                 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
5232                 break;
5233 
5234         default:
5235                 return (B_FALSE);
5236         }
5237         return (B_TRUE);
5238 }
5239 
5240 
5241 static int
5242 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
5243 {
5244         struct myri10ge_priv *mgp = arg;
5245         struct myri10ge_rx_ring_stats *rstat;
5246         struct myri10ge_tx_ring_stats *tstat;
5247         mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
5248         struct myri10ge_slice_state *ss;
5249         uint64_t tmp = 0;
5250         int i;
5251 
5252         switch (stat) {
5253         case MAC_STAT_IFSPEED:
5254                 *val = 10ull * 1000ull * 1000000ull;
5255                 break;
5256 
5257         case MAC_STAT_MULTIRCV:
5258                 for (i = 0; i < mgp->num_slices; i++) {
5259                         rstat = &mgp->ss[i].rx_stats;
5260                         tmp += rstat->multircv;
5261                 }
5262                 *val = tmp;
5263                 break;
5264 
5265         case MAC_STAT_BRDCSTRCV:
5266                 for (i = 0; i < mgp->num_slices; i++) {
5267                         rstat = &mgp->ss[i].rx_stats;
5268                         tmp += rstat->brdcstrcv;
5269                 }
5270                 *val = tmp;
5271                 break;
5272 
5273         case MAC_STAT_MULTIXMT:
5274                 for (i = 0; i < mgp->num_slices; i++) {
5275                         tstat = &mgp->ss[i].tx.stats;
5276                         tmp += tstat->multixmt;
5277                 }
5278                 *val = tmp;
5279                 break;
5280 
5281         case MAC_STAT_BRDCSTXMT:
5282                 for (i = 0; i < mgp->num_slices; i++) {
5283                         tstat = &mgp->ss[i].tx.stats;
5284                         tmp += tstat->brdcstxmt;
5285                 }
5286                 *val = tmp;
5287                 break;
5288 
5289         case MAC_STAT_NORCVBUF:
5290                 tmp = ntohl(fw_stats->dropped_no_big_buffer);
5291                 tmp += ntohl(fw_stats->dropped_no_small_buffer);
5292                 tmp += ntohl(fw_stats->dropped_link_overflow);
5293                 for (i = 0; i < mgp->num_slices; i++) {
5294                         ss = &mgp->ss[i];
5295                         tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
5296                         tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
5297                 }
5298                 *val = tmp;
5299                 break;
5300 
5301         case MAC_STAT_IERRORS:
5302                 tmp += ntohl(fw_stats->dropped_bad_crc32);
5303                 tmp += ntohl(fw_stats->dropped_bad_phy);
5304                 tmp += ntohl(fw_stats->dropped_runt);
5305                 tmp += ntohl(fw_stats->dropped_overrun);
5306                 *val = tmp;
5307                 break;
5308 
5309         case MAC_STAT_OERRORS:
5310                 for (i = 0; i < mgp->num_slices; i++) {
5311                         ss = &mgp->ss[i];
5312                         tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
5313                         tmp += MYRI10GE_SLICE_STAT(xmit_err);
5314                 }
5315                 *val = tmp;
5316                 break;
5317 
5318         case MAC_STAT_RBYTES:
5319                 for (i = 0; i < mgp->num_slices; i++) {
5320                         rstat = &mgp->ss[i].rx_stats;
5321                         tmp += rstat->ibytes;
5322                 }
5323                 *val = tmp;
5324                 break;
5325 
5326         case MAC_STAT_IPACKETS:
5327                 for (i = 0; i < mgp->num_slices; i++) {
5328                         rstat = &mgp->ss[i].rx_stats;
5329                         tmp += rstat->ipackets;
5330                 }
5331                 *val = tmp;
5332                 break;
5333 
5334         case MAC_STAT_OBYTES:
5335                 for (i = 0; i < mgp->num_slices; i++) {
5336                         tstat = &mgp->ss[i].tx.stats;
5337                         tmp += tstat->obytes;
5338                 }
5339                 *val = tmp;
5340                 break;
5341 
5342         case MAC_STAT_OPACKETS:
5343                 for (i = 0; i < mgp->num_slices; i++) {
5344                         tstat = &mgp->ss[i].tx.stats;
5345                         tmp += tstat->opackets;
5346                 }
5347                 *val = tmp;
5348                 break;
5349 
5350         case ETHER_STAT_TOOLONG_ERRORS:
5351                 *val = ntohl(fw_stats->dropped_overrun);
5352                 break;
5353 
5354 #ifdef SOLARIS_S11
5355         case ETHER_STAT_TOOSHORT_ERRORS:
5356                 *val = ntohl(fw_stats->dropped_runt);
5357                 break;
5358 #endif
5359 
5360         case ETHER_STAT_LINK_PAUSE:
5361                 *val = mgp->pause;
5362                 break;
5363 
5364         case ETHER_STAT_LINK_AUTONEG:
5365                 *val = 1;
5366                 break;
5367 
5368         case ETHER_STAT_LINK_DUPLEX:
5369                 *val = LINK_DUPLEX_FULL;
5370                 break;
5371 
5372         default:
5373                 return (ENOTSUP);
5374         }
5375 
5376         return (0);
5377 }
5378 
5379 /* ARGSUSED */
5380 static void
5381 myri10ge_m_propinfo(void *arg, const char *pr_name,
5382     mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
5383 {
5384         switch (pr_num) {
5385         case MAC_PROP_MTU:
5386                 mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU);
5387                 mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU,
5388                     MYRI10GE_MAX_GLD_MTU);
5389                 break;
5390         default:
5391                 break;
5392         }
5393 }
5394 
5395 /*ARGSUSED*/
5396 static int
5397 myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
5398     uint_t pr_valsize, const void *pr_val)
5399 {
5400         int err = 0;
5401         struct myri10ge_priv *mgp = arg;
5402 
5403         switch (pr_num) {
5404         case MAC_PROP_MTU: {
5405                 uint32_t mtu;
5406                 if (pr_valsize < sizeof (mtu)) {
5407                         err = EINVAL;
5408                         break;
5409                 }
5410                 bcopy(pr_val, &mtu, sizeof (mtu));
5411                 if (mtu > MYRI10GE_MAX_GLD_MTU ||
5412                     mtu < MYRI10GE_MIN_GLD_MTU) {
5413                         err = EINVAL;
5414                         break;
5415                 }
5416 
5417                 mutex_enter(&mgp->intrlock);
5418                 if (mgp->running != MYRI10GE_ETH_STOPPED) {
5419                         err = EBUSY;
5420                         mutex_exit(&mgp->intrlock);
5421                         break;
5422                 }
5423 
5424                 myri10ge_mtu = mtu + sizeof (struct ether_header) +
5425                     MXGEFW_PAD + VLAN_TAGSZ;
5426                 mutex_exit(&mgp->intrlock);
5427                 break;
5428         }
5429         default:
5430                 err = ENOTSUP;
5431                 break;
5432         }
5433 
5434         return (err);
5435 }
5436 
/*
 * MAC callback table for this driver.  The first member is the mask of
 * optional callbacks that are provided (ioctl, getcapab, setprop and
 * propinfo); the remaining members are positional.
 * NOTE(review): the slot names in the comments below follow the
 * mac_callbacks_t layout described in mac(9E); confirm them against the
 * <sys/mac_provider.h> this file is built with.
 */
static mac_callbacks_t myri10ge_m_callbacks = {
        (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO),
        myri10ge_m_stat,        /* presumably mc_getstat */
        myri10ge_m_start,       /* presumably mc_start */
        myri10ge_m_stop,        /* presumably mc_stop */
        myri10ge_m_promisc,     /* presumably mc_setpromisc */
        myri10ge_m_multicst,    /* presumably mc_multicst */
        NULL,                   /* presumably mc_unicst */
        NULL,                   /* presumably mc_tx */
        NULL,                   /* presumably mc_reserved */
        myri10ge_m_ioctl,       /* presumably mc_ioctl */
        myri10ge_m_getcapab,    /* presumably mc_getcapab */
        NULL,                   /* presumably mc_open */
        NULL,                   /* presumably mc_close */
        myri10ge_m_setprop,     /* presumably mc_setprop */
        NULL,                   /* presumably mc_getprop */
        myri10ge_m_propinfo     /* presumably mc_propinfo */
};
5455 
5456 
5457 static int
5458 myri10ge_probe_slices(struct myri10ge_priv *mgp)
5459 {
5460         myri10ge_cmd_t cmd;
5461         int status;
5462 
5463         mgp->num_slices = 1;
5464 
5465         /* hit the board with a reset to ensure it is alive */
5466         (void) memset(&cmd, 0, sizeof (cmd));
5467         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
5468         if (status != 0) {
5469                 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
5470                 return (ENXIO);
5471         }
5472 
5473         if (myri10ge_use_msix == 0)
5474                 return (0);
5475 
5476         /* tell it the size of the interrupt queues */
5477         cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
5478         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
5479         if (status != 0) {
5480                 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
5481                     mgp->name);
5482                 return (ENXIO);
5483         }
5484 
5485         /* ask the maximum number of slices it supports */
5486         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
5487             &cmd);
5488         if (status != 0)
5489                 return (0);
5490 
5491         mgp->num_slices = cmd.data0;
5492 
5493         /*
5494          * if the admin did not specify a limit to how many
5495          * slices we should use, cap it automatically to the
5496          * number of CPUs currently online
5497          */
5498         if (myri10ge_max_slices == -1)
5499                 myri10ge_max_slices = ncpus;
5500 
5501         if (mgp->num_slices > myri10ge_max_slices)
5502                 mgp->num_slices = myri10ge_max_slices;
5503 
5504 
5505         /*
5506          * Now try to allocate as many MSI-X vectors as we have
5507          * slices. We give up on MSI-X if we can only get a single
5508          * vector.
5509          */
5510         while (mgp->num_slices > 1) {
5511                 /* make sure it is a power of two */
5512                 while (!ISP2(mgp->num_slices))
5513                         mgp->num_slices--;
5514                 if (mgp->num_slices == 1)
5515                         return (0);
5516 
5517                 status = myri10ge_add_intrs(mgp, 0);
5518                 if (status == 0) {
5519                         myri10ge_rem_intrs(mgp, 0);
5520                         if (mgp->intr_cnt == mgp->num_slices) {
5521                                 if (myri10ge_verbose)
5522                                         printf("Got %d slices!\n",
5523                                             mgp->num_slices);
5524                                 return (0);
5525                         }
5526                         mgp->num_slices = mgp->intr_cnt;
5527                 } else {
5528                         mgp->num_slices = mgp->num_slices / 2;
5529                 }
5530         }
5531 
5532         if (myri10ge_verbose)
5533                 printf("Got %d slices\n", mgp->num_slices);
5534         return (0);
5535 }
5536 
5537 static void
5538 myri10ge_lro_free(struct myri10ge_slice_state *ss)
5539 {
5540         struct lro_entry *lro;
5541 
5542         while (ss->lro_free != NULL) {
5543                 lro = ss->lro_free;
5544                 ss->lro_free = lro->next;
5545                 kmem_free(lro, sizeof (*lro));
5546         }
5547 }
5548 
5549 static void
5550 myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
5551 {
5552         struct lro_entry *lro;
5553         int idx;
5554 
5555         ss->lro_free = NULL;
5556         ss->lro_active = NULL;
5557 
5558         for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
5559                 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
5560                 if (lro == NULL)
5561                         continue;
5562                 lro->next = ss->lro_free;
5563                 ss->lro_free = lro;
5564         }
5565 }
5566 
5567 static void
5568 myri10ge_free_slices(struct myri10ge_priv *mgp)
5569 {
5570         struct myri10ge_slice_state *ss;
5571         size_t bytes;
5572         int i;
5573 
5574         if (mgp->ss == NULL)
5575                 return;
5576 
5577         for (i = 0; i < mgp->num_slices; i++) {
5578                 ss = &mgp->ss[i];
5579                 if (ss->rx_done.entry == NULL)
5580                         continue;
5581                 myri10ge_dma_free(&ss->rx_done.dma);
5582                 ss->rx_done.entry = NULL;
5583                 if (ss->fw_stats == NULL)
5584                         continue;
5585                 myri10ge_dma_free(&ss->fw_stats_dma);
5586                 ss->fw_stats = NULL;
5587                 mutex_destroy(&ss->rx_lock);
5588                 mutex_destroy(&ss->tx.lock);
5589                 mutex_destroy(&ss->tx.handle_lock);
5590                 mutex_destroy(&ss->poll_lock);
5591                 myri10ge_jpool_fini(ss);
5592                 myri10ge_slice_stat_destroy(ss);
5593                 myri10ge_lro_free(ss);
5594         }
5595         bytes = sizeof (*mgp->ss) * mgp->num_slices;
5596         kmem_free(mgp->ss, bytes);
5597         mgp->ss = NULL;
5598 }
5599 
5600 
5601 static int
5602 myri10ge_alloc_slices(struct myri10ge_priv *mgp)
5603 {
5604         struct myri10ge_slice_state *ss;
5605         size_t bytes;
5606         int i;
5607 
5608         bytes = sizeof (*mgp->ss) * mgp->num_slices;
5609         mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
5610         if (mgp->ss == NULL)
5611                 return (ENOMEM);
5612         for (i = 0; i < mgp->num_slices; i++) {
5613                 ss = &mgp->ss[i];
5614 
5615                 ss->mgp = mgp;
5616 
5617                 /* allocate the per-slice firmware stats */
5618                 bytes = sizeof (*ss->fw_stats);
5619                 ss->fw_stats = (mcp_irq_data_t *)(void *)
5620                     myri10ge_dma_alloc(mgp->dip, bytes,
5621                     &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5622                     DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5623                     &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
5624                 if (ss->fw_stats == NULL)
5625                         goto abort;
5626                 (void) memset(ss->fw_stats, 0, bytes);
5627 
5628                 /* allocate rx done ring */
5629                 bytes = mgp->max_intr_slots *
5630                     sizeof (*ss->rx_done.entry);
5631                 ss->rx_done.entry = (mcp_slot_t *)(void *)
5632                     myri10ge_dma_alloc(mgp->dip, bytes,
5633                     &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5634                     DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5635                     &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
5636                 if (ss->rx_done.entry == NULL) {
5637                         goto abort;
5638                 }
5639                 (void) memset(ss->rx_done.entry, 0, bytes);
5640                 mutex_init(&ss->rx_lock,   NULL, MUTEX_DEFAULT, mgp->icookie);
5641                 mutex_init(&ss->tx.lock,   NULL, MUTEX_DEFAULT, NULL);
5642                 mutex_init(&ss->tx.handle_lock,   NULL, MUTEX_DEFAULT, NULL);
5643                 mutex_init(&ss->poll_lock,   NULL, MUTEX_DEFAULT, NULL);
5644                 myri10ge_jpool_init(ss);
5645                 (void) myri10ge_slice_stat_init(ss);
5646                 myri10ge_lro_alloc(ss);
5647         }
5648 
5649         return (0);
5650 
5651 abort:
5652         myri10ge_free_slices(mgp);
5653         return (ENOMEM);
5654 }
5655 
5656 static int
5657 myri10ge_save_msi_state(struct myri10ge_priv *mgp,
5658     ddi_acc_handle_t handle)
5659 {
5660         uint8_t ptr;
5661         int err;
5662 
5663         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5664         if (err != 0) {
5665                 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5666                     mgp->name);
5667                 return (DDI_FAILURE);
5668         }
5669         mgp->pci_saved_state.msi_ctrl =
5670             pci_config_get16(handle, ptr + PCI_MSI_CTRL);
5671         mgp->pci_saved_state.msi_addr_low =
5672             pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
5673         mgp->pci_saved_state.msi_addr_high =
5674             pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
5675         mgp->pci_saved_state.msi_data_32 =
5676             pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
5677         mgp->pci_saved_state.msi_data_64 =
5678             pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
5679         return (DDI_SUCCESS);
5680 }
5681 
5682 static int
5683 myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
5684     ddi_acc_handle_t handle)
5685 {
5686         uint8_t ptr;
5687         int err;
5688 
5689         err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5690         if (err != 0) {
5691                 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5692                     mgp->name);
5693                 return (DDI_FAILURE);
5694         }
5695 
5696         pci_config_put16(handle, ptr + PCI_MSI_CTRL,
5697             mgp->pci_saved_state.msi_ctrl);
5698         pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
5699             mgp->pci_saved_state.msi_addr_low);
5700         pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
5701             mgp->pci_saved_state.msi_addr_high);
5702         pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
5703             mgp->pci_saved_state.msi_data_32);
5704         pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
5705             mgp->pci_saved_state.msi_data_64);
5706 
5707         return (DDI_SUCCESS);
5708 }
5709 
5710 static int
5711 myri10ge_save_pci_state(struct myri10ge_priv *mgp)
5712 {
5713         ddi_acc_handle_t handle = mgp->cfg_hdl;
5714         int i;
5715         int err = DDI_SUCCESS;
5716 
5717 
5718         /* Save the non-extended PCI config space 32-bits at a time */
5719         for (i = 0; i < 16; i++)
5720                 mgp->pci_saved_state.base[i] =
5721                     pci_config_get32(handle, i*4);
5722 
5723         /* now save MSI interrupt state *, if needed */
5724         if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5725                 err = myri10ge_save_msi_state(mgp, handle);
5726 
5727         return (err);
5728 }
5729 
5730 static int
5731 myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
5732 {
5733         ddi_acc_handle_t handle = mgp->cfg_hdl;
5734         int i;
5735         int err = DDI_SUCCESS;
5736 
5737 
5738         /* Restore the non-extended PCI config space 32-bits at a time */
5739         for (i = 15; i >= 0; i--)
5740                 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);
5741 
5742         /* now restore MSI interrupt state *, if needed */
5743         if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5744                 err = myri10ge_restore_msi_state(mgp, handle);
5745 
5746         if (mgp->max_read_request_4k)
5747                 (void) myri10ge_set_max_readreq(handle);
5748         return (err);
5749 }
5750 
5751 
5752 static int
5753 myri10ge_suspend(dev_info_t *dip)
5754 {
5755         struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
5756         int status;
5757 
5758         if (mgp == NULL) {
5759                 cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
5760                 return (DDI_FAILURE);
5761         }
5762         if (mgp->dip != dip) {
5763                 cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
5764                 return (DDI_FAILURE);
5765         }
5766         mutex_enter(&mgp->intrlock);
5767         if (mgp->running == MYRI10GE_ETH_RUNNING) {
5768                 mgp->running = MYRI10GE_ETH_STOPPING;
5769                 mutex_exit(&mgp->intrlock);
5770                 (void) untimeout(mgp->timer_id);
5771                 mutex_enter(&mgp->intrlock);
5772                 myri10ge_stop_locked(mgp);
5773                 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
5774         }
5775         status = myri10ge_save_pci_state(mgp);
5776         mutex_exit(&mgp->intrlock);
5777         return (status);
5778 }
5779 
5780 static int
5781 myri10ge_resume(dev_info_t *dip)
5782 {
5783         struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
5784         int status = DDI_SUCCESS;
5785 
5786         if (mgp == NULL) {
5787                 cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
5788                 return (DDI_FAILURE);
5789         }
5790         if (mgp->dip != dip) {
5791                 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
5792                 return (DDI_FAILURE);
5793         }
5794 
5795         mutex_enter(&mgp->intrlock);
5796         status = myri10ge_restore_pci_state(mgp);
5797         if (status == DDI_SUCCESS &&
5798             mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
5799                 status = myri10ge_start_locked(mgp);
5800         }
5801         mutex_exit(&mgp->intrlock);
5802         if (status != DDI_SUCCESS)
5803                 return (status);
5804 
5805         /* start the watchdog timer */
5806         mgp->timer_id = timeout(myri10ge_watchdog, mgp,
5807             mgp->timer_ticks);
5808         return (DDI_SUCCESS);
5809 }
5810 
5811 static int
5812 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5813 {
5814 
5815         struct myri10ge_priv *mgp;
5816         mac_register_t *macp, *omacp;
5817         ddi_acc_handle_t handle;
5818         uint32_t csr, hdr_offset;
5819         int status, span, link_width, max_read_request_4k;
5820         unsigned long bus_number, dev_number, func_number;
5821         size_t bytes;
5822         offset_t ss_offset;
5823         uint8_t vso;
5824 
5825         if (cmd == DDI_RESUME) {
5826                 return (myri10ge_resume(dip));
5827         }
5828 
5829         if (cmd != DDI_ATTACH)
5830                 return (DDI_FAILURE);
5831         if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
5832                 return (DDI_FAILURE);
5833 
5834         /* enable busmater and io space access */
5835         csr = pci_config_get32(handle, PCI_CONF_COMM);
5836         pci_config_put32(handle, PCI_CONF_COMM,
5837             (csr |PCI_COMM_ME|PCI_COMM_MAE));
5838         status = myri10ge_read_pcie_link_width(handle, &link_width);
5839         if (status != 0) {
5840                 cmn_err(CE_WARN, "could not read link width!\n");
5841                 link_width = 0;
5842         }
5843         max_read_request_4k = !myri10ge_set_max_readreq(handle);
5844         status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
5845         if (status != 0)
5846                 goto abort_with_cfg_hdl;
5847         if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
5848                 goto abort_with_cfg_hdl;
5849         /*
5850          * XXXX Hack: mac_register_t grows in newer kernels.  To be
5851          * able to write newer fields, such as m_margin, without
5852          * writing outside allocated memory, we allocate our own macp
5853          * and pass that to mac_register()
5854          */
5855         macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
5856         macp->m_version = omacp->m_version;
5857 
5858         if ((mgp = (struct myri10ge_priv *)
5859             kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
5860                 goto abort_with_macinfo;
5861         }
5862         ddi_set_driver_private(dip, mgp);
5863 
5864         /* setup device name for log messages */
5865         (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));
5866 
5867         mutex_enter(&myri10ge_param_lock);
5868         myri10ge_get_props(dip);
5869         mgp->intr_coal_delay = myri10ge_intr_coal_delay;
5870         mgp->pause = myri10ge_flow_control;
5871         mutex_exit(&myri10ge_param_lock);
5872 
5873         mgp->max_read_request_4k = max_read_request_4k;
5874         mgp->pcie_link_width = link_width;
5875         mgp->running = MYRI10GE_ETH_STOPPED;
5876         mgp->vso = vso;
5877         mgp->dip = dip;
5878         mgp->cfg_hdl = handle;
5879 
5880         mgp->timer_ticks = drv_sectohz(5);
5881         myri10ge_test_physical(dip);
5882 
5883         /* allocate command page */
5884         bytes = sizeof (*mgp->cmd);
5885         mgp->cmd = (mcp_cmd_response_t *)
5886             (void *)myri10ge_dma_alloc(dip, bytes,
5887             &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5888             DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
5889             &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
5890         if (mgp->cmd == NULL)
5891                 goto abort_with_mgp;
5892 
5893         (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
5894             &dev_number, &func_number);
5895         if (myri10ge_verbose)
5896                 printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
5897                     bus_number, dev_number, func_number);
5898         status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
5899             (offset_t)0, (offset_t)span,  &myri10ge_dev_access_attr,
5900             &mgp->io_handle);
5901         if (status != DDI_SUCCESS) {
5902                 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
5903                 printf("%s: reg_set = %d, span = %d, status = %d",
5904                     mgp->name, mgp->reg_set, span, status);
5905                 goto abort_with_mgp;
5906         }
5907 
5908         hdr_offset = *(uint32_t *)(void*)(mgp->sram +  MCP_HEADER_PTR_OFFSET);
5909         hdr_offset = ntohl(hdr_offset) & 0xffffc;
5910         ss_offset = hdr_offset +
5911             offsetof(struct mcp_gen_header, string_specs);
5912         mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
5913         myri10ge_pio_copy32(mgp->eeprom_strings,
5914             (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
5915             MYRI10GE_EEPROM_STRINGS_SIZE);
5916         (void) memset(mgp->eeprom_strings +
5917             MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
5918 
5919         status = myri10ge_read_mac_addr(mgp);
5920         if (status) {
5921                 goto abort_with_mapped;
5922         }
5923 
5924         status = myri10ge_select_firmware(mgp);
5925         if (status != 0) {
5926                 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
5927                 goto abort_with_mapped;
5928         }
5929 
5930         status = myri10ge_probe_slices(mgp);
5931         if (status != 0) {
5932                 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
5933                 goto abort_with_dummy_rdma;
5934         }
5935 
5936         status = myri10ge_alloc_slices(mgp);
5937         if (status != 0) {
5938                 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
5939                 goto abort_with_dummy_rdma;
5940         }
5941 
5942         /* add the interrupt handler */
5943         status = myri10ge_add_intrs(mgp, 1);
5944         if (status != 0) {
5945                 cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
5946                     mgp->name);
5947                 goto abort_with_slices;
5948         }
5949 
5950         /* now that we have an iblock_cookie, init the mutexes */
5951         mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
5952         mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);
5953 
5954 
5955         status = myri10ge_nic_stat_init(mgp);
5956         if (status != DDI_SUCCESS)
5957                 goto abort_with_interrupts;
5958         status = myri10ge_info_init(mgp);
5959         if (status != DDI_SUCCESS)
5960                 goto abort_with_stats;
5961 
5962         /*
5963          *      Initialize  GLD state
5964          */
5965 
5966         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
5967         macp->m_driver = mgp;
5968         macp->m_dip = dip;
5969         macp->m_src_addr = mgp->mac_addr;
5970         macp->m_callbacks = &myri10ge_m_callbacks;
5971         macp->m_min_sdu = 0;
5972         macp->m_max_sdu = myri10ge_mtu -
5973             (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
5974 #ifdef SOLARIS_S11
5975         macp->m_margin = VLAN_TAGSZ;
5976 #endif
5977         macp->m_v12n = MAC_VIRT_LEVEL1;
5978         status = mac_register(macp, &mgp->mh);
5979         if (status != 0) {
5980                 cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
5981                     mgp->name, status);
5982                 goto abort_with_info;
5983         }
5984         myri10ge_ndd_init(mgp);
5985         if (myri10ge_verbose)
5986                 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
5987                     mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
5988         mutex_enter(&myri10ge_param_lock);
5989         mgp->next = mgp_list;
5990         mgp_list = mgp;
5991         mutex_exit(&myri10ge_param_lock);
5992         kmem_free(macp, sizeof (*macp) * 8);
5993         mac_free(omacp);
5994         return (DDI_SUCCESS);
5995 
5996 abort_with_info:
5997         myri10ge_info_destroy(mgp);
5998 
5999 abort_with_stats:
6000         myri10ge_nic_stat_destroy(mgp);
6001 
6002 abort_with_interrupts:
6003         mutex_destroy(&mgp->cmd_lock);
6004         mutex_destroy(&mgp->intrlock);
6005         myri10ge_rem_intrs(mgp, 1);
6006 
6007 abort_with_slices:
6008         myri10ge_free_slices(mgp);
6009 
6010 abort_with_dummy_rdma:
6011         myri10ge_dummy_rdma(mgp, 0);
6012 
6013 abort_with_mapped:
6014         ddi_regs_map_free(&mgp->io_handle);
6015 
6016         myri10ge_dma_free(&mgp->cmd_dma);
6017 
6018 abort_with_mgp:
6019         kmem_free(mgp, sizeof (*mgp));
6020 
6021 abort_with_macinfo:
6022         kmem_free(macp, sizeof (*macp) * 8);
6023         mac_free(omacp);
6024 
6025 abort_with_cfg_hdl:
6026         pci_config_teardown(&handle);
6027         return (DDI_FAILURE);
6028 
6029 }
6030 
6031 
6032 static int
6033 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6034 {
6035         struct myri10ge_priv    *mgp, *tmp;
6036         int                     status, i, jbufs_alloced;
6037 
6038         if (cmd == DDI_SUSPEND) {
6039                 status = myri10ge_suspend(dip);
6040                 return (status);
6041         }
6042 
6043         if (cmd != DDI_DETACH) {
6044                 return (DDI_FAILURE);
6045         }
6046         /* Get the driver private (gld_mac_info_t) structure */
6047         mgp = ddi_get_driver_private(dip);
6048 
6049         mutex_enter(&mgp->intrlock);
6050         jbufs_alloced = 0;
6051         for (i = 0; i < mgp->num_slices; i++) {
6052                 myri10ge_remove_jbufs(&mgp->ss[i]);
6053                 jbufs_alloced += mgp->ss[i].jpool.num_alloc;
6054         }
6055         mutex_exit(&mgp->intrlock);
6056         if (jbufs_alloced != 0) {
6057                 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
6058                     mgp->name, jbufs_alloced);
6059                 return (DDI_FAILURE);
6060         }
6061 
6062         mutex_enter(&myri10ge_param_lock);
6063         if (mgp->refcnt != 0) {
6064                 mutex_exit(&myri10ge_param_lock);
6065                 cmn_err(CE_NOTE, "%s: %d external refs remain\n",
6066                     mgp->name, mgp->refcnt);
6067                 return (DDI_FAILURE);
6068         }
6069         mutex_exit(&myri10ge_param_lock);
6070 
6071         status = mac_unregister(mgp->mh);
6072         if (status != DDI_SUCCESS)
6073                 return (status);
6074 
6075         myri10ge_ndd_fini(mgp);
6076         myri10ge_dummy_rdma(mgp, 0);
6077         myri10ge_nic_stat_destroy(mgp);
6078         myri10ge_info_destroy(mgp);
6079 
6080         mutex_destroy(&mgp->cmd_lock);
6081         mutex_destroy(&mgp->intrlock);
6082 
6083         myri10ge_rem_intrs(mgp, 1);
6084 
6085         myri10ge_free_slices(mgp);
6086         ddi_regs_map_free(&mgp->io_handle);
6087         myri10ge_dma_free(&mgp->cmd_dma);
6088         pci_config_teardown(&mgp->cfg_hdl);
6089 
6090         mutex_enter(&myri10ge_param_lock);
6091         if (mgp_list == mgp) {
6092                 mgp_list = mgp->next;
6093         } else {
6094                 tmp = mgp_list;
6095                 while (tmp->next != mgp && tmp->next != NULL)
6096                         tmp = tmp->next;
6097                 if (tmp->next != NULL)
6098                         tmp->next = tmp->next->next;
6099         }
6100         kmem_free(mgp, sizeof (*mgp));
6101         mutex_exit(&myri10ge_param_lock);
6102         return (DDI_SUCCESS);
6103 }
6104 
/*
 * Helper for quiesce entry point: Interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We're essentially running
 * the guts of the interrupt handler, and just cherry picking the
 * confirmation that the NIC is quiesced (stats->link_down)
 *
 * Returns 0 when no interrupt is pending (stats->valid is clear) or
 * when the pending update does not report link down; returns 1 when
 * the stats block has stats_updated and link_down both set.
 */

static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
        /* mgp->ss points at the first slice, i.e. slice 0 */
        struct myri10ge_slice_state *ss = mgp->ss;
        mcp_irq_data_t *stats = ss->fw_stats;
        int valid;
        int found_down = 0;


        /* check for a pending IRQ */

        if (! *((volatile uint8_t *)& stats->valid))
                return (0);
        /* remember the valid bits; the legacy path below waits for them to clear */
        valid = stats->valid;

        /*
         * Make sure to tell the NIC to lower a legacy IRQ, else
         * it may have corrupt state after restarting
         */

        if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
                /* lower legacy IRQ  */
                *mgp->irq_deassert = 0;
                mb();
                /* wait for irq conf DMA */
                /* NOTE(review): unbounded spin until stats->valid clears */
                while (*((volatile uint8_t *)& stats->valid))
                        ;
        }
        if (stats->stats_updated && stats->link_down)
                found_down = 1;

        /*
         * Write the two irq_claim words: the first only if bit 0 of
         * the saved valid value was set, the second unconditionally.
         * NOTE(review): presumably this acknowledges the interrupt to
         * the firmware -- confirm against the interrupt handler.
         */
        if (valid & 0x1)
                *ss->irq_claim = BE_32(3);
        *(ss->irq_claim + 1) = BE_32(3);

        return (found_down);
}
6150 
6151 static int
6152 myri10ge_quiesce(dev_info_t *dip)
6153 {
6154         struct myri10ge_priv *mgp;
6155         myri10ge_cmd_t cmd;
6156         int status, down, i;
6157 
6158         mgp = ddi_get_driver_private(dip);
6159         if (mgp == NULL)
6160                 return (DDI_FAILURE);
6161 
6162         /* if devices was unplumbed, it is guaranteed to be quiescent */
6163         if (mgp->running == MYRI10GE_ETH_STOPPED)
6164                 return (DDI_SUCCESS);
6165 
6166         /* send a down CMD to queuesce NIC */
6167         status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
6168         if (status) {
6169                 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
6170                 return (DDI_FAILURE);
6171         }
6172 
6173         for (i = 0; i < 20; i++) {
6174                 down = myri10ge_poll_down(mgp);
6175                 if (down)
6176                         break;
6177                 delay(drv_usectohz(100000));
6178                 mb();
6179         }
6180         if (down)
6181                 return (DDI_SUCCESS);
6182         return (DDI_FAILURE);
6183 }
6184 
6185 /*
6186  * Distinguish between allocb'ed blocks, and gesballoc'ed attached
6187  * storage.
6188  */
6189 static void
6190 myri10ge_find_lastfree(void)
6191 {
6192         mblk_t *mp = allocb(1024, 0);
6193         dblk_t *dbp;
6194 
6195         if (mp == NULL) {
6196                 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
6197                 return;
6198         }
6199         dbp = mp->b_datap;
6200         myri10ge_db_lastfree = (void *)dbp->db_lastfree;
6201 }
6202 
6203 int
6204 _init(void)
6205 {
6206         int i;
6207 
6208         if (myri10ge_verbose)
6209                 cmn_err(CE_NOTE,
6210                     "Myricom 10G driver (10GbE) version %s loading\n",
6211                     MYRI10GE_VERSION_STR);
6212         myri10ge_find_lastfree();
6213         mac_init_ops(&myri10ge_ops, "myri10ge");
6214         mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
6215         if ((i = mod_install(&modlinkage)) != 0) {
6216                 cmn_err(CE_WARN, "mod_install returned %d\n", i);
6217                 mac_fini_ops(&myri10ge_ops);
6218                 mutex_destroy(&myri10ge_param_lock);
6219         }
6220         return (i);
6221 }
6222 
6223 int
6224 _fini(void)
6225 {
6226         int i;
6227         i = mod_remove(&modlinkage);
6228         if (i != 0) {
6229                 return (i);
6230         }
6231         mac_fini_ops(&myri10ge_ops);
6232         mutex_destroy(&myri10ge_param_lock);
6233         return (0);
6234 }
6235 
6236 int
6237 _info(struct modinfo *modinfop)
6238 {
6239         return (mod_info(&modlinkage, modinfop));
6240 }
6241 
6242 
6243 /*
6244  *  This file uses MyriGE driver indentation.
6245  *
6246  * Local Variables:
6247  * c-file-style:"sun"
6248  * tab-width:8
6249  * End:
6250  */