5255 uts shouldn't open-code ISP2

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_qp.c
  28  *    Hermon Queue Pair Processing Routines
  29  *
  30  *    Implements all the routines necessary for allocating, freeing, and
  31  *    querying the Hermon queue pairs.
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/conf.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/modctl.h>
  39 #include <sys/bitmap.h>
  40 #include <sys/sysmacros.h>
  41 
  42 #include <sys/ib/adapters/hermon/hermon.h>
  43 #include <sys/ib/ib_pkt_hdrs.h>
  44 
  45 static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
  46     hermon_rsrc_t *qpc);
  47 static int hermon_qpn_avl_compare(const void *q, const void *e);
  48 static int hermon_special_qp_rsrc_alloc(hermon_state_t *state,
  49     ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc);
  50 static int hermon_special_qp_rsrc_free(hermon_state_t *state,
  51     ibt_sqp_type_t type, uint_t port);
  52 static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
  53     uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
  54     uint_t *logwqesz, uint_t *max_sgl);
  55 
  56 /*
  57  * hermon_qp_alloc()
  58  *    Context: Can be called only from user or kernel context.
  59  */
  60 int
  61 hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
  62     uint_t sleepflag)
  63 {
  64         hermon_rsrc_t                   *qpc, *rsrc;
  65         hermon_rsrc_type_t              rsrc_type;
  66         hermon_umap_db_entry_t          *umapdb;
  67         hermon_qphdl_t                  qp;
  68         ibt_qp_alloc_attr_t             *attr_p;
  69         ibt_qp_alloc_flags_t            alloc_flags;
  70         ibt_qp_type_t                   type;
  71         hermon_qp_wq_type_t             swq_type;
  72         ibtl_qp_hdl_t                   ibt_qphdl;
  73         ibt_chan_sizes_t                *queuesz_p;
  74         ib_qpn_t                        *qpn;
  75         hermon_qphdl_t                  *qphdl;
  76         ibt_mr_attr_t                   mr_attr;
  77         hermon_mr_options_t             mr_op;
  78         hermon_srqhdl_t                 srq;
  79         hermon_pdhdl_t                  pd;
  80         hermon_cqhdl_t                  sq_cq, rq_cq;
  81         hermon_mrhdl_t                  mr;
  82         uint64_t                        value, qp_desc_off;
  83         uint64_t                        *thewqe, thewqesz;
  84         uint32_t                        *sq_buf, *rq_buf;
  85         uint32_t                        log_qp_sq_size, log_qp_rq_size;
  86         uint32_t                        sq_size, rq_size;
  87         uint32_t                        sq_depth, rq_depth;
  88         uint32_t                        sq_wqe_size, rq_wqe_size, wqesz_shift;
  89         uint32_t                        max_sgl, max_recv_sgl, uarpg;
  90         uint_t                          qp_is_umap;
  91         uint_t                          qp_srq_en, i, j;
  92         int                             status, flag;
  93 
  94         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
  95 
  96         /*
  97          * Extract the necessary info from the hermon_qp_info_t structure
  98          */
  99         attr_p    = qpinfo->qpi_attrp;
 100         type      = qpinfo->qpi_type;
 101         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 102         queuesz_p = qpinfo->qpi_queueszp;
 103         qpn       = qpinfo->qpi_qpn;
 104         qphdl     = &qpinfo->qpi_qphdl;
 105         alloc_flags = attr_p->qp_alloc_flags;
 106 
 107         /*
 108          * Verify correctness of alloc_flags.
 109          *
 110          * 1. FEXCH and RSS are only allocated via qp_range.
 111          */
 112         if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) {
 113                 return (IBT_INVALID_PARAM);
 114         }
 115         rsrc_type = HERMON_QPC;
 116         qp_is_umap = 0;
 117 
 118         /* 2. Make sure only one of these flags is set. */
 119         switch (alloc_flags &
 120             (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) {
 121         case IBT_QP_USER_MAP:
 122                 qp_is_umap = 1;
 123                 break;
 124         case IBT_QP_USES_RFCI:
 125                 if (type != IBT_UD_RQP)
 126                         return (IBT_INVALID_PARAM);
 127 
 128                 switch (attr_p->qp_fc.fc_hca_port) {
 129                 case 1:
 130                         rsrc_type = HERMON_QPC_RFCI_PORT1;
 131                         break;
 132                 case 2:
 133                         rsrc_type = HERMON_QPC_RFCI_PORT2;
 134                         break;
 135                 default:
 136                         return (IBT_INVALID_PARAM);
 137                 }
 138                 break;
 139         case IBT_QP_USES_FCMD:
 140                 if (type != IBT_UD_RQP)
 141                         return (IBT_INVALID_PARAM);
 142                 break;
 143         case 0:
 144                 break;
 145         default:
 146                 return (IBT_INVALID_PARAM);     /* conflicting flags set */
 147         }
 148 
 149         /*
 150          * Determine whether QP is being allocated for userland access or
 151          * whether it is being allocated for kernel access.  If the QP is
 152          * being allocated for userland access, then lookup the UAR
 153          * page number for the current process.  Note:  If this is not found
 154          * (e.g. if the process has not previously open()'d the Hermon driver),
 155          * then an error is returned.
 156          */
 157         if (qp_is_umap) {
 158                 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
 159                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
 160                 if (status != DDI_SUCCESS) {
 161                         return (IBT_INVALID_PARAM);
 162                 }
 163                 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
 164         } else {
 165                 uarpg = state->hs_kernel_uar_index;
 166         }
 167 
 168         /*
 169          * Determine whether QP is being associated with an SRQ
 170          */
 171         qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
 172         if (qp_srq_en) {
 173                 /*
 174                  * Check for valid SRQ handle pointers
 175                  */
 176                 if (attr_p->qp_ibc_srq_hdl == NULL) {
 177                         status = IBT_SRQ_HDL_INVALID;
 178                         goto qpalloc_fail;
 179                 }
 180                 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
 181         }
 182 
 183         /*
 184          * Check for valid QP service type (only UD/RC/UC supported)
 185          */
 186         if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
 187             (type != IBT_UC_RQP))) {
 188                 status = IBT_QP_SRV_TYPE_INVALID;
 189                 goto qpalloc_fail;
 190         }
 191 
 192 
 193         /*
 194          * Check for valid PD handle pointer
 195          */
 196         if (attr_p->qp_pd_hdl == NULL) {
 197                 status = IBT_PD_HDL_INVALID;
 198                 goto qpalloc_fail;
 199         }
 200         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
 201 
 202         /*
 203          * If on an SRQ, check to make sure the PD is the same
 204          */
 205         if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
 206                 status = IBT_PD_HDL_INVALID;
 207                 goto qpalloc_fail;
 208         }
 209 
 210         /* Increment the reference count on the protection domain (PD) */
 211         hermon_pd_refcnt_inc(pd);
 212 
 213         /*
 214          * Check for valid CQ handle pointers
 215          *
 216          * FCMD QPs do not require a receive cq handle.
 217          */
 218         if (attr_p->qp_ibc_scq_hdl == NULL) {
 219                 status = IBT_CQ_HDL_INVALID;
 220                 goto qpalloc_fail1;
 221         }
 222         sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 223         if ((attr_p->qp_ibc_rcq_hdl == NULL)) {
 224                 if ((alloc_flags & IBT_QP_USES_FCMD) == 0) {
 225                         status = IBT_CQ_HDL_INVALID;
 226                         goto qpalloc_fail1;
 227                 }
 228                 rq_cq = sq_cq;  /* just use the send cq */
 229         } else
 230                 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 231 
 232         /*
 233          * Increment the reference count on the CQs.  One or both of these
 234          * could return error if we determine that the given CQ is already
 235          * being used with a special (SMI/GSI) QP.
 236          */
 237         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
 238         if (status != DDI_SUCCESS) {
 239                 status = IBT_CQ_HDL_INVALID;
 240                 goto qpalloc_fail1;
 241         }
 242         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
 243         if (status != DDI_SUCCESS) {
 244                 status = IBT_CQ_HDL_INVALID;
 245                 goto qpalloc_fail2;
 246         }
 247 
 248         /*
 249          * Allocate an QP context entry.  This will be filled in with all
 250          * the necessary parameters to define the Queue Pair.  Unlike
 251          * other Hermon hardware resources, ownership is not immediately
 252          * given to hardware in the final step here.  Instead, we must
 253          * wait until the QP is later transitioned to the "Init" state before
 254          * passing the QP to hardware.  If we fail here, we must undo all
 255          * the reference count (CQ and PD).
 256          */
 257         status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc);
 258         if (status != DDI_SUCCESS) {
 259                 status = IBT_INSUFF_RESOURCE;
 260                 goto qpalloc_fail3;
 261         }
 262 
 263         /*
 264          * Allocate the software structure for tracking the queue pair
 265          * (i.e. the Hermon Queue Pair handle).  If we fail here, we must
 266          * undo the reference counts and the previous resource allocation.
 267          */
 268         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
 269         if (status != DDI_SUCCESS) {
 270                 status = IBT_INSUFF_RESOURCE;
 271                 goto qpalloc_fail4;
 272         }
 273         qp = (hermon_qphdl_t)rsrc->hr_addr;
 274         bzero(qp, sizeof (struct hermon_sw_qp_s));
 275         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
 276 
 277         qp->qp_alloc_flags = alloc_flags;
 278 
 279         /*
 280          * Calculate the QP number from QPC index.  This routine handles
 281          * all of the operations necessary to keep track of used, unused,
 282          * and released QP numbers.
 283          */
 284         if (type == IBT_UD_RQP) {
 285                 qp->qp_qpnum = qpc->hr_indx;
 286                 qp->qp_ring = qp->qp_qpnum << 8;
 287                 qp->qp_qpn_hdl = NULL;
 288         } else {
 289                 status = hermon_qp_create_qpn(state, qp, qpc);
 290                 if (status != DDI_SUCCESS) {
 291                         status = IBT_INSUFF_RESOURCE;
 292                         goto qpalloc_fail5;
 293                 }
 294         }
 295 
 296         /*
 297          * If this will be a user-mappable QP, then allocate an entry for
 298          * the "userland resources database".  This will later be added to
 299          * the database (after all further QP operations are successful).
 300          * If we fail here, we must undo the reference counts and the
 301          * previous resource allocation.
 302          */
 303         if (qp_is_umap) {
 304                 umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum,
 305                     MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
 306                 if (umapdb == NULL) {
 307                         status = IBT_INSUFF_RESOURCE;
 308                         goto qpalloc_fail6;
 309                 }
 310         }
 311 
 312         /*
 313          * Allocate the doorbell record.  Hermon just needs one for the RQ,
 314          * if the QP is not associated with an SRQ, and use uarpg (above) as
 315          * the uar index
 316          */
 317 
 318         if (!qp_srq_en) {
 319                 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
 320                     &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
 321                 if (status != DDI_SUCCESS) {
 322                         status = IBT_INSUFF_RESOURCE;
 323                         goto qpalloc_fail6;
 324                 }
 325         }
 326 
 327         qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
 328 
 329         /*
 330          * We verify that the requested number of SGL is valid (i.e.
 331          * consistent with the device limits and/or software-configured
 332          * limits).  If not, then obviously the same cleanup needs to be done.
 333          */
 334         if (type == IBT_UD_RQP) {
 335                 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
 336                 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
 337         } else {
 338                 max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz;
 339                 swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN;
 340         }
 341         max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
 342         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 343             (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
 344                 status = IBT_HCA_SGL_EXCEEDED;
 345                 goto qpalloc_fail7;
 346         }
 347 
 348         /*
 349          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
 350          * This will depend on the requested number of SGLs.  Note: this
 351          * has the side-effect of also calculating the real number of SGLs
 352          * (for the calculated WQE size).
 353          *
 354          * For QP's on an SRQ, we set these to 0.
 355          */
 356         if (qp_srq_en) {
 357                 qp->qp_rq_log_wqesz = 0;
 358                 qp->qp_rq_sgl = 0;
 359         } else {
 360                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 361                     max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
 362                     &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
 363         }
 364         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 365             max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 366 
 367         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 368 
 369         /* NOTE: currently policy in driver, later maybe IBTF interface */
 370         qp->qp_no_prefetch = 0;
 371 
 372         /*
 373          * for prefetching, we need to add the number of wqes in
 374          * the 2k area plus one to the number requested, but
 375          * ONLY for send queue.  If no_prefetch == 1 (prefetch off)
 376          * it's exactly TWO wqes for the headroom
 377          */
 378         if (qp->qp_no_prefetch)
 379                 qp->qp_sq_headroom = 2 * sq_wqe_size;
 380         else
 381                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
 382         /*
 383          * hdrm wqes must be integral since both sq_wqe_size &
 384          * HERMON_QP_OH_SIZE are power of 2
 385          */
 386         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
 387 
 388 
 389         /*
 390          * Calculate the appropriate size for the work queues.
 391          * For send queue, add in the headroom wqes to the calculation.
 392          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
 393          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
 394          * to round the requested size up to the next highest power-of-2
 395          */
 396         /* first, adjust to a minimum and tell the caller the change */
 397         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
 398             HERMON_QP_MIN_SIZE);
 399         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
 400             HERMON_QP_MIN_SIZE);
 401         /*
 402          * now, calculate the alloc size, taking into account
 403          * the headroom for the sq
 404          */
 405         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
 406         /* if the total is a power of two, reduce it */
 407         if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes))    {

 408                 log_qp_sq_size = log_qp_sq_size - 1;
 409         }
 410 
 411         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 412         if (ISP2(attr_p->qp_sizes.cs_rq)) {
 413                 log_qp_rq_size = log_qp_rq_size - 1;
 414         }
 415 
 416         /*
 417          * Next we verify that the rounded-up size is valid (i.e. consistent
 418          * with the device limits and/or software-configured limits).  If not,
 419          * then obviously we have a lot of cleanup to do before returning.
 420          *
 421          * NOTE: the first condition deals with the (test) case of cs_sq
 422          * being just less than 2^32.  In this case, the headroom addition
 423          * to the requested cs_sq will pass the test when it should not.
 424          * This test no longer lets that case slip through the check.
 425          */
 426         if ((attr_p->qp_sizes.cs_sq >
 427             (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
 428             (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
 429             (!qp_srq_en && (log_qp_rq_size >
 430             state->hs_cfg_profile->cp_log_max_qp_sz))) {
 431                 status = IBT_HCA_WR_EXCEEDED;
 432                 goto qpalloc_fail7;
 433         }
 434 
 435         /*
 436          * Allocate the memory for QP work queues. Since Hermon work queues
 437          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 438          * the work queue memory is very important.  We used to allocate
 439          * work queues (the combined receive and send queues) so that they
 440          * would be aligned on their combined size.  That alignment guaranteed
 441          * that they would never cross the 4GB boundary (Hermon work queues
 442          * are on the order of MBs at maximum).  Now we are able to relax
 443          * this alignment constraint by ensuring that the IB address assigned
 444          * to the queue memory (as a result of the hermon_mr_register() call)
 445          * is offset from zero.
 446          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 447          * guarantee the alignment, but when attempting to use IOMMU bypass
 448          * mode we found that we were not allowed to specify any alignment
 449          * that was more restrictive than the system page size.
 450          * So we avoided this constraint by passing two alignment values,
 451          * one for the memory allocation itself and the other for the DMA
 452          * handle (for later bind).  This used to cause more memory than
 453          * necessary to be allocated (in order to guarantee the more
 454          * restrictive alignment contraint).  But by guaranteeing the
 455          * zero-based IB virtual address for the queue, we are able to
 456          * conserve this memory.
 457          */
 458         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 459         sq_depth    = 1 << log_qp_sq_size;
 460         sq_size     = sq_depth * sq_wqe_size;
 461 
 462         /* QP on SRQ sets these to 0 */
 463         if (qp_srq_en) {
 464                 rq_wqe_size = 0;
 465                 rq_size     = 0;
 466         } else {
 467                 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
 468                 rq_depth    = 1 << log_qp_rq_size;
 469                 rq_size     = rq_depth * rq_wqe_size;
 470         }
 471 
 472         qp->qp_wqinfo.qa_size = sq_size + rq_size;
 473 
 474         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
 475         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
 476 
 477         if (qp_is_umap) {
 478                 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 479         } else {
 480                 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 481         }
 482         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
 483         if (status != DDI_SUCCESS) {
 484                 status = IBT_INSUFF_RESOURCE;
 485                 goto qpalloc_fail7;
 486         }
 487 
 488         /*
 489          * Sort WQs in memory according to stride (*q_wqe_size), largest first
 490          * If they are equal, still put the SQ first
 491          */
 492         qp->qp_sq_baseaddr = 0;
 493         qp->qp_rq_baseaddr = 0;
 494         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
 495                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
 496 
 497                 /* if this QP is on an SRQ, set the rq_buf to NULL */
 498                 if (qp_srq_en) {
 499                         rq_buf = NULL;
 500                 } else {
 501                         rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
 502                         qp->qp_rq_baseaddr = sq_size;
 503                 }
 504         } else {
 505                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
 506                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
 507                 qp->qp_sq_baseaddr = rq_size;
 508         }
 509 
 510         if (qp_is_umap == 0) {
 511                 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
 512                 if (qp->qp_sq_wqhdr == NULL) {
 513                         status = IBT_INSUFF_RESOURCE;
 514                         goto qpalloc_fail8;
 515                 }
 516                 if (qp_srq_en) {
 517                         qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
 518                         qp->qp_rq_wqavl.wqa_srq_en = 1;
 519                         qp->qp_rq_wqavl.wqa_srq = srq;
 520                 } else {
 521                         qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
 522                         if (qp->qp_rq_wqhdr == NULL) {
 523                                 status = IBT_INSUFF_RESOURCE;
 524                                 goto qpalloc_fail8;
 525                         }
 526                         qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
 527                 }
 528                 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
 529                 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
 530                 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
 531                 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
 532                 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
 533         }
 534 
 535         /*
 536          * Register the memory for the QP work queues.  The memory for the
 537          * QP must be registered in the Hermon cMPT tables.  This gives us the
 538          * LKey to specify in the QP context later.  Note: The memory for
 539          * Hermon work queues (both Send and Recv) must be contiguous and
 540          * registered as a single memory region.  Note: If the QP memory is
 541          * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
 542          * meet the alignment restriction, we pass the "mro_bind_override_addr"
 543          * flag in the call to hermon_mr_register(). This guarantees that the
 544          * resulting IB vaddr will be zero-based (modulo the offset into the
 545          * first page). If we fail here, we still have the bunch of resource
 546          * and reference count cleanup to do.
 547          */
 548         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
 549             IBT_MR_NOSLEEP;
 550         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
 551         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
 552         mr_attr.mr_as       = NULL;
 553         mr_attr.mr_flags    = flag;
 554         if (qp_is_umap) {
 555                 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
 556         } else {
 557                 /* HERMON_QUEUE_LOCATION_NORMAL */
 558                 mr_op.mro_bind_type =
 559                     state->hs_cfg_profile->cp_iommu_bypass;
 560         }
 561         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
 562         mr_op.mro_bind_override_addr = 1;
 563         status = hermon_mr_register(state, pd, &mr_attr, &mr,
 564             &mr_op, HERMON_QP_CMPT);
 565         if (status != DDI_SUCCESS) {
 566                 status = IBT_INSUFF_RESOURCE;
 567                 goto qpalloc_fail9;
 568         }
 569 
 570         /*
 571          * Calculate the offset between the kernel virtual address space
 572          * and the IB virtual address space.  This will be used when
 573          * posting work requests to properly initialize each WQE.
 574          */
 575         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
 576             (uint64_t)mr->mr_bindinfo.bi_addr;
 577 
 578         /*
 579          * Fill in all the return arguments (if necessary).  This includes
 580          * real work queue sizes (in wqes), real SGLs, and QP number
 581          */
 582         if (queuesz_p != NULL) {
 583                 queuesz_p->cs_sq     =
 584                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
 585                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
 586 
 587                 /* if this QP is on an SRQ, set these to 0 */
 588                 if (qp_srq_en) {
 589                         queuesz_p->cs_rq     = 0;
 590                         queuesz_p->cs_rq_sgl = 0;
 591                 } else {
 592                         queuesz_p->cs_rq     = (1 << log_qp_rq_size);
 593                         queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
 594                 }
 595         }
 596         if (qpn != NULL) {
 597                 *qpn = (ib_qpn_t)qp->qp_qpnum;
 598         }
 599 
 600         /*
 601          * Fill in the rest of the Hermon Queue Pair handle.
 602          */
 603         qp->qp_qpcrsrcp              = qpc;
 604         qp->qp_rsrcp         = rsrc;
 605         qp->qp_state         = HERMON_QP_RESET;
 606         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
 607         qp->qp_pdhdl         = pd;
 608         qp->qp_mrhdl         = mr;
 609         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
 610             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
 611         qp->qp_is_special    = 0;
 612         qp->qp_uarpg         = uarpg;
 613         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
 614         qp->qp_sq_cqhdl              = sq_cq;
 615         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
 616         qp->qp_sq_logqsz     = log_qp_sq_size;
 617         qp->qp_sq_buf                = sq_buf;
 618         qp->qp_desc_off              = qp_desc_off;
 619         qp->qp_rq_cqhdl              = rq_cq;
 620         qp->qp_rq_buf                = rq_buf;
 621         qp->qp_rlky          = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
 622             0;
 623 
 624         /* if this QP is on an SRQ, set rq_bufsz to 0 */
 625         if (qp_srq_en) {
 626                 qp->qp_rq_bufsz              = 0;
 627                 qp->qp_rq_logqsz     = 0;
 628         } else {
 629                 qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
 630                 qp->qp_rq_logqsz     = log_qp_rq_size;
 631         }
 632 
 633         qp->qp_forward_sqd_event  = 0;
 634         qp->qp_sqd_still_draining = 0;
 635         qp->qp_hdlrarg               = (void *)ibt_qphdl;
 636         qp->qp_mcg_refcnt    = 0;
 637 
 638         /*
 639          * If this QP is to be associated with an SRQ, set the SRQ handle
 640          */
 641         if (qp_srq_en) {
 642                 qp->qp_srqhdl = srq;
 643                 hermon_srq_refcnt_inc(qp->qp_srqhdl);
 644         } else {
 645                 qp->qp_srqhdl = NULL;
 646         }
 647 
 648         /* Determine the QP service type */
 649         qp->qp_type = type;
 650         if (type == IBT_RC_RQP) {
 651                 qp->qp_serv_type = HERMON_QP_RC;
 652         } else if (type == IBT_UD_RQP) {
 653                 if (alloc_flags & IBT_QP_USES_RFCI)
 654                         qp->qp_serv_type = HERMON_QP_RFCI;
 655                 else if (alloc_flags & IBT_QP_USES_FCMD)
 656                         qp->qp_serv_type = HERMON_QP_FCMND;
 657                 else
 658                         qp->qp_serv_type = HERMON_QP_UD;
 659         } else {
 660                 qp->qp_serv_type = HERMON_QP_UC;
 661         }
 662 
 663         /*
 664          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
 665          */
 666 
 667         /*
 668          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
 669          * set the quadword to all F's - high-order bit is owner (init to one)
 670          * and the rest for the headroom definition of prefetching
 671          *
 672          */
 673         wqesz_shift = qp->qp_sq_log_wqesz;
 674         thewqesz    = 1 << wqesz_shift;
 675         thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
 676         if (qp_is_umap == 0) {
 677                 for (i = 0; i < sq_depth; i++) {
 678                         /*
 679                          * for each stride, go through and every 64 bytes
 680                          * write the init value - having set the address
 681                          * once, just keep incrementing it
 682                          */
 683                         for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
 684                                 *(uint32_t *)thewqe = 0xFFFFFFFF;
 685                         }
 686                 }
 687         }
 688 
 689         /* Zero out the QP context */
 690         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
 691 
 692         /*
 693          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
 694          * "qphdl" and return success
 695          */
 696         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp);
 697 
 698         /*
 699          * If this is a user-mappable QP, then we need to insert the previously
 700          * allocated entry into the "userland resources database".  This will
 701          * allow for later lookup during devmap() (i.e. mmap()) calls.
 702          */
 703         if (qp_is_umap) {
 704                 hermon_umap_db_add(umapdb);
 705         }
 706         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
 707             DDI_INTR_PRI(state->hs_intrmsi_pri));
 708 
 709         *qphdl = qp;
 710 
 711         return (DDI_SUCCESS);
 712 
 713 /*
 714  * The following is cleanup for all possible failure cases in this routine
 715  */
 716 qpalloc_fail9:
 717         hermon_queue_free(&qp->qp_wqinfo);
 718 qpalloc_fail8:
 719         if (qp->qp_sq_wqhdr)
 720                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
 721         if (qp->qp_rq_wqhdr)
 722                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
 723 qpalloc_fail7:
 724         if (qp_is_umap) {
 725                 hermon_umap_db_free(umapdb);
 726         }
 727         if (!qp_srq_en) {
 728                 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
 729         }
 730 
 731 qpalloc_fail6:
 732         /*
 733          * Releasing the QPN will also free up the QPC context.  Update
 734          * the QPC context pointer to indicate this.
 735          */
 736         if (qp->qp_qpn_hdl) {
 737                 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
 738                     HERMON_QPN_RELEASE);
 739         } else {
 740                 hermon_rsrc_free(state, &qpc);
 741         }
 742         qpc = NULL;
 743 qpalloc_fail5:
 744         hermon_rsrc_free(state, &rsrc);
 745 qpalloc_fail4:
 746         if (qpc) {
 747                 hermon_rsrc_free(state, &qpc);
 748         }
 749 qpalloc_fail3:
 750         hermon_cq_refcnt_dec(rq_cq);
 751 qpalloc_fail2:
 752         hermon_cq_refcnt_dec(sq_cq);
 753 qpalloc_fail1:
 754         hermon_pd_refcnt_dec(pd);
 755 qpalloc_fail:
 756         return (status);
 757 }
 758 
 759 
 760 
 761 /*
 762  * hermon_special_qp_alloc()
 763  *    Context: Can be called only from user or kernel context.
      *
      *    Allocate a special (SMI or GSI) queue pair described by "qpinfo".
      *    References are taken on the PD and on the send/recv CQs, the special
      *    QP context and a QP handle are obtained, an RQ doorbell record and
      *    the work queue memory are allocated, the queue memory is registered,
      *    and the send queue WQEs are initialized.
      *
      *    state     - driver state (hermon_state_t)
      *    qpinfo    - input: qpi_attrp, qpi_type, qpi_port, qpi_ibt_qphdl and
      *                qpi_queueszp.  Output: qpi_qphdl receives the new QP
      *                handle and, if qpi_queueszp is non-NULL, it is filled
      *                with the actual queue sizes and SGL counts.  Note that
      *                qp_sizes.cs_sq/cs_rq in the attributes are raised to
      *                HERMON_QP_MIN_SIZE if they were smaller.
      *    sleepflag - handed to the resource and queue allocators; also
      *                selects IBT_MR_SLEEP vs. IBT_MR_NOSLEEP for the memory
      *                registration
      *
      *    Returns DDI_SUCCESS on success.  On failure, whatever was acquired
      *    up to that point is released and an IBT_* status is returned (the
      *    status of hermon_special_qp_rsrc_alloc() is passed through as is).
 764  */
 765 int
 766 hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
 767     uint_t sleepflag)
 768 {
 769         hermon_rsrc_t           *qpc, *rsrc;
 770         hermon_qphdl_t          qp;
 771         ibt_qp_alloc_attr_t     *attr_p;
 772         ibt_sqp_type_t          type;
 773         uint8_t                 port;
 774         ibtl_qp_hdl_t           ibt_qphdl;
 775         ibt_chan_sizes_t        *queuesz_p;
 776         hermon_qphdl_t          *qphdl;
 777         ibt_mr_attr_t           mr_attr;
 778         hermon_mr_options_t     mr_op;
 779         hermon_pdhdl_t          pd;
 780         hermon_cqhdl_t          sq_cq, rq_cq;
 781         hermon_mrhdl_t          mr;
 782         uint64_t                qp_desc_off;
 783         uint64_t                *thewqe, thewqesz;
 784         uint32_t                *sq_buf, *rq_buf;
 785         uint32_t                log_qp_sq_size, log_qp_rq_size;
 786         uint32_t                sq_size, rq_size, max_sgl;
 787         uint32_t                uarpg;
 788         uint32_t                sq_depth;
 789         uint32_t                sq_wqe_size, rq_wqe_size, wqesz_shift;
 790         int                     status, flag, i, j;
 791 
 792         /*
 793          * Extract the necessary info from the hermon_qp_info_t structure
 794          */
 795         attr_p    = qpinfo->qpi_attrp;
 796         type      = qpinfo->qpi_type;
 797         port      = qpinfo->qpi_port;
 798         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 799         queuesz_p = qpinfo->qpi_queueszp;
 800         qphdl     = &qpinfo->qpi_qphdl;
 801 
 802         /*
 803          * Check for valid special QP type (only SMI & GSI supported)
 804          */
 805         if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
 806                 status = IBT_QP_SPECIAL_TYPE_INVALID;
 807                 goto spec_qpalloc_fail;
 808         }
 809 
 810         /*
 811          * Check for valid port number
 812          */
 813         if (!hermon_portnum_is_valid(state, port)) {
 814                 status = IBT_HCA_PORT_INVALID;
 815                 goto spec_qpalloc_fail;
 816         }
 817         port = port - 1;  /* used below as an offset from the QPC index */
 818 
 819         /*
 820          * Check for valid PD handle pointer
 821          */
 822         if (attr_p->qp_pd_hdl == NULL) {
 823                 status = IBT_PD_HDL_INVALID;
 824                 goto spec_qpalloc_fail;
 825         }
 826         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
 827 
 828         /* Increment the reference count on the PD */
 829         hermon_pd_refcnt_inc(pd);
 830 
 831         /*
 832          * Check for valid CQ handle pointers
 833          */
 834         if ((attr_p->qp_ibc_scq_hdl == NULL) ||
 835             (attr_p->qp_ibc_rcq_hdl == NULL)) {
 836                 status = IBT_CQ_HDL_INVALID;
 837                 goto spec_qpalloc_fail1;
 838         }
 839         sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 840         rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 841 
 842         /*
 843          * Increment the reference count on the CQs.  One or both of these
 844          * could return error if we determine that the given CQ is already
 845          * being used with a non-special QP (i.e. a normal QP).
 846          */
 847         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL);
 848         if (status != DDI_SUCCESS) {
 849                 status = IBT_CQ_HDL_INVALID;
 850                 goto spec_qpalloc_fail1;
 851         }
 852         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL);
 853         if (status != DDI_SUCCESS) {
 854                 status = IBT_CQ_HDL_INVALID;
 855                 goto spec_qpalloc_fail2;
 856         }
 857 
 858         /*
 859          * Allocate the special QP resources.  Essentially, this allocation
 860          * amounts to checking if the requested special QP has already been
 861          * allocated.  If successful, the QP context returned is an actual
 862          * QP context that has been "aliased" to act as a special QP of the
 863          * appropriate type (and for the appropriate port).  Just as in
 864          * hermon_qp_alloc() above, ownership for this QP context is not
 865          * immediately given to hardware in the final step here.  Instead, we
 866          * wait until the QP is later transitioned to the "Init" state before
 867          * passing the QP to hardware.  If we fail here, we must undo all
 868          * the reference counts (CQ and PD).
 869          */
 870         status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc);
 871         if (status != DDI_SUCCESS) {
 872                 goto spec_qpalloc_fail3;
 873         }
 874 
 875         /*
 876          * Allocate the software structure for tracking the special queue
 877          * pair (i.e. the Hermon Queue Pair handle).  If we fail here, we
 878          * must undo the reference counts and the previous resource allocation.
 879          */
 880         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
 881         if (status != DDI_SUCCESS) {
 882                 status = IBT_INSUFF_RESOURCE;
 883                 goto spec_qpalloc_fail4;
 884         }
 885         qp = (hermon_qphdl_t)rsrc->hr_addr;
 886 
 887         bzero(qp, sizeof (struct hermon_sw_qp_s));
 888 
 889         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
 890         qp->qp_alloc_flags = attr_p->qp_alloc_flags;
 891 
 892         /*
 893          * Actual QP number is a combination of the index of the QPC and
 894          * the port number.  This is because the special QP contexts must
 895          * be allocated two-at-a-time.
 896          */
 897         qp->qp_qpnum = qpc->hr_indx + port;
 898         qp->qp_ring = qp->qp_qpnum << 8;
 899 
 900         uarpg = state->hs_kernel_uar_index; /* must be for spec qp */
 901         /*
 902          * Allocate the doorbell record.  Hermon uses only one for the RQ so
 903          * alloc a qp doorbell, using uarpg (above) as the uar index
 904          */
 905 
 906         status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
 907             &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
 908         if (status != DDI_SUCCESS) {
 909                 status = IBT_INSUFF_RESOURCE;
 910                 goto spec_qpalloc_fail5;
 911         }
 912         /*
 913          * Calculate the appropriate size for the work queues.
 914          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
 915          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
 916          * to round the requested size up to the next highest power-of-2
 917          */
 918         attr_p->qp_sizes.cs_sq =
 919             max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE);
 920         attr_p->qp_sizes.cs_rq =
 921             max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE);
 922         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
 923         if (ISP2(attr_p->qp_sizes.cs_sq)) {
 924                 log_qp_sq_size = log_qp_sq_size - 1;
 925         }
 926         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 927         if (ISP2(attr_p->qp_sizes.cs_rq)) {
 928                 log_qp_rq_size = log_qp_rq_size - 1;
 929         }
 930 
 931         /*
 932          * Next we verify that the rounded-up size is valid (i.e. consistent
 933          * with the device limits and/or software-configured limits).  If not,
 934          * then obviously we have a bit of cleanup to do before returning.
 935          */
 936         if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
 937             (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) {
 938                 status = IBT_HCA_WR_EXCEEDED;
 939                 goto spec_qpalloc_fail5a;
 940         }
 941 
 942         /*
 943          * Next we verify that the requested number of SGL is valid (i.e.
 944          * consistent with the device limits and/or software-configured
 945          * limits).  If not, then obviously the same cleanup needs to be done.
 946          */
 947         max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl;
 948         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 949             (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
 950                 status = IBT_HCA_SGL_EXCEEDED;
 951                 goto spec_qpalloc_fail5a;
 952         }
 953 
 954         /*
 955          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
 956          * This will depend on the requested number of SGLs.  Note: this
 957          * has the side-effect of also calculating the real number of SGLs
 958          * (for the calculated WQE size).
 959          */
 960         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 961             max_sgl, HERMON_QP_WQ_TYPE_RECVQ,
 962             &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
 963         if (type == IBT_SMI_SQP) {
 964                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 965                     max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0,
 966                     &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 967         } else {
 968                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 969                     max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1,
 970                     &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 971         }
 972 
 973         /*
 974          * Allocate the memory for QP work queues. Since Hermon work queues
 975          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 976          * the work queue memory is very important.  We used to allocate
 977          * work queues (the combined receive and send queues) so that they
 978          * would be aligned on their combined size.  That alignment guaranteed
 979          * that they would never cross the 4GB boundary (Hermon work queues
 980          * are on the order of MBs at maximum).  Now we are able to relax
 981          * this alignment constraint by ensuring that the IB address assigned
 982          * to the queue memory (as a result of the hermon_mr_register() call)
 983          * is offset from zero.
 984          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 985          * guarantee the alignment, but when attempting to use IOMMU bypass
 986          * mode we found that we were not allowed to specify any alignment
 987          * that was more restrictive than the system page size.
 988          * So we avoided this constraint by passing two alignment values,
 989          * one for the memory allocation itself and the other for the DMA
 990          * handle (for later bind).  This used to cause more memory than
 991          * necessary to be allocated (in order to guarantee the more
 992          * restrictive alignment constraint).  But by guaranteeing the
 993          * zero-based IB virtual address for the queue, we are able to
 994          * conserve this memory.
 995          */
 996         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 997         sq_depth    = 1 << log_qp_sq_size;
 998         sq_size     = (1 << log_qp_sq_size) * sq_wqe_size;
 999 
1000         rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1001         rq_size     = (1 << log_qp_rq_size) * rq_wqe_size;
1002 
1003         qp->qp_wqinfo.qa_size          = sq_size + rq_size;
1004 
1005         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1006         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
1007         qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1008 
1009         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1010         if (status != DDI_SUCCESS) {
1011                 status = IBT_INSUFF_RESOURCE;
1012                 goto spec_qpalloc_fail5a;
1013         }
1014 
1015         /*
1016          * Sort WQs in memory according to depth, stride (*q_wqe_size),
1017          * biggest first. If equal, the Send Queue still goes first
1018          */
1019         qp->qp_sq_baseaddr = 0;
1020         qp->qp_rq_baseaddr = 0;
1021         if (sq_wqe_size >= rq_wqe_size) {
1022                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1023                 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1024                 qp->qp_rq_baseaddr = sq_size;
1025         } else {
1026                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1027                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1028                 qp->qp_sq_baseaddr = rq_size;
1029         }
1030 
1031         qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1032         if (qp->qp_sq_wqhdr == NULL) {
1033                 status = IBT_INSUFF_RESOURCE;
1034                 goto spec_qpalloc_fail6;
1035         }
1036         qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size);
1037         if (qp->qp_rq_wqhdr == NULL) {
1038                 status = IBT_INSUFF_RESOURCE;
1039                 goto spec_qpalloc_fail6;
1040         }
1041         qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1042         qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1043         qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1044         qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1045         qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1046         qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1047 
1048         /*
1049          * Register the memory for the special QP work queues.  The memory for
1050          * the special QP must be registered in the Hermon cMPT tables.  This
1051          * gives us the LKey to specify in the QP context later.  Note: The
1052          * memory for Hermon work queues (both Send and Recv) must be contiguous
1053          * and registered as a single memory region. Also, in order to meet the
1054          * alignment restriction, we pass the "mro_bind_override_addr" flag in
1055          * the call to hermon_mr_register(). This guarantees that the resulting
1056          * IB vaddr will be zero-based (modulo the offset into the first page).
1057          * If we fail here, we have a bunch of resource and reference count
1058          * cleanup to do.
1059          */
1060         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1061             IBT_MR_NOSLEEP;
1062         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1063         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
1064         mr_attr.mr_as       = NULL;
1065         mr_attr.mr_flags    = flag;
1066 
1067         mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
1068         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1069         mr_op.mro_bind_override_addr = 1;
1070 
1071         status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op,
1072             HERMON_QP_CMPT);
1073         if (status != DDI_SUCCESS) {
1074                 status = IBT_INSUFF_RESOURCE;
1075                 goto spec_qpalloc_fail6;
1076         }
1077 
1078         /*
1079          * Calculate the offset between the kernel virtual address space
1080          * and the IB virtual address space.  This will be used when
1081          * posting work requests to properly initialize each WQE.
1082          */
1083         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1084             (uint64_t)mr->mr_bindinfo.bi_addr;
1085 
1086         /* set the prefetch - initially, not prefetching */
1087         qp->qp_no_prefetch = 1;
1088 
1089         if (qp->qp_no_prefetch)
1090                 qp->qp_sq_headroom = 2 * sq_wqe_size;
1091         else
1092                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1093         /*
1094          * hdrm wqes must be integral since both sq_wqe_size &
1095          * HERMON_QP_OH_SIZE are power of 2
1096          */
1097         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1098         /*
1099          * Fill in all the return arguments (if necessary).  This includes
1100          * the real work queue sizes and real SGLs; the reported send queue
1101          * size excludes the headroom WQEs computed above
1102          */
1103         if (queuesz_p != NULL) {
1104                 queuesz_p->cs_sq     =
1105                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1106                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1107                 queuesz_p->cs_rq     = (1 << log_qp_rq_size);
1108                 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1109         }
1110 
1111         /*
1112          * Fill in the rest of the Hermon Queue Pair handle.  We can update
1113          * the following fields for use in further operations on the QP.
1114          */
1115         qp->qp_qpcrsrcp              = qpc;
1116         qp->qp_rsrcp         = rsrc;
1117         qp->qp_state         = HERMON_QP_RESET;
1118         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1119         qp->qp_pdhdl         = pd;
1120         qp->qp_mrhdl         = mr;
1121         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1122             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1123         qp->qp_is_special    = (type == IBT_SMI_SQP) ?
1124             HERMON_QP_SMI : HERMON_QP_GSI;
1125         qp->qp_uarpg         = uarpg;
1126         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
1127         qp->qp_sq_cqhdl              = sq_cq;
1128         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
1129         qp->qp_sq_buf                = sq_buf;
1130         qp->qp_sq_logqsz     = log_qp_sq_size;
1131         qp->qp_desc_off              = qp_desc_off;
1132         qp->qp_rq_cqhdl              = rq_cq;
1133         qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
1134         qp->qp_rq_buf                = rq_buf;
1135         qp->qp_rq_logqsz     = log_qp_rq_size;
1136         qp->qp_portnum               = port;
1137         qp->qp_pkeyindx              = 0;
1138         qp->qp_forward_sqd_event  = 0;
1139         qp->qp_sqd_still_draining = 0;
1140         qp->qp_hdlrarg               = (void *)ibt_qphdl;
1141         qp->qp_mcg_refcnt    = 0;
1142         qp->qp_srqhdl                = NULL;
1143 
1144         /* All special QPs are UD QP service type */
1145         qp->qp_type = IBT_UD_RQP;
1146         qp->qp_serv_type = HERMON_QP_UD;
1147 
1148         /*
1149          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1150          */
1151 
1152         /*
1153          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1154          * set the quadword to all F's - high-order bit is owner (init to one)
1155          * and the rest for the headroom definition of prefetching
1156          *
1157          */
1158 
1159         wqesz_shift = qp->qp_sq_log_wqesz;
1160         thewqesz    = 1 << wqesz_shift;
1161         thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1162         for (i = 0; i < sq_depth; i++) {
1163                 /*
1164                  * for each stride, go through and every 64 bytes write the
1165                  * init value - having set the address once, just keep
1166                  * incrementing it
1167                  */
1168                 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1169                         *(uint32_t *)thewqe = 0xFFFFFFFF;
1170                 }
1171         }
1172 
1173 
1174         /* Zero out the QP context */
1175         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1176 
1177         /*
1178          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
1179          * "qphdl" and return success
1180          */
1181         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp);
1182 
1183         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1184             DDI_INTR_PRI(state->hs_intrmsi_pri));
1185 
1186         *qphdl = qp;
1187 
1188         return (DDI_SUCCESS);
1189 
1190 /*
1191  * The following is cleanup for all possible failure cases in this routine
1192  */
1193 spec_qpalloc_fail6:
1194         hermon_queue_free(&qp->qp_wqinfo);
1195         if (qp->qp_sq_wqhdr)
1196                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1197         if (qp->qp_rq_wqhdr)
1198                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1199 spec_qpalloc_fail5a:
1200         hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1201 spec_qpalloc_fail5:
1202         hermon_rsrc_free(state, &rsrc);
1203 spec_qpalloc_fail4:
1204         if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1205                 HERMON_WARNING(state, "failed to free special QP rsrc");
1206         }
1207 spec_qpalloc_fail3:
1208         hermon_cq_refcnt_dec(rq_cq);
1209 spec_qpalloc_fail2:
1210         hermon_cq_refcnt_dec(sq_cq);
1211 spec_qpalloc_fail1:
1212         hermon_pd_refcnt_dec(pd);
1213 spec_qpalloc_fail:
1214         return (status);
1215 }
1216 
1217 
1218 /*
1219  * hermon_qp_alloc_range()
1220  *    Context: Can be called only from user or kernel context.
1221  */
1222 int
1223 hermon_qp_alloc_range(hermon_state_t *state, uint_t log2,
1224     hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl,
1225     ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq,
1226     hermon_qphdl_t *qphdl, uint_t sleepflag)
1227 {
1228         hermon_rsrc_t                   *qpc, *rsrc;
1229         hermon_rsrc_type_t              rsrc_type;
1230         hermon_qphdl_t                  qp;
1231         hermon_qp_range_t               *qp_range_p;
1232         ibt_qp_alloc_attr_t             *attr_p;
1233         ibt_qp_type_t                   type;
1234         hermon_qp_wq_type_t             swq_type;
1235         ibt_chan_sizes_t                *queuesz_p;
1236         ibt_mr_attr_t                   mr_attr;
1237         hermon_mr_options_t             mr_op;
1238         hermon_srqhdl_t                 srq;
1239         hermon_pdhdl_t                  pd;
1240         hermon_cqhdl_t                  sq_cq, rq_cq;
1241         hermon_mrhdl_t                  mr;
1242         uint64_t                        qp_desc_off;
1243         uint64_t                        *thewqe, thewqesz;
1244         uint32_t                        *sq_buf, *rq_buf;
1245         uint32_t                        log_qp_sq_size, log_qp_rq_size;
1246         uint32_t                        sq_size, rq_size;
1247         uint32_t                        sq_depth, rq_depth;
1248         uint32_t                        sq_wqe_size, rq_wqe_size, wqesz_shift;
1249         uint32_t                        max_sgl, max_recv_sgl, uarpg;
1250         uint_t                          qp_srq_en, i, j;
1251         int                             ii;     /* loop counter for range */
1252         int                             status, flag;
1253         uint_t                          serv_type;
1254 
1255         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
1256 
1257         /*
1258          * Extract the necessary info from the hermon_qp_info_t structure
1259          */
1260         attr_p    = qpinfo->qpi_attrp;
1261         type      = qpinfo->qpi_type;
1262         queuesz_p = qpinfo->qpi_queueszp;
1263 
1264         if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) {
1265                 if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table)
1266                         return (IBT_INSUFF_RESOURCE);
1267                 rsrc_type = HERMON_QPC;
1268                 serv_type = HERMON_QP_UD;
1269         } else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1270                 if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp)
1271                         return (IBT_INSUFF_RESOURCE);
1272                 switch (attr_p->qp_fc.fc_hca_port) {
1273                 case 1:
1274                         rsrc_type = HERMON_QPC_FEXCH_PORT1;
1275                         break;
1276                 case 2:
1277                         rsrc_type = HERMON_QPC_FEXCH_PORT2;
1278                         break;
1279                 default:
1280                         return (IBT_INVALID_PARAM);
1281                 }
1282                 serv_type = HERMON_QP_FEXCH;
1283         } else
1284                 return (IBT_INVALID_PARAM);
1285 
1286         /*
1287          * Determine whether QP is being allocated for userland access or
1288          * whether it is being allocated for kernel access.  If the QP is
1289          * being allocated for userland access, fail (too complex for now).
1290          */
1291         if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) {
1292                 return (IBT_NOT_SUPPORTED);
1293         } else {
1294                 uarpg = state->hs_kernel_uar_index;
1295         }
1296 
1297         /*
1298          * Determine whether QP is being associated with an SRQ
1299          */
1300         qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
1301         if (qp_srq_en) {
1302                 /*
1303                  * Check for valid SRQ handle pointers
1304                  */
1305                 if (attr_p->qp_ibc_srq_hdl == NULL) {
1306                         return (IBT_SRQ_HDL_INVALID);
1307                 }
1308                 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
1309         }
1310 
1311         /*
1312          * Check for valid QP service type (only UD supported)
1313          */
1314         if (type != IBT_UD_RQP) {
1315                 return (IBT_QP_SRV_TYPE_INVALID);
1316         }
1317 
1318         /*
1319          * Check for valid PD handle pointer
1320          */
1321         if (attr_p->qp_pd_hdl == NULL) {
1322                 return (IBT_PD_HDL_INVALID);
1323         }
1324         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
1325 
1326         /*
1327          * If on an SRQ, check to make sure the PD is the same
1328          */
1329         if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
1330                 return (IBT_PD_HDL_INVALID);
1331         }
1332 
1333         /* set loop variable here, for freeing resources on error */
1334         ii = 0;
1335 
1336         /*
1337          * Allocate 2^log2 contiguous/aligned QP context entries.  This will
1338          * be filled in with all the necessary parameters to define the
1339          * Queue Pairs.  Unlike other Hermon hardware resources, ownership
1340          * is not immediately given to hardware in the final step here.
1341          * Instead, we must wait until the QP is later transitioned to the
1342          * "Init" state before passing the QP to hardware.  If we fail here,
1343          * we must undo all the reference count (CQ and PD).
1344          */
1345         status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag,
1346             &qpc);
1347         if (status != DDI_SUCCESS) {
1348                 return (IBT_INSUFF_RESOURCE);
1349         }
1350 
1351         if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH)
1352                 /*
1353                  * Need to init the MKEYs for the FEXCH QPs.
1354                  *
1355                  * For FEXCH QP subranges, we return the QPN base as
1356                  * "relative" to the full FEXCH QP range for the port.
1357                  */
1358                 *(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state,
1359                     attr_p->qp_fc.fc_hca_port, qpc->hr_indx);
1360         else
1361                 *(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx;
1362 
1363         qp_range_p = kmem_alloc(sizeof (*qp_range_p),
1364             (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1365         if (qp_range_p == NULL) {
1366                 status = IBT_INSUFF_RESOURCE;
1367                 goto qpalloc_fail0;
1368         }
1369         mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER,
1370             DDI_INTR_PRI(state->hs_intrmsi_pri));
1371         mutex_enter(&qp_range_p->hqpr_lock);
1372         qp_range_p->hqpr_refcnt = 1 << log2;
1373         qp_range_p->hqpr_qpcrsrc = qpc;
1374         mutex_exit(&qp_range_p->hqpr_lock);
1375 
1376 for_each_qp:
1377 
1378         /* Increment the reference count on the protection domain (PD) */
1379         hermon_pd_refcnt_inc(pd);
1380 
1381         rq_cq = (hermon_cqhdl_t)recv_cq[ii];
1382         sq_cq = (hermon_cqhdl_t)send_cq[ii];
1383         if (sq_cq == NULL) {
1384                 if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1385                         /* if no send completions, just use rq_cq */
1386                         sq_cq = rq_cq;
1387                 } else {
1388                         status = IBT_CQ_HDL_INVALID;
1389                         goto qpalloc_fail1;
1390                 }
1391         }
1392 
1393         /*
1394          * Increment the reference count on the CQs.  One or both of these
1395          * could return error if we determine that the given CQ is already
1396          * being used with a special (SMI/GSI) QP.
1397          */
1398         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
1399         if (status != DDI_SUCCESS) {
1400                 status = IBT_CQ_HDL_INVALID;
1401                 goto qpalloc_fail1;
1402         }
1403         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
1404         if (status != DDI_SUCCESS) {
1405                 status = IBT_CQ_HDL_INVALID;
1406                 goto qpalloc_fail2;
1407         }
1408 
1409         /*
1410          * Allocate the software structure for tracking the queue pair
1411          * (i.e. the Hermon Queue Pair handle).  If we fail here, we must
1412          * undo the reference counts and the previous resource allocation.
1413          */
1414         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
1415         if (status != DDI_SUCCESS) {
1416                 status = IBT_INSUFF_RESOURCE;
1417                 goto qpalloc_fail4;
1418         }
1419         qp = (hermon_qphdl_t)rsrc->hr_addr;
1420         bzero(qp, sizeof (struct hermon_sw_qp_s));
1421         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1422         qp->qp_alloc_flags = attr_p->qp_alloc_flags;
1423 
1424         /*
1425          * Calculate the QP number from QPC index.  This routine handles
1426          * all of the operations necessary to keep track of used, unused,
1427          * and released QP numbers.
1428          */
1429         qp->qp_qpnum = qpc->hr_indx + ii;
1430         qp->qp_ring = qp->qp_qpnum << 8;
1431         qp->qp_qpn_hdl = NULL;
1432 
1433         /*
1434          * Allocate the doorbell record.  Hermon just needs one for the RQ,
1435          * if the QP is not associated with an SRQ, and use uarpg (above) as
1436          * the uar index
1437          */
1438 
1439         if (!qp_srq_en) {
1440                 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
1441                     &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
1442                 if (status != DDI_SUCCESS) {
1443                         status = IBT_INSUFF_RESOURCE;
1444                         goto qpalloc_fail6;
1445                 }
1446         }
1447 
1448         qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
1449 
1450         /*
1451          * We verify that the requested number of SGL is valid (i.e.
1452          * consistent with the device limits and/or software-configured
1453          * limits).  If not, then obviously the same cleanup needs to be done.
1454          */
1455         max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
1456         swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
1457         max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
1458         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
1459             (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
1460                 status = IBT_HCA_SGL_EXCEEDED;
1461                 goto qpalloc_fail7;
1462         }
1463 
1464         /*
1465          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
1466          * This will depend on the requested number of SGLs.  Note: this
1467          * has the side-effect of also calculating the real number of SGLs
1468          * (for the calculated WQE size).
1469          *
1470          * For QP's on an SRQ, we set these to 0.
1471          */
1472         if (qp_srq_en) {
1473                 qp->qp_rq_log_wqesz = 0;
1474                 qp->qp_rq_sgl = 0;
1475         } else {
1476                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
1477                     max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
1478                     &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
1479         }
1480         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
1481             max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
1482 
1483         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1484 
1485         /* NOTE: currently policy in driver, later maybe IBTF interface */
1486         qp->qp_no_prefetch = 0;
1487 
1488         /*
1489          * for prefetching, we need to add the number of wqes in
1490          * the 2k area plus one to the number requested, but
1491          * ONLY for send queue.  If no_prefetch == 1 (prefetch off)
1492          * it's exactly TWO wqes for the headroom
1493          */
1494         if (qp->qp_no_prefetch)
1495                 qp->qp_sq_headroom = 2 * sq_wqe_size;
1496         else
1497                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1498         /*
1499          * hdrm wqes must be integral since both sq_wqe_size &
1500          * HERMON_QP_OH_SIZE are power of 2
1501          */
1502         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1503 
1504 
1505         /*
1506          * Calculate the appropriate size for the work queues.
1507          * For send queue, add in the headroom wqes to the calculation.
1508          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
1509          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
1510          * to round the requested size up to the next highest power-of-2
1511          */
1512         /* first, adjust to a minimum and tell the caller the change */
1513         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
1514             HERMON_QP_MIN_SIZE);
1515         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
1516             HERMON_QP_MIN_SIZE);
1517         /*
1518          * now, calculate the alloc size, taking into account
1519          * the headroom for the sq
1520          */
1521         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
1522         /* if the total is a power of two, reduce it */
1523         if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes))    {

1524                 log_qp_sq_size = log_qp_sq_size - 1;
1525         }
1526 
1527         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
1528         if (ISP2(attr_p->qp_sizes.cs_rq)) {
1529                 log_qp_rq_size = log_qp_rq_size - 1;
1530         }
1531 
1532         /*
1533          * Next we verify that the rounded-up size is valid (i.e. consistent
1534          * with the device limits and/or software-configured limits).  If not,
1535          * then obviously we have a lot of cleanup to do before returning.
1536          *
1537          * NOTE: the first condition deals with the (test) case of cs_sq
1538          * being just less than 2^32.  In this case, the headroom addition
1539          * to the requested cs_sq will pass the test when it should not.
1540          * This test no longer lets that case slip through the check.
1541          */
1542         if ((attr_p->qp_sizes.cs_sq >
1543             (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
1544             (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
1545             (!qp_srq_en && (log_qp_rq_size >
1546             state->hs_cfg_profile->cp_log_max_qp_sz))) {
1547                 status = IBT_HCA_WR_EXCEEDED;
1548                 goto qpalloc_fail7;
1549         }
1550 
1551         /*
1552          * Allocate the memory for QP work queues. Since Hermon work queues
1553          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
1554          * the work queue memory is very important.  We used to allocate
1555          * work queues (the combined receive and send queues) so that they
1556          * would be aligned on their combined size.  That alignment guaranteed
1557          * that they would never cross the 4GB boundary (Hermon work queues
1558          * are on the order of MBs at maximum).  Now we are able to relax
1559          * this alignment constraint by ensuring that the IB address assigned
1560          * to the queue memory (as a result of the hermon_mr_register() call)
1561          * is offset from zero.
1562          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
1563          * guarantee the alignment, but when attempting to use IOMMU bypass
1564          * mode we found that we were not allowed to specify any alignment
1565          * that was more restrictive than the system page size.
1566          * So we avoided this constraint by passing two alignment values,
1567          * one for the memory allocation itself and the other for the DMA
1568          * handle (for later bind).  This used to cause more memory than
1569          * necessary to be allocated (in order to guarantee the more
1570          * restrictive alignment contraint).  But by guaranteeing the
1571          * zero-based IB virtual address for the queue, we are able to
1572          * conserve this memory.
1573          */
1574         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1575         sq_depth    = 1 << log_qp_sq_size;
1576         sq_size     = sq_depth * sq_wqe_size;
1577 
1578         /* QP on SRQ sets these to 0 */
1579         if (qp_srq_en) {
1580                 rq_wqe_size = 0;
1581                 rq_size     = 0;
1582         } else {
1583                 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1584                 rq_depth    = 1 << log_qp_rq_size;
1585                 rq_size     = rq_depth * rq_wqe_size;
1586         }
1587 
1588         qp->qp_wqinfo.qa_size = sq_size + rq_size;
1589         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1590         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
1591         qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1592         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1593         if (status != DDI_SUCCESS) {
1594                 status = IBT_INSUFF_RESOURCE;
1595                 goto qpalloc_fail7;
1596         }
1597 
1598         /*
1599          * Sort WQs in memory according to stride (*q_wqe_size), largest first
1600          * If they are equal, still put the SQ first
1601          */
1602         qp->qp_sq_baseaddr = 0;
1603         qp->qp_rq_baseaddr = 0;
1604         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1605                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1606 
1607                 /* if this QP is on an SRQ, set the rq_buf to NULL */
1608                 if (qp_srq_en) {
1609                         rq_buf = NULL;
1610                 } else {
1611                         rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1612                         qp->qp_rq_baseaddr = sq_size;
1613                 }
1614         } else {
1615                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1616                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1617                 qp->qp_sq_baseaddr = rq_size;
1618         }
1619 
1620         qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1621         if (qp->qp_sq_wqhdr == NULL) {
1622                 status = IBT_INSUFF_RESOURCE;
1623                 goto qpalloc_fail8;
1624         }
1625         if (qp_srq_en) {
1626                 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
1627                 qp->qp_rq_wqavl.wqa_srq_en = 1;
1628                 qp->qp_rq_wqavl.wqa_srq = srq;
1629         } else {
1630                 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
1631                 if (qp->qp_rq_wqhdr == NULL) {
1632                         status = IBT_INSUFF_RESOURCE;
1633                         goto qpalloc_fail8;
1634                 }
1635                 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1636         }
1637         qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1638         qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1639         qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1640         qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1641         qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1642 
1643         /*
1644          * Register the memory for the QP work queues.  The memory for the
1645          * QP must be registered in the Hermon cMPT tables.  This gives us the
1646          * LKey to specify in the QP context later.  Note: The memory for
1647          * Hermon work queues (both Send and Recv) must be contiguous and
1648          * registered as a single memory region.  Note: If the QP memory is
1649          * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
1650          * meet the alignment restriction, we pass the "mro_bind_override_addr"
1651          * flag in the call to hermon_mr_register(). This guarantees that the
1652          * resulting IB vaddr will be zero-based (modulo the offset into the
1653          * first page). If we fail here, we still have the bunch of resource
1654          * and reference count cleanup to do.
1655          */
1656         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1657             IBT_MR_NOSLEEP;
1658         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1659         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
1660         mr_attr.mr_as       = NULL;
1661         mr_attr.mr_flags    = flag;
1662         /* HERMON_QUEUE_LOCATION_NORMAL */
1663         mr_op.mro_bind_type =
1664             state->hs_cfg_profile->cp_iommu_bypass;
1665         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1666         mr_op.mro_bind_override_addr = 1;
1667         status = hermon_mr_register(state, pd, &mr_attr, &mr,
1668             &mr_op, HERMON_QP_CMPT);
1669         if (status != DDI_SUCCESS) {
1670                 status = IBT_INSUFF_RESOURCE;
1671                 goto qpalloc_fail9;
1672         }
1673 
1674         /*
1675          * Calculate the offset between the kernel virtual address space
1676          * and the IB virtual address space.  This will be used when
1677          * posting work requests to properly initialize each WQE.
1678          */
1679         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1680             (uint64_t)mr->mr_bindinfo.bi_addr;
1681 
1682         /*
1683          * Fill in all the return arguments (if necessary).  This includes
1684          * real work queue sizes (in wqes), real SGLs, and QP number
1685          */
1686         if (queuesz_p != NULL) {
1687                 queuesz_p->cs_sq     =
1688                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1689                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1690 
1691                 /* if this QP is on an SRQ, set these to 0 */
1692                 if (qp_srq_en) {
1693                         queuesz_p->cs_rq     = 0;
1694                         queuesz_p->cs_rq_sgl = 0;
1695                 } else {
1696                         queuesz_p->cs_rq     = (1 << log_qp_rq_size);
1697                         queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1698                 }
1699         }
1700 
1701         /*
1702          * Fill in the rest of the Hermon Queue Pair handle.
1703          */
1704         qp->qp_qpcrsrcp              = NULL;
1705         qp->qp_rsrcp         = rsrc;
1706         qp->qp_state         = HERMON_QP_RESET;
1707         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1708         qp->qp_pdhdl         = pd;
1709         qp->qp_mrhdl         = mr;
1710         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1711             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1712         qp->qp_is_special    = 0;
1713         qp->qp_uarpg         = uarpg;
1714         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
1715         qp->qp_sq_cqhdl              = sq_cq;
1716         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
1717         qp->qp_sq_logqsz     = log_qp_sq_size;
1718         qp->qp_sq_buf                = sq_buf;
1719         qp->qp_desc_off              = qp_desc_off;
1720         qp->qp_rq_cqhdl              = rq_cq;
1721         qp->qp_rq_buf                = rq_buf;
1722         qp->qp_rlky          = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
1723             0;
1724 
1725         /* if this QP is on an SRQ, set rq_bufsz to 0 */
1726         if (qp_srq_en) {
1727                 qp->qp_rq_bufsz              = 0;
1728                 qp->qp_rq_logqsz     = 0;
1729         } else {
1730                 qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
1731                 qp->qp_rq_logqsz     = log_qp_rq_size;
1732         }
1733 
1734         qp->qp_forward_sqd_event  = 0;
1735         qp->qp_sqd_still_draining = 0;
1736         qp->qp_hdlrarg               = (void *)ibt_qphdl[ii];
1737         qp->qp_mcg_refcnt    = 0;
1738 
1739         /*
1740          * If this QP is to be associated with an SRQ, set the SRQ handle
1741          */
1742         if (qp_srq_en) {
1743                 qp->qp_srqhdl = srq;
1744                 hermon_srq_refcnt_inc(qp->qp_srqhdl);
1745         } else {
1746                 qp->qp_srqhdl = NULL;
1747         }
1748 
1749         qp->qp_type = IBT_UD_RQP;
1750         qp->qp_serv_type = serv_type;
1751 
1752         /*
1753          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1754          */
1755 
1756         /*
1757          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1758          * set the quadword to all F's - high-order bit is owner (init to one)
1759          * and the rest for the headroom definition of prefetching.
1760          */
1761         if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) {
1762                 wqesz_shift = qp->qp_sq_log_wqesz;
1763                 thewqesz    = 1 << wqesz_shift;
1764                 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1765                 for (i = 0; i < sq_depth; i++) {
1766                         /*
1767                          * for each stride, go through and every 64 bytes
1768                          * write the init value - having set the address
1769                          * once, just keep incrementing it
1770                          */
1771                         for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1772                                 *(uint32_t *)thewqe = 0xFFFFFFFF;
1773                         }
1774                 }
1775         }
1776 
1777         /* Zero out the QP context */
1778         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1779 
1780         /*
1781          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
1782          * "qphdl" and return success
1783          */
1784         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp);
1785 
1786         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1787             DDI_INTR_PRI(state->hs_intrmsi_pri));
1788 
1789         qp->qp_rangep = qp_range_p;
1790 
1791         qphdl[ii] = qp;
1792 
1793         if (++ii < (1 << log2))
1794                 goto for_each_qp;
1795 
1796         return (DDI_SUCCESS);
1797 
1798 /*
1799  * The following is cleanup for all possible failure cases in this routine
1800  */
1801 qpalloc_fail9:
1802         hermon_queue_free(&qp->qp_wqinfo);
1803 qpalloc_fail8:
1804         if (qp->qp_sq_wqhdr)
1805                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1806         if (qp->qp_rq_wqhdr)
1807                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1808 qpalloc_fail7:
1809         if (!qp_srq_en) {
1810                 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1811         }
1812 
1813 qpalloc_fail6:
1814         hermon_rsrc_free(state, &rsrc);
1815 qpalloc_fail4:
1816         hermon_cq_refcnt_dec(rq_cq);
1817 qpalloc_fail2:
1818         hermon_cq_refcnt_dec(sq_cq);
1819 qpalloc_fail1:
1820         hermon_pd_refcnt_dec(pd);
1821 qpalloc_fail0:
1822         if (ii == 0) {
1823                 if (qp_range_p)
1824                         kmem_free(qp_range_p, sizeof (*qp_range_p));
1825                 hermon_rsrc_free(state, &qpc);
1826         } else {
1827                 /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */
1828 
1829                 mutex_enter(&qp->qp_rangep->hqpr_lock);
1830                 qp_range_p->hqpr_refcnt = ii;
1831                 mutex_exit(&qp->qp_rangep->hqpr_lock);
1832                 while (--ii >= 0) {
1833                         ibc_qpn_hdl_t qpn_hdl;
1834                         int free_status;
1835 
1836                         free_status = hermon_qp_free(state, &qphdl[ii],
1837                             IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag);
1838                         if (free_status != DDI_SUCCESS)
1839                                 cmn_err(CE_CONT, "!qp_range: status 0x%x: "
1840                                     "error status %x during free",
1841                                     status, free_status);
1842                 }
1843         }
1844 
1845         return (status);
1846 }
1847 
1848 
1849 /*
1850  * hermon_qp_free()
1851  *    This function frees up the QP resources.  Depending on the value
1852  *    of the "free_qp_flags", the QP number may not be released until
1853  *    a subsequent call to hermon_qp_release_qpn().
1854  *
1855  *    Context: Can be called only from user or kernel context.
1856  */
1857 /* ARGSUSED */
1858 int
1859 hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl,
1860     ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
1861     uint_t sleepflag)
1862 {
1863         hermon_rsrc_t           *qpc, *rsrc;
1864         hermon_umap_db_entry_t  *umapdb;
1865         hermon_qpn_entry_t      *entry;
1866         hermon_pdhdl_t          pd;
1867         hermon_mrhdl_t          mr;
1868         hermon_cqhdl_t          sq_cq, rq_cq;
1869         hermon_srqhdl_t         srq;
1870         hermon_qphdl_t          qp;
1871         uint64_t                value;
1872         uint_t                  type, port;
1873         uint_t                  maxprot;
1874         uint_t                  qp_srq_en;
1875         int                     status;
1876 
1877         /*
1878          * Pull all the necessary information from the Hermon Queue Pair
1879          * handle.  This is necessary here because the resource for the
1880          * QP handle is going to be freed up as part of this operation.
1881          */
1882         qp      = *qphdl;
1883         mutex_enter(&qp->qp_lock);
1884         qpc     = qp->qp_qpcrsrcp;   /* NULL if part of a "range" */
1885         rsrc    = qp->qp_rsrcp;
1886         pd      = qp->qp_pdhdl;
1887         srq     = qp->qp_srqhdl;
1888         mr      = qp->qp_mrhdl;
1889         rq_cq   = qp->qp_rq_cqhdl;
1890         sq_cq   = qp->qp_sq_cqhdl;
1891         port    = qp->qp_portnum;
1892         qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ;
1893 
1894         /*
1895          * If the QP is part of an MCG, then we fail the qp_free
1896          */
1897         if (qp->qp_mcg_refcnt != 0) {
1898                 mutex_exit(&qp->qp_lock);
1899                 status = ibc_get_ci_failure(0);
1900                 goto qpfree_fail;
1901         }
1902 
1903         /*
1904          * If the QP is not already in "Reset" state, then transition to
1905          * "Reset".  This is necessary because software does not reclaim
1906          * ownership of the QP context until the QP is in the "Reset" state.
1907          * If the ownership transfer fails for any reason, then it is an
1908          * indication that something (either in HW or SW) has gone seriously
1909          * wrong.  So we print a warning message and return.
1910          */
1911         if (qp->qp_state != HERMON_QP_RESET) {
1912                 if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) {
1913                         mutex_exit(&qp->qp_lock);
1914                         HERMON_WARNING(state, "failed to reset QP context");
1915                         status = ibc_get_ci_failure(0);
1916                         goto qpfree_fail;
1917                 }
1918                 qp->qp_state = HERMON_QP_RESET;
1919                 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1920 
1921                 /*
1922                  * Do any additional handling necessary for the transition
1923                  * to the "Reset" state (e.g. update the WRID lists)
1924                  */
1925                 if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) {
1926                         mutex_exit(&qp->qp_lock);
1927                         HERMON_WARNING(state, "failed to reset QP WRID list");
1928                         status = ibc_get_ci_failure(0);
1929                         goto qpfree_fail;
1930                 }
1931         }
1932 
1933         /*
1934          * If this was a user-mappable QP, then we need to remove its entry
1935          * from the "userland resources database".  If it is also currently
1936          * mmap()'d out to a user process, then we need to call
1937          * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
1938          * We also need to invalidate the QP tracking information for the
1939          * user mapping.
1940          */
1941         if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
1942                 status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum,
1943                     MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
1944                     &umapdb);
1945                 if (status != DDI_SUCCESS) {
1946                         mutex_exit(&qp->qp_lock);
1947                         HERMON_WARNING(state, "failed to find in database");
1948                         return (ibc_get_ci_failure(0));
1949                 }
1950                 hermon_umap_db_free(umapdb);
1951                 if (qp->qp_umap_dhp != NULL) {
1952                         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
1953                         status = devmap_devmem_remap(qp->qp_umap_dhp,
1954                             state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size,
1955                             maxprot, DEVMAP_MAPPING_INVALID, NULL);
1956                         if (status != DDI_SUCCESS) {
1957                                 mutex_exit(&qp->qp_lock);
1958                                 HERMON_WARNING(state, "failed in QP memory "
1959                                     "devmap_devmem_remap()");
1960                                 return (ibc_get_ci_failure(0));
1961                         }
1962                         qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1963                 }
1964         }
1965 
1966 
1967         /*
1968          * Put NULL into the Hermon QPNum-to-QPHdl list.  This will allow any
1969          * in-progress events to detect that the QP corresponding to this
1970          * number has been freed.  Note: it does depend in whether we are
1971          * freeing a special QP or not.
1972          */
1973         if (qpc == NULL) {
1974                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1975                     qp->qp_qpnum, NULL);
1976         } else if (qp->qp_is_special) {
1977                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1978                     qpc->hr_indx + port, NULL);
1979         } else {
1980                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1981                     qpc->hr_indx, NULL);
1982         }
1983 
1984         /*
1985          * Drop the QP lock
1986          *    At this point the lock is no longer necessary.  We cannot
1987          *    protect from multiple simultaneous calls to free the same QP.
1988          *    In addition, since the QP lock is contained in the QP "software
1989          *    handle" resource, which we will free (see below), it is
1990          *    important that we have no further references to that memory.
1991          */
1992         mutex_exit(&qp->qp_lock);
1993         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1994 
1995         /*
1996          * Free the QP resources
1997          *    Start by deregistering and freeing the memory for work queues.
1998          *    Next free any previously allocated context information
1999          *    (depending on QP type)
2000          *    Finally, decrement the necessary reference counts.
2001          * If this fails for any reason, then it is an indication that
2002          * something (either in HW or SW) has gone seriously wrong.  So we
2003          * print a warning message and return.
2004          */
2005         status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
2006             sleepflag);
2007         if (status != DDI_SUCCESS) {
2008                 HERMON_WARNING(state, "failed to deregister QP memory");
2009                 status = ibc_get_ci_failure(0);
2010                 goto qpfree_fail;
2011         }
2012 
2013         /* Free the memory for the QP */
2014         hermon_queue_free(&qp->qp_wqinfo);
2015 
2016         if (qp->qp_sq_wqhdr)
2017                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
2018         if (qp->qp_rq_wqhdr)
2019                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
2020 
2021         /* Free the dbr */
2022         if (!qp_srq_en) {
2023                 hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr);
2024         }
2025 
2026         /*
2027          * Free up the remainder of the QP resources.  Note: we have a few
2028          * different resources to free up depending on whether the QP is a
2029          * special QP or not.  As described above, if any of these fail for
2030          * any reason it is an indication that something (either in HW or SW)
2031          * has gone seriously wrong.  So we print a warning message and
2032          * return.
2033          */
2034         if (qp->qp_is_special) {
2035                 type = (qp->qp_is_special == HERMON_QP_SMI) ?
2036                     IBT_SMI_SQP : IBT_GSI_SQP;
2037 
2038                 /* Free up resources for the special QP */
2039                 status = hermon_special_qp_rsrc_free(state, type, port);
2040                 if (status != DDI_SUCCESS) {
2041                         HERMON_WARNING(state, "failed to free special QP rsrc");
2042                         status = ibc_get_ci_failure(0);
2043                         goto qpfree_fail;
2044                 }
2045 
2046         } else if (qp->qp_rangep) {
2047                 int refcnt;
2048                 mutex_enter(&qp->qp_rangep->hqpr_lock);
2049                 refcnt = --qp->qp_rangep->hqpr_refcnt;
2050                 mutex_exit(&qp->qp_rangep->hqpr_lock);
2051                 if (refcnt == 0) {
2052                         mutex_destroy(&qp->qp_rangep->hqpr_lock);
2053                         hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc);
2054                         kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep));
2055                 }
2056                 qp->qp_rangep = NULL;
2057         } else if (qp->qp_qpn_hdl == NULL) {
2058                 hermon_rsrc_free(state, &qpc);
2059         } else {
2060                 /*
2061                  * Check the flags and determine whether to release the
2062                  * QPN or not, based on their value.
2063                  */
2064                 if (free_qp_flags == IBC_FREE_QP_ONLY) {
2065                         entry = qp->qp_qpn_hdl;
2066                         hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2067                             HERMON_QPN_FREE_ONLY);
2068                         *qpnh = (ibc_qpn_hdl_t)entry;
2069                 } else {
2070                         hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2071                             HERMON_QPN_RELEASE);
2072                 }
2073         }
2074 
2075         mutex_destroy(&qp->qp_sq_lock);
2076 
2077         /* Free the Hermon Queue Pair handle */
2078         hermon_rsrc_free(state, &rsrc);
2079 
2080         /* Decrement the reference counts on CQs, PD and SRQ (if needed) */
2081         hermon_cq_refcnt_dec(rq_cq);
2082         hermon_cq_refcnt_dec(sq_cq);
2083         hermon_pd_refcnt_dec(pd);
2084         if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
2085                 hermon_srq_refcnt_dec(srq);
2086         }
2087 
2088         /* Set the qphdl pointer to NULL and return success */
2089         *qphdl = NULL;
2090 
2091         return (DDI_SUCCESS);
2092 
2093 qpfree_fail:
2094         return (status);
2095 }
2096 
2097 
2098 /*
2099  * hermon_qp_query()
2100  *    Context: Can be called from interrupt or base context.
2101  */
2102 int
2103 hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp,
2104     ibt_qp_query_attr_t *attr_p)
2105 {
2106         ibt_cep_state_t         qp_state;
2107         ibt_qp_ud_attr_t        *ud;
2108         ibt_qp_rc_attr_t        *rc;
2109         ibt_qp_uc_attr_t        *uc;
2110         ibt_cep_flags_t         enable_flags;
2111         hermon_hw_addr_path_t   *qpc_path, *qpc_alt_path;
2112         ibt_cep_path_t          *path_ptr, *alt_path_ptr;
2113         hermon_hw_qpc_t         *qpc;
2114         int                     status;
2115         uint_t                  tmp_sched_q, tmp_alt_sched_q;
2116 
2117         mutex_enter(&qp->qp_lock);
2118 
2119         /*
2120          * Grab the temporary QPC entry from QP software state
2121          */
2122         qpc = &qp->qpc;
2123 
2124         /* Convert the current Hermon QP state to IBTF QP state */
2125         switch (qp->qp_state) {
2126         case HERMON_QP_RESET:
2127                 qp_state = IBT_STATE_RESET;             /* "Reset" */
2128                 break;
2129         case HERMON_QP_INIT:
2130                 qp_state = IBT_STATE_INIT;              /* Initialized */
2131                 break;
2132         case HERMON_QP_RTR:
2133                 qp_state = IBT_STATE_RTR;               /* Ready to Receive */
2134                 break;
2135         case HERMON_QP_RTS:
2136                 qp_state = IBT_STATE_RTS;               /* Ready to Send */
2137                 break;
2138         case HERMON_QP_SQERR:
2139                 qp_state = IBT_STATE_SQE;               /* Send Queue Error */
2140                 break;
2141         case HERMON_QP_SQD:
2142                 if (qp->qp_sqd_still_draining) {
2143                         qp_state = IBT_STATE_SQDRAIN;   /* SQ Draining */
2144                 } else {
2145                         qp_state = IBT_STATE_SQD;       /* SQ Drained */
2146                 }
2147                 break;
2148         case HERMON_QP_ERR:
2149                 qp_state = IBT_STATE_ERROR;             /* Error */
2150                 break;
2151         default:
2152                 mutex_exit(&qp->qp_lock);
2153                 return (ibc_get_ci_failure(0));
2154         }
2155         attr_p->qp_info.qp_state = qp_state;
2156 
2157         /* SRQ Hook. */
2158         attr_p->qp_srq = NULL;
2159 
2160         /*
2161          * The following QP information is always returned, regardless of
2162          * the current QP state.  Note: Some special handling is necessary
2163          * for calculating the QP number on special QP (QP0 and QP1).
2164          */
2165         attr_p->qp_sq_cq    =
2166             (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg;
2167         attr_p->qp_rq_cq    =
2168             (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg;
2169         if (qp->qp_is_special) {
2170                 attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1;
2171         } else {
2172                 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
2173         }
2174         attr_p->qp_sq_sgl   = qp->qp_sq_sgl;
2175         attr_p->qp_rq_sgl   = qp->qp_rq_sgl;
2176         attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes;
2177         attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;
2178 
2179         /*
2180          * If QP is currently in the "Reset" state, then only the above are
2181          * returned
2182          */
2183         if (qp_state == IBT_STATE_RESET) {
2184                 mutex_exit(&qp->qp_lock);
2185                 return (DDI_SUCCESS);
2186         }
2187 
2188         /*
2189          * Post QUERY_QP command to firmware
2190          *
2191          * We do a HERMON_NOSLEEP here because we are holding the "qp_lock".
2192          * Since we may be in the interrupt context (or subsequently raised
2193          * to interrupt level by priority inversion), we do not want to block
2194          * in this routine waiting for success.
2195          */
2196         tmp_sched_q = qpc->pri_addr_path.sched_q;
2197         tmp_alt_sched_q = qpc->alt_addr_path.sched_q;
2198         status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum,
2199             qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN);
2200         if (status != HERMON_CMD_SUCCESS) {
2201                 mutex_exit(&qp->qp_lock);
2202                 cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP "
2203                     "command failed: %08x\n", state->hs_instance, status);
2204                 if (status == HERMON_CMD_INVALID_STATUS) {
2205                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2206                 }
2207                 return (ibc_get_ci_failure(0));
2208         }
2209         qpc->pri_addr_path.sched_q = tmp_sched_q;
2210         qpc->alt_addr_path.sched_q = tmp_alt_sched_q;
2211 
2212         /*
2213          * Fill in the additional QP info based on the QP's transport type.
2214          */
2215         if (qp->qp_type == IBT_UD_RQP) {
2216 
2217                 /* Fill in the UD-specific info */
2218                 ud = &attr_p->qp_info.qp_transport.ud;
2219                 ud->ud_qkey  = (ib_qkey_t)qpc->qkey;
2220                 ud->ud_sq_psn        = qpc->next_snd_psn;
2221                 ud->ud_pkey_ix       = qpc->pri_addr_path.pkey_indx;
2222                 /* port+1 for port 1/2 */
2223                 ud->ud_port  =
2224                     (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1);
2225 
2226                 attr_p->qp_info.qp_trans = IBT_UD_SRV;
2227 
2228                 if (qp->qp_serv_type == HERMON_QP_FEXCH) {
2229                         ibt_pmr_desc_t *pmr;
2230                         uint64_t heart_beat;
2231 
2232                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pmr))
2233                         pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc;
2234                         pmr->pmd_iova = 0;
2235                         pmr->pmd_lkey = pmr->pmd_rkey =
2236                             hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum);
2237                         pmr->pmd_phys_buf_list_sz =
2238                             state->hs_fcoib.hfc_mtts_per_mpt;
2239                         pmr->pmd_sync_required = 0;
2240 
2241                         pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc;
2242                         pmr->pmd_iova = 0;
2243                         pmr->pmd_lkey = 0;
2244                         pmr->pmd_rkey = 0;
2245                         pmr->pmd_phys_buf_list_sz = 0;
2246                         pmr->pmd_sync_required = 0;
2247 
2248                         attr_p->qp_query_fexch.fq_flags =
2249                             ((hermon_get_heart_beat_rq_cmd_post(state,
2250                             qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) &&
2251                             (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK :
2252                             IBT_FEXCH_NO_FLAGS;
2253 
2254                         ud->ud_fc = qp->qp_fc_attr;
2255                 } else if (qp->qp_serv_type == HERMON_QP_FCMND ||
2256                     qp->qp_serv_type == HERMON_QP_RFCI) {
2257                         ud->ud_fc = qp->qp_fc_attr;
2258                 }
2259 
2260         } else if (qp->qp_serv_type == HERMON_QP_RC) {
2261 
2262                 /* Fill in the RC-specific info */
2263                 rc = &attr_p->qp_info.qp_transport.rc;
2264                 rc->rc_sq_psn        = qpc->next_snd_psn;
2265                 rc->rc_rq_psn        = qpc->next_rcv_psn;
2266                 rc->rc_dst_qpn       = qpc->rem_qpn;
2267 
2268                 /* Grab the path migration state information */
2269                 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2270                         rc->rc_mig_state = IBT_STATE_MIGRATED;
2271                 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2272                         rc->rc_mig_state = IBT_STATE_REARMED;
2273                 } else {
2274                         rc->rc_mig_state = IBT_STATE_ARMED;
2275                 }
2276                 rc->rc_rdma_ra_out = (1 << qpc->sra_max);
2277                 rc->rc_rdma_ra_in  = (1 << qpc->rra_max);
2278                 rc->rc_min_rnr_nak = qpc->min_rnr_nak;
2279                 rc->rc_path_mtu         = qpc->mtu;
2280                 rc->rc_retry_cnt   = qpc->retry_cnt;
2281 
2282                 /* Get the common primary address path fields */
2283                 qpc_path = &qpc->pri_addr_path;
2284                 path_ptr = &rc->rc_path;
2285                 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2286                     HERMON_ADDRPATH_QP);
2287 
2288                 /* Fill in the additional primary address path fields */
2289                 path_ptr->cep_pkey_ix           = qpc_path->pkey_indx;
2290                 path_ptr->cep_hca_port_num =
2291                     path_ptr->cep_adds_vect.av_port_num =
2292                     (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2293                 path_ptr->cep_timeout           = qpc_path->ack_timeout;
2294 
2295                 /* Get the common alternate address path fields */
2296                 qpc_alt_path = &qpc->alt_addr_path;
2297                 alt_path_ptr = &rc->rc_alt_path;
2298                 hermon_get_addr_path(state, qpc_alt_path,
2299                     &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2300 
2301                 /* Fill in the additional alternate address path fields */
2302                 alt_path_ptr->cep_pkey_ix    = qpc_alt_path->pkey_indx;
2303                 alt_path_ptr->cep_hca_port_num       =
2304                     alt_path_ptr->cep_adds_vect.av_port_num =
2305                     (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2306                 alt_path_ptr->cep_timeout    = qpc_alt_path->ack_timeout;
2307 
2308                 /* Get the RNR retry time from primary path */
2309                 rc->rc_rnr_retry_cnt = qpc->rnr_retry;
2310 
2311                 /* Set the enable flags based on RDMA/Atomic enable bits */
2312                 enable_flags = IBT_CEP_NO_FLAGS;
2313                 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
2314                 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2315                 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
2316                 attr_p->qp_info.qp_flags = enable_flags;
2317 
2318                 attr_p->qp_info.qp_trans = IBT_RC_SRV;
2319 
2320         } else if (qp->qp_serv_type == HERMON_QP_UC) {
2321 
2322                 /* Fill in the UC-specific info */
2323                 uc = &attr_p->qp_info.qp_transport.uc;
2324                 uc->uc_sq_psn        = qpc->next_snd_psn;
2325                 uc->uc_rq_psn        = qpc->next_rcv_psn;
2326                 uc->uc_dst_qpn       = qpc->rem_qpn;
2327 
2328                 /* Grab the path migration state information */
2329                 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2330                         uc->uc_mig_state = IBT_STATE_MIGRATED;
2331                 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2332                         uc->uc_mig_state = IBT_STATE_REARMED;
2333                 } else {
2334                         uc->uc_mig_state = IBT_STATE_ARMED;
2335                 }
2336                 uc->uc_path_mtu = qpc->mtu;
2337 
2338                 /* Get the common primary address path fields */
2339                 qpc_path = &qpc->pri_addr_path;
2340                 path_ptr = &uc->uc_path;
2341                 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2342                     HERMON_ADDRPATH_QP);
2343 
2344                 /* Fill in the additional primary address path fields */
2345                 path_ptr->cep_pkey_ix           = qpc_path->pkey_indx;
2346                 path_ptr->cep_hca_port_num =
2347                     path_ptr->cep_adds_vect.av_port_num =
2348                     (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2349 
2350                 /* Get the common alternate address path fields */
2351                 qpc_alt_path = &qpc->alt_addr_path;
2352                 alt_path_ptr = &uc->uc_alt_path;
2353                 hermon_get_addr_path(state, qpc_alt_path,
2354                     &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2355 
2356                 /* Fill in the additional alternate address path fields */
2357                 alt_path_ptr->cep_pkey_ix    = qpc_alt_path->pkey_indx;
2358                 alt_path_ptr->cep_hca_port_num       =
2359                     alt_path_ptr->cep_adds_vect.av_port_num =
2360                     (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2361 
2362                 /*
2363                  * Set the enable flags based on RDMA enable bits (by
2364                  * definition UC doesn't support Atomic or RDMA Read)
2365                  */
2366                 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2367                 attr_p->qp_info.qp_flags = enable_flags;
2368 
2369                 attr_p->qp_info.qp_trans = IBT_UC_SRV;
2370 
2371         } else {
2372                 HERMON_WARNING(state, "unexpected QP transport type");
2373                 mutex_exit(&qp->qp_lock);
2374                 return (ibc_get_ci_failure(0));
2375         }
2376 
2377         /*
2378          * Under certain circumstances it is possible for the Hermon hardware
2379          * to transition to one of the error states without software directly
2380          * knowing about it.  The QueryQP() call is the one place where we
2381          * have an opportunity to sample and update our view of the QP state.
2382          */
2383         if (qpc->state == HERMON_QP_SQERR) {
2384                 attr_p->qp_info.qp_state = IBT_STATE_SQE;
2385                 qp->qp_state = HERMON_QP_SQERR;
2386                 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR);
2387         }
2388         if (qpc->state == HERMON_QP_ERR) {
2389                 attr_p->qp_info.qp_state = IBT_STATE_ERROR;
2390                 qp->qp_state = HERMON_QP_ERR;
2391                 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR);
2392         }
2393         mutex_exit(&qp->qp_lock);
2394 
2395         return (DDI_SUCCESS);
2396 }
2397 
2398 
2399 /*
2400  * hermon_qp_create_qpn()
2401  *    Context: Can be called from interrupt or base context.
2402  */
2403 static int
2404 hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
2405     hermon_rsrc_t *qpc)
2406 {
2407         hermon_qpn_entry_t      query;
2408         hermon_qpn_entry_t      *entry;
2409         avl_index_t             where;
2410 
2411         /*
2412          * Build a query (for the AVL tree lookup) and attempt to find
2413          * a previously added entry that has a matching QPC index.  If
2414          * no matching entry is found, then allocate, initialize, and
2415          * add an entry to the AVL tree.
2416          * If a matching entry is found, then increment its QPN counter
2417          * and reference counter.
2418          */
2419         query.qpn_indx = qpc->hr_indx;
2420         mutex_enter(&state->hs_qpn_avl_lock);
2421         entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl,
2422             &query, &where);
2423         if (entry == NULL) {
2424                 /*
2425                  * Allocate and initialize a QPN entry, then insert
2426                  * it into the AVL tree.
2427                  */
2428                 entry = (hermon_qpn_entry_t *)kmem_zalloc(
2429                     sizeof (hermon_qpn_entry_t), KM_NOSLEEP);
2430                 if (entry == NULL) {
2431                         mutex_exit(&state->hs_qpn_avl_lock);
2432                         return (DDI_FAILURE);
2433                 }
2434                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
2435 
2436                 entry->qpn_indx         = qpc->hr_indx;
2437                 entry->qpn_refcnt  = 0;
2438                 entry->qpn_counter = 0;
2439 
2440                 avl_insert(&state->hs_qpn_avl, entry, where);
2441         }
2442 
2443         /*
2444          * Make the AVL tree entry point to the QP context resource that
2445          * it will be responsible for tracking
2446          */
2447         entry->qpn_qpc = qpc;
2448 
2449         /*
2450          * Setup the QP handle to point to the AVL tree entry.  Then
2451          * generate the new QP number from the entry's QPN counter value
2452          * and the hardware's QP context table index.
2453          */
2454         qp->qp_qpn_hdl       = entry;
2455         qp->qp_qpnum = ((entry->qpn_counter <<
2456             state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) &
2457             HERMON_QP_MAXNUMBER_MSK;
2458         qp->qp_ring = qp->qp_qpnum << 8;
2459 
2460         /*
2461          * Increment the reference counter and QPN counter.  The QPN
2462          * counter always indicates the next available number for use.
2463          */
2464         entry->qpn_counter++;
2465         entry->qpn_refcnt++;
2466 
2467         mutex_exit(&state->hs_qpn_avl_lock);
2468 
2469         return (DDI_SUCCESS);
2470 }
2471 
2472 
2473 /*
2474  * hermon_qp_release_qpn()
2475  *    Context: Can be called only from user or kernel context.
2476  */
2477 void
2478 hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry,
2479     int flags)
2480 {
2481         ASSERT(entry != NULL);
2482 
2483         mutex_enter(&state->hs_qpn_avl_lock);
2484 
2485         /*
2486          * If we are releasing the QP number here, then we decrement the
2487          * reference count and check for zero references.  If there are
2488          * zero references, then we free the QPC context (if it hadn't
2489          * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for
2490          * reuse with another similar QP number) and remove the tracking
2491          * structure from the QP number AVL tree and free the structure.
2492          * If we are not releasing the QP number here, then, as long as we
2493          * have not exhausted the usefulness of the QPC context (that is,
2494          * re-used it too many times without the reference count having
2495          * gone to zero), we free up the QPC context for use by another
2496          * thread (which will use it to construct a different QP number
2497          * from the same QPC table index).
2498          */
2499         if (flags == HERMON_QPN_RELEASE) {
2500                 entry->qpn_refcnt--;
2501 
2502                 /*
2503                  * If the reference count is zero, then we free the QPC
2504                  * context (if it hadn't already been freed in an early
2505                  * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the
2506                  * tracking structure from the QP number AVL tree.
2507                  */
2508                 if (entry->qpn_refcnt == 0) {
2509                         if (entry->qpn_qpc != NULL) {
2510                                 hermon_rsrc_free(state, &entry->qpn_qpc);
2511                         }
2512 
2513                         /*
2514                          * If the current entry has served it's useful
2515                          * purpose (i.e. been reused the maximum allowable
2516                          * number of times), then remove it from QP number
2517                          * AVL tree and free it up.
2518                          */
2519                         if (entry->qpn_counter >= (1 <<
2520                             (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2521                                 avl_remove(&state->hs_qpn_avl, entry);
2522                                 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2523                         }
2524                 }
2525 
2526         } else if (flags == HERMON_QPN_FREE_ONLY) {
2527                 /*
2528                  * Even if we are not freeing the QP number, that will not
2529                  * always prevent us from releasing the QPC context.  In fact,
2530                  * since the QPC context only forms part of the whole QPN,
2531                  * we want to free it up for use by other consumers.  But
2532                  * if the reference count is non-zero (which it will always
2533                  * be when we are doing HERMON_QPN_FREE_ONLY) and the counter
2534                  * has reached its maximum value, then we cannot reuse the
2535                  * QPC context until the reference count eventually reaches
2536                  * zero (in HERMON_QPN_RELEASE, above).
2537                  */
2538                 if (entry->qpn_counter < (1 <<
2539                     (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2540                         hermon_rsrc_free(state, &entry->qpn_qpc);
2541                 }
2542         }
2543         mutex_exit(&state->hs_qpn_avl_lock);
2544 }
2545 
2546 
2547 /*
2548  * hermon_qpn_avl_compare()
2549  *    Context: Can be called from user or kernel context.
2550  */
2551 static int
2552 hermon_qpn_avl_compare(const void *q, const void *e)
2553 {
2554         hermon_qpn_entry_t      *entry, *query;
2555 
2556         entry = (hermon_qpn_entry_t *)e;
2557         query = (hermon_qpn_entry_t *)q;
2558 
2559         if (query->qpn_indx < entry->qpn_indx) {
2560                 return (-1);
2561         } else if (query->qpn_indx > entry->qpn_indx) {
2562                 return (+1);
2563         } else {
2564                 return (0);
2565         }
2566 }
2567 
2568 
2569 /*
2570  * hermon_qpn_avl_init()
2571  *    Context: Only called from attach() path context
2572  */
2573 void
2574 hermon_qpn_avl_init(hermon_state_t *state)
2575 {
2576         /* Initialize the lock used for QP number (QPN) AVL tree access */
2577         mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER,
2578             DDI_INTR_PRI(state->hs_intrmsi_pri));
2579 
2580         /* Initialize the AVL tree for the QP number (QPN) storage */
2581         avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare,
2582             sizeof (hermon_qpn_entry_t),
2583             offsetof(hermon_qpn_entry_t, qpn_avlnode));
2584 }
2585 
2586 
2587 /*
2588  * hermon_qpn_avl_fini()
2589  *    Context: Only called from attach() and/or detach() path contexts
2590  */
2591 void
2592 hermon_qpn_avl_fini(hermon_state_t *state)
2593 {
2594         hermon_qpn_entry_t      *entry;
2595         void                    *cookie;
2596 
2597         /*
2598          * Empty all entries (if necessary) and destroy the AVL tree
2599          * that was used for QP number (QPN) tracking.
2600          */
2601         cookie = NULL;
2602         while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes(
2603             &state->hs_qpn_avl, &cookie)) != NULL) {
2604                 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2605         }
2606         avl_destroy(&state->hs_qpn_avl);
2607 
2608         /* Destroy the lock used for QP number (QPN) AVL tree access */
2609         mutex_destroy(&state->hs_qpn_avl_lock);
2610 }
2611 
2612 
2613 /*
2614  * hermon_qphdl_from_qpnum()
2615  *    Context: Can be called from interrupt or base context.
2616  *
2617  *    This routine is important because changing the unconstrained
2618  *    portion of the QP number is critical to the detection of a
2619  *    potential race condition in the QP event handler code (i.e. the case
2620  *    where a QP is freed and alloc'd again before an event for the
2621  *    "old" QP can be handled).
2622  *
2623  *    While this is not a perfect solution (not sure that one exists)
2624  *    it does help to mitigate the chance that this race condition will
2625  *    cause us to deliver a "stale" event to the new QP owner.  Note:
2626  *    this solution does not scale well because the number of constrained
2627  *    bits increases (and, hence, the number of unconstrained bits
2628  *    decreases) as the number of supported QPs grows.  For small and
2629  *    intermediate values, it should hopefully provide sufficient
2630  *    protection.
2631  */
2632 hermon_qphdl_t
2633 hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum)
2634 {
2635         uint_t  qpindx, qpmask;
2636 
2637         /* Calculate the QP table index from the qpnum */
2638         qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1;
2639         qpindx = qpnum & qpmask;
2640         return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx));
2641 }
2642 
2643 
2644 /*
2645  * hermon_special_qp_rsrc_alloc
2646  *    Context: Can be called from interrupt or base context.
2647  */
2648 static int
2649 hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type,
2650     uint_t port, hermon_rsrc_t **qp_rsrc)
2651 {
2652         uint_t          mask, flags;
2653         int             status;
2654 
2655         mutex_enter(&state->hs_spec_qplock);
2656         flags = state->hs_spec_qpflags;
2657         if (type == IBT_SMI_SQP) {
2658                 /*
2659                  * Check here to see if the driver has been configured
2660                  * to instruct the Hermon firmware to handle all incoming
2661                  * SMP messages (i.e. messages sent to SMA).  If so,
2662                  * then we will treat QP0 as if it has already been
2663                  * allocated (for internal use).  Otherwise, if we allow
2664                  * the allocation to happen, it will cause unexpected
2665                  * behaviors (e.g. Hermon SMA becomes unresponsive).
2666                  */
2667                 if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) {
2668                         mutex_exit(&state->hs_spec_qplock);
2669                         return (IBT_QP_IN_USE);
2670                 }
2671 
2672                 /*
2673                  * If this is the first QP0 allocation, then post
2674                  * a CONF_SPECIAL_QP firmware command
2675                  */
2676                 if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) {
2677                         status = hermon_conf_special_qp_cmd_post(state,
2678                             state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI,
2679                             HERMON_CMD_NOSLEEP_SPIN,
2680                             HERMON_CMD_SPEC_QP_OPMOD(
2681                             state->hs_cfg_profile->cp_qp0_agents_in_fw,
2682                             state->hs_cfg_profile->cp_qp1_agents_in_fw));
2683                         if (status != HERMON_CMD_SUCCESS) {
2684                                 mutex_exit(&state->hs_spec_qplock);
2685                                 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2686                                     "command failed: %08x\n",
2687                                     state->hs_instance, status);
2688                                 return (IBT_INSUFF_RESOURCE);
2689                         }
2690                 }
2691 
2692                 /*
2693                  * Now check (and, if necessary, modify) the flags to indicate
2694                  * whether the allocation was successful
2695                  */
2696                 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2697                 if (flags & mask) {
2698                         mutex_exit(&state->hs_spec_qplock);
2699                         return (IBT_QP_IN_USE);
2700                 }
2701                 state->hs_spec_qpflags |= mask;
2702                 *qp_rsrc = state->hs_spec_qp0;
2703 
2704         } else {
2705                 /*
2706                  * If this is the first QP1 allocation, then post
2707                  * a CONF_SPECIAL_QP firmware command
2708                  */
2709                 if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) {
2710                         status = hermon_conf_special_qp_cmd_post(state,
2711                             state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI,
2712                             HERMON_CMD_NOSLEEP_SPIN,
2713                             HERMON_CMD_SPEC_QP_OPMOD(
2714                             state->hs_cfg_profile->cp_qp0_agents_in_fw,
2715                             state->hs_cfg_profile->cp_qp1_agents_in_fw));
2716                         if (status != HERMON_CMD_SUCCESS) {
2717                                 mutex_exit(&state->hs_spec_qplock);
2718                                 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2719                                     "command failed: %08x\n",
2720                                     state->hs_instance, status);
2721                                 return (IBT_INSUFF_RESOURCE);
2722                         }
2723                 }
2724 
2725                 /*
2726                  * Now check (and, if necessary, modify) the flags to indicate
2727                  * whether the allocation was successful
2728                  */
2729                 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2730                 if (flags & mask) {
2731                         mutex_exit(&state->hs_spec_qplock);
2732                         return (IBT_QP_IN_USE);
2733                 }
2734                 state->hs_spec_qpflags |= mask;
2735                 *qp_rsrc = state->hs_spec_qp1;
2736         }
2737 
2738         mutex_exit(&state->hs_spec_qplock);
2739         return (DDI_SUCCESS);
2740 }
2741 
2742 
2743 /*
2744  * hermon_special_qp_rsrc_free
2745  *    Context: Can be called from interrupt or base context.
2746  */
2747 static int
2748 hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type,
2749     uint_t port)
2750 {
2751         uint_t          mask, flags;
2752         int             status;
2753 
2754         mutex_enter(&state->hs_spec_qplock);
2755         if (type == IBT_SMI_SQP) {
2756                 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2757                 state->hs_spec_qpflags &= ~mask;
2758                 flags = state->hs_spec_qpflags;
2759 
2760                 /*
2761                  * If this is the last QP0 free, then post a CONF_SPECIAL_QP
2762                  * NOW, If this is the last Special QP free, then post a
2763                  * CONF_SPECIAL_QP firmware command - it'll stop them all
2764                  */
2765                 if (flags) {
2766                         status = hermon_conf_special_qp_cmd_post(state, 0,
2767                             HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0);
2768                         if (status != HERMON_CMD_SUCCESS) {
2769                                 mutex_exit(&state->hs_spec_qplock);
2770                                 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2771                                     "command failed: %08x\n",
2772                                     state->hs_instance, status);
2773                                 if (status == HERMON_CMD_INVALID_STATUS) {
2774                                         hermon_fm_ereport(state, HCA_SYS_ERR,
2775                                             HCA_ERR_SRV_LOST);
2776                                 }
2777                                 return (ibc_get_ci_failure(0));
2778                         }
2779                 }
2780         } else {
2781                 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2782                 state->hs_spec_qpflags &= ~mask;
2783                 flags = state->hs_spec_qpflags;
2784 
2785                 /*
2786                  * If this is the last QP1 free, then post a CONF_SPECIAL_QP
2787                  * NOW, if this is the last special QP free, then post a
2788                  * CONF_SPECIAL_QP firmware command - it'll stop them all
2789                  */
2790                 if (flags) {
2791                         status = hermon_conf_special_qp_cmd_post(state, 0,
2792                             HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0);
2793                         if (status != HERMON_CMD_SUCCESS) {
2794                                 mutex_exit(&state->hs_spec_qplock);
2795                                 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2796                                     "command failed: %08x\n",
2797                                     state->hs_instance, status);
2798                                 if (status == HERMON_CMD_INVALID_STATUS) {
2799                                         hermon_fm_ereport(state, HCA_SYS_ERR,
2800                                             HCA_ERR_SRV_LOST);
2801                                 }
2802                                 return (ibc_get_ci_failure(0));
2803                         }
2804                 }
2805         }
2806 
2807         mutex_exit(&state->hs_spec_qplock);
2808         return (DDI_SUCCESS);
2809 }
2810 
2811 
2812 /*
2813  * hermon_qp_sgl_to_logwqesz()
2814  *    Context: Can be called from interrupt or base context.
2815  */
2816 static void
2817 hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
2818     uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
2819     uint_t *logwqesz, uint_t *max_sgl)
2820 {
2821         uint_t  max_size, log2, actual_sgl;
2822 
2823         switch (wq_type) {
2824         case HERMON_QP_WQ_TYPE_SENDQ_UD:
2825                 /*
2826                  * Use requested maximum SGL to calculate max descriptor size
2827                  * (while guaranteeing that the descriptor size is a
2828                  * power-of-2 cachelines).
2829                  */
2830                 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2831                 log2 = highbit(max_size);
2832                 if (ISP2(max_size)) {
2833                         log2 = log2 - 1;
2834                 }
2835 
2836                 /* Make sure descriptor is at least the minimum size */
2837                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2838 
2839                 /* Calculate actual number of SGL (given WQE size) */
2840                 actual_sgl = ((1 << log2) -
2841                     sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4;
2842                 break;
2843 
2844         case HERMON_QP_WQ_TYPE_SENDQ_CONN:
2845                 /*
2846                  * Use requested maximum SGL to calculate max descriptor size
2847                  * (while guaranteeing that the descriptor size is a
2848                  * power-of-2 cachelines).
2849                  */
2850                 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2851                 log2 = highbit(max_size);
2852                 if (ISP2(max_size)) {
2853                         log2 = log2 - 1;
2854                 }
2855 
2856                 /* Make sure descriptor is at least the minimum size */
2857                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2858 
2859                 /* Calculate actual number of SGL (given WQE size) */
2860                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4;
2861                 break;
2862 
2863         case HERMON_QP_WQ_TYPE_RECVQ:
2864                 /*
2865                  * Same as above (except for Recv WQEs)
2866                  */
2867                 max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2868                 log2 = highbit(max_size);
2869                 if (ISP2(max_size)) {
2870                         log2 = log2 - 1;
2871                 }
2872 
2873                 /* Make sure descriptor is at least the minimum size */
2874                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2875 
2876                 /* Calculate actual number of SGL (given WQE size) */
2877                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4;
2878                 break;
2879 
2880         case HERMON_QP_WQ_TYPE_SENDMLX_QP0:
2881                 /*
2882                  * Same as above (except for MLX transport WQEs).  For these
2883                  * WQEs we have to account for the space consumed by the
2884                  * "inline" packet headers.  (This is smaller than for QP1
2885                  * below because QP0 is not allowed to send packets with a GRH.
2886                  */
2887                 max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2888                 log2 = highbit(max_size);
2889                 if (ISP2(max_size)) {
2890                         log2 = log2 - 1;
2891                 }
2892 
2893                 /* Make sure descriptor is at least the minimum size */
2894                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2895 
2896                 /* Calculate actual number of SGL (given WQE size) */
2897                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4;
2898                 break;
2899 
2900         case HERMON_QP_WQ_TYPE_SENDMLX_QP1:
2901                 /*
2902                  * Same as above.  For these WQEs we again have to account for
2903                  * the space consumed by the "inline" packet headers.  (This
2904                  * is larger than for QP0 above because we have to account for
2905                  * the possibility of a GRH in each packet - and this
2906                  * introduces an alignment issue that causes us to consume
2907                  * an additional 8 bytes).
2908                  */
2909                 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2910                 log2 = highbit(max_size);
2911                 if (ISP2(max_size)) {
2912                         log2 = log2 - 1;
2913                 }
2914 
2915                 /* Make sure descriptor is at least the minimum size */
2916                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2917 
2918                 /* Calculate actual number of SGL (given WQE size) */
2919                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
2920                 break;
2921 
2922         default:
2923                 HERMON_WARNING(state, "unexpected work queue type");
2924                 break;
2925         }
2926 
2927         /* Fill in the return values */
2928         *logwqesz = log2;
2929         *max_sgl  = min(real_max_sgl, actual_sgl);
2930 }
--- EOF ---