5255 uts shouldn't open-code ISP2
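
The open-coded power-of-two test "(x & (x - 1)) == 0" in tavor_srq.c below is
replaced with the ISP2() macro.  For reference, ISP2() is defined in
<sys/sysmacros.h> as:

    #define ISP2(x)         (((x) & ((x) - 1)) == 0)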

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * tavor_srq.c
 *    Tavor Shared Receive Queue Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, querying,
 *    modifying and posting shared receive queues.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/tavor/tavor.h>

static void tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
    tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);

/*
 * tavor_srq_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_srq_alloc(tavor_state_t *state, tavor_srq_info_t *srqinfo,
    uint_t sleepflag, tavor_srq_options_t *op)
{
        ibt_srq_hdl_t           ibt_srqhdl;
        tavor_pdhdl_t           pd;
        ibt_srq_sizes_t         *sizes;
        ibt_srq_sizes_t         *real_sizes;
        tavor_srqhdl_t          *srqhdl;
        ibt_srq_flags_t         flags;
        tavor_rsrc_t            *srqc, *rsrc;
        tavor_hw_srqc_t         srqc_entry;
        uint32_t                *buf;
        tavor_srqhdl_t          srq;
        tavor_umap_db_entry_t   *umapdb;
        ibt_mr_attr_t           mr_attr;
        tavor_mr_options_t      mr_op;
        tavor_mrhdl_t           mr;
        uint64_t                addr;
        uint64_t                value, srq_desc_off;
        uint32_t                lkey;
        uint32_t                log_srq_size;
        uint32_t                uarpg;
        uint_t                  wq_location, dma_xfer_mode, srq_is_umap;
        int                     flag, status;
        char                    *errormsg;
        uint_t                  max_sgl;
        uint_t                  wqesz;

        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))

        TAVOR_TNF_ENTER(tavor_srq_alloc);

        /*
         * Check the "options" flag.  Currently this flag tells the driver
         * whether the SRQ's work queues should come from normal system
         * memory or be allocated from DDR memory.
         */
        if (op == NULL) {
                wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
        } else {
                wq_location = op->srqo_wq_loc;
        }

        /*
         * Extract the necessary info from the tavor_srq_info_t structure
         */
        real_sizes = srqinfo->srqi_real_sizes;
        sizes      = srqinfo->srqi_sizes;
        pd         = srqinfo->srqi_pd;
        ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
        flags      = srqinfo->srqi_flags;
        srqhdl     = srqinfo->srqi_srqhdl;

        /*
         * Determine whether SRQ is being allocated for userland access or
         * whether it is being allocated for kernel access.  If the SRQ is
         * being allocated for userland access, then lookup the UAR doorbell
         * page number for the current process.  Note:  If this is not found
         * (e.g. if the process has not previously open()'d the Tavor driver),
         * then an error is returned.
         */
        srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
        if (srq_is_umap) {
                status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
                    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
                if (status != DDI_SUCCESS) {
                        /* Set "status" and "errormsg" and goto failure */
                        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
                        goto srqalloc_fail;
                }
                uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
        }

        /* Increase PD refcnt */
        tavor_pd_refcnt_inc(pd);

        /* Allocate an SRQ context entry */
        status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc);
        if (status != DDI_SUCCESS) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context");
                goto srqalloc_fail1;
        }

        /* Allocate the SRQ Handle entry */
        status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc);
        if (status != DDI_SUCCESS) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle");
                goto srqalloc_fail2;
        }

        srq = (tavor_srqhdl_t)rsrc->tr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))

        srq->srq_srqnum = srqc->tr_indx;  /* just use index */

        /*
         * If this will be a user-mappable SRQ, then allocate an entry for
         * the "userland resources database".  This will later be added to
         * the database (after all further SRQ operations are successful).
         * If we fail here, we must undo the reference counts and the
         * previous resource allocation.
         */
        if (srq_is_umap) {
                umapdb = tavor_umap_db_alloc(state->ts_instance,
                    srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
                    (uint64_t)(uintptr_t)rsrc);
                if (umapdb == NULL) {
                        /* Set "status" and "errormsg" and goto failure */
                        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
                        goto srqalloc_fail3;
                }
        }

        /*
         * Calculate the appropriate size for the SRQ.
         * Note:  All Tavor SRQs must be a power-of-2 in size.  Also
         * they may not be any smaller than TAVOR_SRQ_MIN_SIZE.  This step
         * rounds the requested size up to the next highest power-of-2.
         */
        sizes->srq_wr_sz = max(sizes->srq_wr_sz, TAVOR_SRQ_MIN_SIZE);
        log_srq_size = highbit(sizes->srq_wr_sz);
        if (ISP2(sizes->srq_wr_sz)) {
                log_srq_size = log_srq_size - 1;
        }
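        /*
         * For example: a request for 1000 WRs gives highbit(1000) == 10, so
         * the SRQ is sized to 1 << 10 == 1024 entries.  A request for
         * exactly 1024 WRs is already a power-of-2 (ISP2), in which case
         * highbit() overshoots by one and we subtract that back off.
         */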

        /*
         * Next we verify that the rounded-up size is valid (i.e. consistent
         * with the device limits and/or software-configured limits).  If not,
         * then obviously we have a lot of cleanup to do before returning.
         */
        if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
                goto srqalloc_fail4;
        }

        /*
         * Next we verify that the requested number of SGL is valid (i.e.
         * consistent with the device limits and/or software-configured
         * limits).  If not, then obviously the same cleanup needs to be done.
         */
        max_sgl = state->ts_cfg_profile->cp_srq_max_sgl;
        if (sizes->srq_sgl_sz > max_sgl) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max SRQ SGL");
                goto srqalloc_fail4;
        }

        /*
         * Determine the SRQ's WQE sizes.  This depends on the requested
         * number of SGLs.  Note: This also has the side-effect of
         * calculating the real number of SGLs (for the calculated WQE size)
         */
        tavor_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
            TAVOR_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
            &srq->srq_wq_sgl);

        /*
         * Allocate the memory for SRQ work queues.  Note:  The location from
         * which we will allocate these work queues has been passed in through
         * the tavor_srq_options_t structure.  Since Tavor work queues are not
         * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
         * queue memory is very important.  We used to allocate work queues
         * (the combined receive and send queues) so that they would be aligned
         * on their combined size.  That alignment guaranteed that they would
         * never cross the 4GB boundary (Tavor work queues are on the order of
         * MBs at maximum).  Now we are able to relax this alignment constraint
         * by ensuring that the IB address assigned to the queue memory (as a
         * result of the tavor_mr_register() call) is offset from zero.
         * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
         * guarantee the alignment, but when attempting to use IOMMU bypass
         * mode we found that we were not allowed to specify any alignment that
         * was more restrictive than the system page size.  So we avoided this
         * constraint by passing two alignment values, one for the memory
         * allocation itself and the other for the DMA handle (for later bind).
         * This used to cause more memory than necessary to be allocated (in
         * order to guarantee the more restrictive alignment constraint).  But
         * by guaranteeing the zero-based IB virtual address for the queue, we
         * are able to conserve this memory.
         *
         * Note: If SRQ is not user-mappable, then it may come from either
         * kernel system memory or from HCA-attached local DDR memory.
         *
         * Note2: We align this queue on a pagesize boundary.  This is required
         * to make sure that all the resulting IB addresses will start at 0, for
         * a zero-based queue.  By making sure we are aligned on at least a
         * page, any offset we use into our queue will be the same as when we
         * perform tavor_srq_modify() operations later.
         */
        wqesz = (1 << srq->srq_wq_log_wqesz);
        srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
        srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
        srq->srq_wqinfo.qa_bind_align = PAGESIZE;
        if (srq_is_umap) {
                srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
        } else {
                srq->srq_wqinfo.qa_location = wq_location;
        }
        status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
        if (status != DDI_SUCCESS) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
                goto srqalloc_fail4;
        }
        buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

        /*
         * Register the memory for the SRQ work queues.  The memory for the SRQ
         * must be registered in the Tavor TPT tables.  This gives us the LKey
         * to specify in the SRQ context later.  Note: If the work queue is to
         * be allocated from DDR memory, then only a "bypass" mapping is
         * appropriate.  And if the SRQ memory is user-mappable, then we force
         * DDI_DMA_CONSISTENT mapping.  Also, in order to meet the alignment
         * restriction, we pass the "mro_bind_override_addr" flag in the call
         * to tavor_mr_register().  This guarantees that the resulting IB vaddr
         * will be zero-based (modulo the offset into the first page).  If we
         * fail here, we still have the bunch of resource and reference count
         * cleanup to do.
         */
        flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
            IBT_MR_NOSLEEP;
        mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
        mr_attr.mr_len   = srq->srq_wqinfo.qa_size;
        mr_attr.mr_as    = NULL;
        mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
        if (srq_is_umap) {
                mr_op.mro_bind_type   = state->ts_cfg_profile->cp_iommu_bypass;
        } else {
                if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
                        mr_op.mro_bind_type =
                            state->ts_cfg_profile->cp_iommu_bypass;
                        dma_xfer_mode =
                            state->ts_cfg_profile->cp_streaming_consistent;
                        if (dma_xfer_mode == DDI_DMA_STREAMING) {
                                mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
                        }
                } else {
                        mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
                }
        }
        mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
        mr_op.mro_bind_override_addr = 1;
        status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
        if (status != DDI_SUCCESS) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
                goto srqalloc_fail5;
        }
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
        addr = mr->mr_bindinfo.bi_addr;
        lkey = mr->mr_lkey;

        /*
         * Calculate the offset between the kernel virtual address space
         * and the IB virtual address space.  This will be used when
         * posting work requests to properly initialize each WQE.
         */
        srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
            (uint64_t)mr->mr_bindinfo.bi_addr;
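        /*
         * Because we registered with "mro_bind_override_addr" above, bi_addr
         * is zero-based (modulo the offset into the first page), so (to
         * within a page) srq_desc_off is simply the kernel virtual address
         * of the work queue buffer itself.
         */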

        /*
         * Create WQL and Wridlist for use by this SRQ
         */
        srq->srq_wrid_wql = tavor_wrid_wql_create(state);
        if (srq->srq_wrid_wql == NULL) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create");
                goto srqalloc_fail6;
        }
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql)))

        srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size);
        if (srq->srq_wridlist == NULL) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create");
                goto srqalloc_fail7;
        }
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist)))

        srq->srq_wridlist->wl_srq_en = 1;
        srq->srq_wridlist->wl_free_list_indx = -1;

        /*
         * Fill in all the return arguments (if necessary).  This includes
         * real queue size and real SGLs.
         */
        if (real_sizes != NULL) {
                real_sizes->srq_wr_sz = (1 << log_srq_size);
                real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
        }

        /*
         * Fill in the SRQC entry.  This is the final step before passing
         * ownership of the SRQC entry to the Tavor hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the SRQC.  Note: If this SRQ is going to be
         * used for userland access, then we need to set the UAR page number
         * appropriately (otherwise it's a "don't care")
         */
        bzero(&srqc_entry, sizeof (tavor_hw_srqc_t));
        srqc_entry.wqe_addr_h      = (addr >> 32);
        srqc_entry.next_wqe_addr_l = 0;
        srqc_entry.ds              = (wqesz >> 4);
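        /* "ds" is the descriptor (WQE) size expressed in 16-byte chunks */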
        srqc_entry.state           = TAVOR_SRQ_STATE_HW_OWNER;
        srqc_entry.pd              = pd->pd_pdnum;
        srqc_entry.lkey            = lkey;
        srqc_entry.wqe_cnt         = 0;
        if (srq_is_umap) {
                srqc_entry.uar     = uarpg;
        } else {
                srqc_entry.uar     = 0;
        }

        /*
         * Write the SRQC entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware (using the Tavor SW2HW_SRQ firmware
         * command).  Note: In general, this operation shouldn't fail.  But
         * if it does, we have to undo everything we've done above before
         * returning error.
         */
        status = tavor_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
            sizeof (tavor_hw_srqc_t), srq->srq_srqnum,
            sleepflag);
        if (status != TAVOR_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Tavor: SW2HW_SRQ command failed: %08x\n",
                    status);
                TNF_PROBE_1(tavor_srq_alloc_sw2hw_srq_cmd_fail,
                    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_FAILURE, "tavor SW2HW_SRQ command");
                goto srqalloc_fail8;
        }

        /*
         * Fill in the rest of the Tavor SRQ handle.  We can update
         * the following fields for use in further operations on the SRQ.
         */
        srq->srq_srqcrsrcp      = srqc;
        srq->srq_rsrcp          = rsrc;
        srq->srq_mrhdl          = mr;
        srq->srq_refcnt         = 0;
        srq->srq_is_umap        = srq_is_umap;
        srq->srq_uarpg          = (srq->srq_is_umap) ? uarpg : 0;
        srq->srq_umap_dhp       = (devmap_cookie_t)NULL;
        srq->srq_pdhdl          = pd;
        srq->srq_wq_lastwqeindx = -1;
        srq->srq_wq_bufsz       = (1 << log_srq_size);
        srq->srq_wq_buf         = buf;
        srq->srq_desc_off       = srq_desc_off;
        srq->srq_hdlrarg        = (void *)ibt_srqhdl;
        srq->srq_state          = 0;
        srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
        srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;

        /* Determine if later ddi_dma_sync will be necessary */
        srq->srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);

        /*
         * Put SRQ handle in Tavor SRQNum-to-SRQhdl list.  Then fill in the
         * "srqhdl" and return success
         */
        ASSERT(state->ts_srqhdl[srqc->tr_indx] == NULL);
        state->ts_srqhdl[srqc->tr_indx] = srq;

        /*
         * If this is a user-mappable SRQ, then we need to insert the
         * previously allocated entry into the "userland resources database".
         * This will allow for later lookup during devmap() (i.e. mmap())
         * calls.
         */
        if (srq->srq_is_umap) {
                tavor_umap_db_add(umapdb);
        } else {
                mutex_enter(&srq->srq_wrid_wql->wql_lock);
                tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 0);
                mutex_exit(&srq->srq_wrid_wql->wql_lock);
        }

        *srqhdl = srq;

        TAVOR_TNF_EXIT(tavor_srq_alloc);
        return (status);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
srqalloc_fail8:
        kmem_free(srq->srq_wridlist->wl_wre, srq->srq_wridlist->wl_size *
            sizeof (tavor_wrid_entry_t));
        kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));
srqalloc_fail7:
        tavor_wql_refcnt_dec(srq->srq_wrid_wql);
srqalloc_fail6:
        if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
            TAVOR_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
                TAVOR_WARNING(state, "failed to deregister SRQ memory");
        }
srqalloc_fail5:
        tavor_queue_free(state, &srq->srq_wqinfo);
srqalloc_fail4:
        if (srq_is_umap) {
                tavor_umap_db_free(umapdb);
        }
srqalloc_fail3:
        tavor_rsrc_free(state, &rsrc);
srqalloc_fail2:
        tavor_rsrc_free(state, &srqc);
srqalloc_fail1:
        tavor_pd_refcnt_dec(pd);
srqalloc_fail:
        TNF_PROBE_1(tavor_srq_alloc_fail, TAVOR_TNF_ERROR, "",
            tnf_string, msg, errormsg);
        TAVOR_TNF_EXIT(tavor_srq_alloc);
        return (status);
}


/*
 * tavor_srq_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
tavor_srq_free(tavor_state_t *state, tavor_srqhdl_t *srqhdl, uint_t sleepflag)
{
        tavor_rsrc_t            *srqc, *rsrc;
        tavor_umap_db_entry_t   *umapdb;
        uint64_t                value;
        tavor_srqhdl_t          srq;
        tavor_mrhdl_t           mr;
        tavor_pdhdl_t           pd;
        tavor_hw_srqc_t         srqc_entry;
        uint32_t                srqnum;
        uint32_t                size;
        uint_t                  maxprot;
        int                     status;

        TAVOR_TNF_ENTER(tavor_srq_free);

        /*
         * Pull all the necessary information from the Tavor Shared Receive
         * Queue handle.  This is necessary here because the resource for the
         * SRQ handle is going to be freed up as part of this operation.
         */
        srq     = *srqhdl;
        mutex_enter(&srq->srq_lock);
        srqc    = srq->srq_srqcrsrcp;
        rsrc    = srq->srq_rsrcp;
        pd      = srq->srq_pdhdl;
        mr      = srq->srq_mrhdl;
        srqnum  = srq->srq_srqnum;

        /*
         * If there are work queues still associated with the SRQ, then return
         * an error.  Otherwise, we will be holding the SRQ lock.
         */
        if (srq->srq_refcnt != 0) {
                mutex_exit(&srq->srq_lock);
                TNF_PROBE_1(tavor_srq_free_refcnt_fail, TAVOR_TNF_ERROR, "",
                    tnf_int, refcnt, srq->srq_refcnt);
                TAVOR_TNF_EXIT(tavor_srq_free);
                return (IBT_SRQ_IN_USE);
        }

        /*
         * If this was a user-mappable SRQ, then we need to remove its entry
         * from the "userland resources database".  If it is also currently
         * mmap()'d out to a user process, then we need to call
         * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
         * We also need to invalidate the SRQ tracking information for the
         * user mapping.
         */
        if (srq->srq_is_umap) {
                status = tavor_umap_db_find(state->ts_instance, srq->srq_srqnum,
                    MLNX_UMAP_SRQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
                    &umapdb);
                if (status != DDI_SUCCESS) {
                        mutex_exit(&srq->srq_lock);
                        TAVOR_WARNING(state, "failed to find in database");
                        TAVOR_TNF_EXIT(tavor_srq_free);
                        return (ibc_get_ci_failure(0));
                }
                tavor_umap_db_free(umapdb);
                if (srq->srq_umap_dhp != NULL) {
                        maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
                        status = devmap_devmem_remap(srq->srq_umap_dhp,
                            state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size,
                            maxprot, DEVMAP_MAPPING_INVALID, NULL);
                        if (status != DDI_SUCCESS) {
                                mutex_exit(&srq->srq_lock);
                                TAVOR_WARNING(state, "failed in SRQ memory "
                                    "devmap_devmem_remap()");
                                TAVOR_TNF_EXIT(tavor_srq_free);
                                return (ibc_get_ci_failure(0));
                        }
                        srq->srq_umap_dhp = (devmap_cookie_t)NULL;
                }
        }

        /*
         * Put NULL into the Tavor SRQNum-to-SRQHdl list.  This will allow any
         * in-progress events to detect that the SRQ corresponding to this
         * number has been freed.
         */
        state->ts_srqhdl[srqc->tr_indx] = NULL;

        mutex_exit(&srq->srq_lock);
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq->srq_wridlist));

        /*
         * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ
         * firmware command).  If the ownership transfer fails for any reason,
         * then it is an indication that something (either in HW or SW) has
         * gone seriously wrong.
         */
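        /*
         * Note: "srqc_entry" below is just scratch space for the command's
         * returned context; its contents are not examined afterward.
         */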
        status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
            sizeof (tavor_hw_srqc_t), srqnum, sleepflag);
        if (status != TAVOR_CMD_SUCCESS) {
                TAVOR_WARNING(state, "failed to reclaim SRQC ownership");
                cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n",
                    status);
                TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail,
                    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
                TAVOR_TNF_EXIT(tavor_srq_free);
                return (IBT_FAILURE);
        }

        /*
         * Deregister the memory for the Shared Receive Queue.  If this fails
         * for any reason, then it is an indication that something (either
         * in HW or SW) has gone seriously wrong.  So we print a warning
         * message and return.
         */
        status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
            sleepflag);
        if (status != DDI_SUCCESS) {
                TAVOR_WARNING(state, "failed to deregister SRQ memory");
                TNF_PROBE_0(tavor_srq_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
                TAVOR_TNF_EXIT(tavor_srq_free);
                return (IBT_FAILURE);
        }

        /* Calculate the size and free the wridlist container */
        if (srq->srq_wridlist != NULL) {
                size = (srq->srq_wridlist->wl_size *
                    sizeof (tavor_wrid_entry_t));
                kmem_free(srq->srq_wridlist->wl_wre, size);
                kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));

                /*
                 * Release reference to WQL; If this is the last reference,
                 * this call also has the side effect of freeing up the
                 * 'srq_wrid_wql' memory.
                 */
                tavor_wql_refcnt_dec(srq->srq_wrid_wql);
        }

        /* Free the memory for the SRQ */
        tavor_queue_free(state, &srq->srq_wqinfo);

        /* Free the Tavor SRQ Handle */
        tavor_rsrc_free(state, &rsrc);

        /* Free the SRQC entry resource */
        tavor_rsrc_free(state, &srqc);

        /* Decrement the reference count on the protection domain (PD) */
        tavor_pd_refcnt_dec(pd);

        /* Set the srqhdl pointer to NULL and return success */
        *srqhdl = NULL;

        TAVOR_TNF_EXIT(tavor_srq_free);
        return (DDI_SUCCESS);
}


/*
 * tavor_srq_modify()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_srq_modify(tavor_state_t *state, tavor_srqhdl_t srq, uint_t size,
    uint_t *real_size, uint_t sleepflag)
{
        tavor_qalloc_info_t     new_srqinfo, old_srqinfo;
        tavor_rsrc_t            *mtt, *mpt, *old_mtt;
        tavor_bind_info_t       bind;
        tavor_bind_info_t       old_bind;
        tavor_rsrc_pool_info_t  *rsrc_pool;
        tavor_mrhdl_t           mr;
        tavor_hw_mpt_t          mpt_entry;
        tavor_wrid_entry_t      *wre_new, *wre_old;
        uint64_t                mtt_ddrbaseaddr, mtt_addr;
        uint64_t                srq_desc_off;
        uint32_t                *buf, srq_old_bufsz;
        uint32_t                wqesz;
        uint_t                  max_srq_size;
        uint_t                  dma_xfer_mode, mtt_pgsize_bits;
        uint_t                  srq_sync, log_srq_size, maxprot;
        uint_t                  wq_location;
        int                     status;
        char                    *errormsg;

        TAVOR_TNF_ENTER(tavor_srq_modify);

        /*
         * Check the "inddr" flag.  This flag tells the driver whether the
         * SRQ's work queues should come from normal system memory or be
         * allocated from DDR memory.
         */
        wq_location = state->ts_cfg_profile->cp_srq_wq_inddr;

        /*
         * If size requested is larger than device capability, return
         * Insufficient Resources
         */
        max_srq_size = (1 << state->ts_cfg_profile->cp_log_max_srq_sz);
        if (size > max_srq_size) {
                TNF_PROBE_0(tavor_srq_modify_size_larger_than_maxsize,
                    TAVOR_TNF_ERROR, "");
                TAVOR_TNF_EXIT(tavor_srq_modify);
                return (IBT_HCA_WR_EXCEEDED);
        }

        /*
         * Calculate the appropriate size for the SRQ.
         * Note:  All Tavor SRQs must be a power-of-2 in size.  Also
         * they may not be any smaller than TAVOR_SRQ_MIN_SIZE.  This step
         * rounds the requested size up to the next highest power-of-2.
         */
        size = max(size, TAVOR_SRQ_MIN_SIZE);
        log_srq_size = highbit(size);
        if (ISP2(size)) {
                log_srq_size = log_srq_size - 1;
        }

        /*
         * Next we verify that the rounded-up size is valid (i.e. consistent
         * with the device limits and/or software-configured limits).
         */
        if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
                goto srqmodify_fail;
        }

        /*
         * Allocate the memory for the newly resized Shared Receive Queue.
         *
         * Note: If SRQ is not user-mappable, then it may come from either
         * kernel system memory or from HCA-attached local DDR memory.
         *
         * Note2: We align this queue on a pagesize boundary.  This is required
         * to make sure that all the resulting IB addresses will start at 0,
         * for a zero-based queue.  By making sure we are aligned on at least a
         * page, any offset we use into our queue will be the same as it was
         * when we allocated it at tavor_srq_alloc() time.
         */
        wqesz = (1 << srq->srq_wq_log_wqesz);
        new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
        new_srqinfo.qa_alloc_align = PAGESIZE;
        new_srqinfo.qa_bind_align  = PAGESIZE;
        if (srq->srq_is_umap) {
                new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
        } else {
                new_srqinfo.qa_location = wq_location;
        }
        status = tavor_queue_alloc(state, &new_srqinfo, sleepflag);
        if (status != DDI_SUCCESS) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
                goto srqmodify_fail;
        }
        buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

        /*
         * Allocate the memory for the new WRE list.  This will be used later
         * when we resize the wridlist based on the new SRQ size.
         */
        wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) *
            sizeof (tavor_wrid_entry_t), sleepflag);
        if (wre_new == NULL) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
                    "failed wre_new alloc");
                goto srqmodify_fail;
        }

        /*
         * Fill in the "bind" struct.  This struct provides the majority
         * of the information that will be used to distinguish between an
         * "addr" binding (as is the case here) and a "buf" binding (see
         * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
         * which does most of the "heavy lifting" for the Tavor memory
         * registration routines.
         */
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
        bzero(&bind, sizeof (tavor_bind_info_t));
        bind.bi_type  = TAVOR_BINDHDL_VADDR;
        bind.bi_addr  = (uint64_t)(uintptr_t)buf;
        bind.bi_len   = new_srqinfo.qa_size;
        bind.bi_as    = NULL;
        bind.bi_flags = (sleepflag == TAVOR_SLEEP ? IBT_MR_SLEEP :
            IBT_MR_NOSLEEP) | IBT_MR_ENABLE_LOCAL_WRITE;
        if (srq->srq_is_umap) {
                bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass;
        } else {
                if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
                        bind.bi_bypass =
                            state->ts_cfg_profile->cp_iommu_bypass;
                        dma_xfer_mode =
                            state->ts_cfg_profile->cp_streaming_consistent;
                        if (dma_xfer_mode == DDI_DMA_STREAMING) {
                                bind.bi_flags |= IBT_MR_NONCOHERENT;
                        }
                } else {
                        bind.bi_bypass = TAVOR_BINDMEM_BYPASS;
                }
        }
        status = tavor_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
            &mtt_pgsize_bits);
        if (status != DDI_SUCCESS) {
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(status, "failed mtt bind");
                kmem_free(wre_new, (1 << log_srq_size) *
                    sizeof (tavor_wrid_entry_t));
                tavor_queue_free(state, &new_srqinfo);
                goto srqmodify_fail;
        }

        /*
         * Calculate the offset between the kernel virtual address space
         * and the IB virtual address space.  This will be used when
         * posting work requests to properly initialize each WQE.
         *
         * Note: bind addr is zero-based (from alloc) so we calculate the
         * correct new offset here.
         */
        bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
        srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
            (uint64_t)bind.bi_addr;

        /*
         * Get the base address for the MTT table.  This will be necessary
         * below when we are modifying the MPT entry.
         */
        rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
        mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

        /*
         * Fill in the MPT entry.  This is the final step before passing
         * ownership of the MPT entry to the Tavor hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the MPT.
         */
        bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
        mpt_entry.reg_win_len   = bind.bi_len;
        mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
        mpt_entry.mttseg_addr_h = mtt_addr >> 32;
        mpt_entry.mttseg_addr_l = mtt_addr >> 6;
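        /*
         * The low word is stored in units of 64 bytes, i.e. the MTT
         * segment address is assumed to be 64-byte aligned.
         */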

        /*
         * Now we grab the SRQ lock.  Since we will be updating the actual
         * SRQ location and the producer/consumer indexes, we should hold
         * the lock.
         *
         * We do a TAVOR_NOSLEEP here (and below), though, because we are
         * holding the "srq_lock" and if we got raised to interrupt level
         * by priority inversion, we would not want to block in this routine
         * waiting for success.
         */
        mutex_enter(&srq->srq_lock);

        /*
         * Copy old entries to new buffer
         */
        srq_old_bufsz = srq->srq_wq_bufsz;
        bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);

        /* Determine if later ddi_dma_sync will be necessary */
        srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);

        /* Sync entire "new" SRQ for use by hardware (if necessary) */
        if (srq_sync) {
                (void) ddi_dma_sync(bind.bi_dmahdl, 0,
                    new_srqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
        }

        /*
         * Setup MPT information for use in the MODIFY_MPT command
         */
        mr = srq->srq_mrhdl;
        mutex_enter(&mr->mr_lock);
        mpt = srq->srq_mrhdl->mr_mptrsrcp;

        /*
         * MODIFY_MPT
         *
         * If this fails for any reason, then it is an indication that
         * something (either in HW or SW) has gone seriously wrong.  So we
         * print a warning message and return.
         */
        status = tavor_modify_mpt_cmd_post(state, &mpt_entry, mpt->tr_indx,
            TAVOR_CMD_MODIFY_MPT_RESIZESRQ, sleepflag);
        if (status != TAVOR_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Tavor: MODIFY_MPT command failed: %08x\n",
                    status);
                TNF_PROBE_1(tavor_srq_modify_mpt_cmd_fail,
                    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
                TAVOR_TNF_FAIL(status, "MODIFY_MPT command failed");
                (void) tavor_mr_mtt_unbind(state, &srq->srq_mrhdl->mr_bindinfo,
                    srq->srq_mrhdl->mr_mttrsrcp);
                kmem_free(wre_new, (1 << log_srq_size) *
                    sizeof (tavor_wrid_entry_t));
                tavor_queue_free(state, &new_srqinfo);
                mutex_exit(&mr->mr_lock);
                mutex_exit(&srq->srq_lock);
                return (ibc_get_ci_failure(0));
        }

        /*
         * Update the Tavor Shared Receive Queue handle with all the new
         * information.  At the same time, save away all the necessary
         * information for freeing up the old resources
         */
        old_srqinfo        = srq->srq_wqinfo;
        old_mtt            = srq->srq_mrhdl->mr_mttrsrcp;
        bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
            sizeof (tavor_bind_info_t));

        /* Now set the new info */
        srq->srq_wqinfo         = new_srqinfo;
        srq->srq_wq_buf         = buf;
        srq->srq_wq_bufsz       = (1 << log_srq_size);
        bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t));
        srq->srq_mrhdl->mr_mttrsrcp = mtt;
        srq->srq_desc_off       = srq_desc_off;
        srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);

        /* Update MR mtt pagesize */
        mr->mr_logmttpgsz = mtt_pgsize_bits;
        mutex_exit(&mr->mr_lock);

#ifdef __lock_lint
        mutex_enter(&srq->srq_wrid_wql->wql_lock);
#else
        if (srq->srq_wrid_wql != NULL) {
                mutex_enter(&srq->srq_wrid_wql->wql_lock);
        }
#endif

        /*
         * Initialize new wridlist, if needed.
         *
         * If a wridlist is already set up on an SRQ (the QP associated with
         * an SRQ has moved "from_reset") then we must update this wridlist
         * based on the new SRQ size.  We allocate the new size of Work
         * Request ID Entries, copy over the old entries to the new list, and
         * re-initialize the srq wridlist in non-umap case
         */
        wre_old = NULL;
        if (srq->srq_wridlist != NULL) {
                wre_old = srq->srq_wridlist->wl_wre;

                bcopy(wre_old, wre_new, srq_old_bufsz *
                    sizeof (tavor_wrid_entry_t));

                /* Setup new sizes in wre */
                srq->srq_wridlist->wl_wre = wre_new;
                srq->srq_wridlist->wl_size = srq->srq_wq_bufsz;

                if (!srq->srq_is_umap) {
                        tavor_wrid_list_srq_init(srq->srq_wridlist, srq,
                            srq_old_bufsz);
                }
        }

#ifdef __lock_lint
        mutex_exit(&srq->srq_wrid_wql->wql_lock);
#else
        if (srq->srq_wrid_wql != NULL) {
                mutex_exit(&srq->srq_wrid_wql->wql_lock);
        }
#endif

        /*
         * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
         * to a user process, then we need to call devmap_devmem_remap() to
         * invalidate the mapping to the SRQ memory.  We also need to
         * invalidate the SRQ tracking information for the user mapping.
         *
         * Note: The remap really shouldn't ever fail.  So, if it does, it is
         * an indication that something has gone seriously wrong.  So we
         * print a warning message and return error (knowing, of course,
         * that the "old" SRQ memory will be leaked)
         */
        if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
                maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
                status = devmap_devmem_remap(srq->srq_umap_dhp,
                    state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
                    DEVMAP_MAPPING_INVALID, NULL);
                if (status != DDI_SUCCESS) {
                        mutex_exit(&srq->srq_lock);
                        TAVOR_WARNING(state, "failed in SRQ memory "
                            "devmap_devmem_remap()");
                        /* We can, however, free the memory for old wre */
                        if (wre_old != NULL) {
                                kmem_free(wre_old, srq_old_bufsz *
                                    sizeof (tavor_wrid_entry_t));
                        }
                        TAVOR_TNF_EXIT(tavor_srq_modify);
                        return (ibc_get_ci_failure(0));
                }
                srq->srq_umap_dhp = (devmap_cookie_t)NULL;
        }

        /*
         * Drop the SRQ lock now.  The only thing left to do is to free up
         * the old resources.
         */
        mutex_exit(&srq->srq_lock);

        /*
         * Unbind the MTT entries.
         */
        status = tavor_mr_mtt_unbind(state, &old_bind, old_mtt);
        if (status != DDI_SUCCESS) {
                TAVOR_WARNING(state, "failed to unbind old SRQ memory");
                /* Set "status" and "errormsg" and goto failure */
                TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
                    "failed to unbind (old)");
                goto srqmodify_fail;
        }

        /* Free the memory for old wre */
        if (wre_old != NULL) {
                kmem_free(wre_old, srq_old_bufsz *
                    sizeof (tavor_wrid_entry_t));
        }

        /* Free the memory for the old SRQ */
        tavor_queue_free(state, &old_srqinfo);

        /*
         * Fill in the return arguments (if necessary).  This includes the
         * real new shared receive queue size.
         */
        if (real_size != NULL) {
                *real_size = (1 << log_srq_size);
        }

        TAVOR_TNF_EXIT(tavor_srq_modify);
        return (DDI_SUCCESS);

srqmodify_fail:
        TNF_PROBE_1(tavor_srq_modify_fail, TAVOR_TNF_ERROR, "",
            tnf_string, msg, errormsg);
        TAVOR_TNF_EXIT(tavor_srq_modify);
        return (status);
}


/*
 * tavor_srq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_srq_refcnt_inc(tavor_srqhdl_t srq)
{
        mutex_enter(&srq->srq_lock);
        TNF_PROBE_1_DEBUG(tavor_srq_refcnt_inc, TAVOR_TNF_TRACE, "",
            tnf_uint, refcnt, srq->srq_refcnt);
        srq->srq_refcnt++;
        mutex_exit(&srq->srq_lock);
}


/*
 * tavor_srq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_srq_refcnt_dec(tavor_srqhdl_t srq)
{
        mutex_enter(&srq->srq_lock);
        srq->srq_refcnt--;
        TNF_PROBE_1_DEBUG(tavor_srq_refcnt_dec, TAVOR_TNF_TRACE, "",
            tnf_uint, refcnt, srq->srq_refcnt);
        mutex_exit(&srq->srq_lock);
}


/*
 * tavor_srqhdl_from_srqnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the SRQ number is critical to the detection of a
 *    potential race condition in the SRQ handler code (i.e. the case
 *    where a SRQ is freed and alloc'd again before an event for the
 *    "old" SRQ can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new SRQ owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported SRQs grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
tavor_srqhdl_t
tavor_srqhdl_from_srqnum(tavor_state_t *state, uint_t srqnum)
{
        uint_t  srqindx, srqmask;

        /* Calculate the SRQ table index from the srqnum */
        srqmask = (1 << state->ts_cfg_profile->cp_log_num_srq) - 1;
        srqindx = srqnum & srqmask;
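        /*
         * For example (hypothetical sizing): with cp_log_num_srq == 10 the
         * mask is 0x3FF, so SRQ number 0x1404 maps to table index 0x004;
         * only the low (constrained) bits select the slot, while the upper
         * (unconstrained) bits vary each time the SRQ number is reused.
         */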
        return (state->ts_srqhdl[srqindx]);
}


/*
 * tavor_srq_sgl_to_logwqesz()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
    tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
{
        uint_t  max_size, log2, actual_sgl;

        TAVOR_TNF_ENTER(tavor_srq_sgl_to_logwqesz);

        switch (wq_type) {
        case TAVOR_QP_WQ_TYPE_RECVQ:
                /*
                 * Use requested maximum SGL to calculate max descriptor size
                 * (while guaranteeing that the descriptor size is a
                 * power-of-2 cachelines).
                 */
                max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
                log2 = highbit(max_size);
                if (ISP2(max_size)) {
                        log2 = log2 - 1;
                }

                /* Make sure descriptor is at least the minimum size */
                log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);

                /* Calculate actual number of SGL (given WQE size) */
                actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
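                /*
                 * For example (assuming, hypothetically, 16-byte receive
                 * headers): num_sgl == 4 gives max_size == 16 + 64 == 80,
                 * which rounds up to a 128-byte WQE (log2 == 7), leaving
                 * room for ((128 - 16) >> 4) == 7 actual SGL entries.
                 */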
                break;

        default:
                TAVOR_WARNING(state, "unexpected work queue type");
                TNF_PROBE_0(tavor_srq_sgl_to_logwqesz_inv_wqtype_fail,
                    TAVOR_TNF_ERROR, "");
                break;
        }

        /* Fill in the return values */
        *logwqesz = log2;
        *max_sgl  = min(state->ts_cfg_profile->cp_srq_max_sgl, actual_sgl);

        TAVOR_TNF_EXIT(tavor_srq_sgl_to_logwqesz);
}