1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_srq.c
  28  *    Hermon Shared Receive Queue Processing Routines
  29  *
  30  *    Implements all the routines necessary for allocating, freeing, querying,
  31  *    modifying and posting shared receive queues.
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/conf.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/modctl.h>
  39 #include <sys/bitmap.h>
  40 
  41 #include <sys/ib/adapters/hermon/hermon.h>
  42 
  43 static void hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
  44     hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
  45 
  46 /*
  47  * hermon_srq_alloc()
  48  *    Context: Can be called only from user or kernel context.
  49  */
  50 int
  51 hermon_srq_alloc(hermon_state_t *state, hermon_srq_info_t *srqinfo,
  52     uint_t sleepflag)
  53 {
  54         ibt_srq_hdl_t           ibt_srqhdl;
  55         hermon_pdhdl_t          pd;
  56         ibt_srq_sizes_t         *sizes;
  57         ibt_srq_sizes_t         *real_sizes;
  58         hermon_srqhdl_t         *srqhdl;
  59         ibt_srq_flags_t         flags;
  60         hermon_rsrc_t           *srqc, *rsrc;
  61         hermon_hw_srqc_t        srqc_entry;
  62         uint32_t                *buf;
  63         hermon_srqhdl_t         srq;
  64         hermon_umap_db_entry_t  *umapdb;
  65         ibt_mr_attr_t           mr_attr;
  66         hermon_mr_options_t     mr_op;
  67         hermon_mrhdl_t          mr;
  68         uint64_t                value, srq_desc_off;
  69         uint32_t                log_srq_size;
  70         uint32_t                uarpg;
  71         uint_t                  srq_is_umap;
  72         int                     flag, status;
  73         uint_t                  max_sgl;
  74         uint_t                  wqesz;
  75         uint_t                  srq_wr_sz;
  76         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))
  77 
  78         /*
  79          * options-->wq_location used to be for location, now explicitly
  80          * LOCATION_NORMAL
  81          */
  82 
  83         /*
  84          * Extract the necessary info from the hermon_srq_info_t structure
  85          */
  86         real_sizes = srqinfo->srqi_real_sizes;
  87         sizes      = srqinfo->srqi_sizes;
  88         pd         = srqinfo->srqi_pd;
  89         ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
  90         flags      = srqinfo->srqi_flags;
  91         srqhdl     = srqinfo->srqi_srqhdl;
  92 
  93         /*
  94          * Determine whether SRQ is being allocated for userland access or
  95          * whether it is being allocated for kernel access.  If the SRQ is
  96          * being allocated for userland access, then lookup the UAR doorbell
  97          * page number for the current process.  Note:  If this is not found
  98          * (e.g. if the process has not previously open()'d the Hermon driver),
  99          * then an error is returned.
 100          */
 101         srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
 102         if (srq_is_umap) {
 103                 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
 104                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
 105                 if (status != DDI_SUCCESS) {
 106                         status = IBT_INVALID_PARAM;
 107                         goto srqalloc_fail3;
 108                 }
 109                 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
 110         } else {
 111                 uarpg = state->hs_kernel_uar_index;
 112         }
 113 
 114         /* Increase PD refcnt */
 115         hermon_pd_refcnt_inc(pd);
 116 
 117         /* Allocate an SRQ context entry */
 118         status = hermon_rsrc_alloc(state, HERMON_SRQC, 1, sleepflag, &srqc);
 119         if (status != DDI_SUCCESS) {
 120                 status = IBT_INSUFF_RESOURCE;
 121                 goto srqalloc_fail1;
 122         }
 123 
 124         /* Allocate the SRQ Handle entry */
 125         status = hermon_rsrc_alloc(state, HERMON_SRQHDL, 1, sleepflag, &rsrc);
 126         if (status != DDI_SUCCESS) {
 127                 status = IBT_INSUFF_RESOURCE;
 128                 goto srqalloc_fail2;
 129         }
 130 
 131         srq = (hermon_srqhdl_t)rsrc->hr_addr;
 132         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))
 133 
 134         bzero(srq, sizeof (struct hermon_sw_srq_s));
 135         /* Calculate the SRQ number */
 136 
 137         /* just use the index, implicit in Hermon */
 138         srq->srq_srqnum = srqc->hr_indx;
 139 
 140         /*
 141          * If this will be a user-mappable SRQ, then allocate an entry for
 142          * the "userland resources database".  This will later be added to
 143          * the database (after all further SRQ operations are successful).
 144          * If we fail here, we must undo the reference counts and the
 145          * previous resource allocation.
 146          */
 147         if (srq_is_umap) {
 148                 umapdb = hermon_umap_db_alloc(state->hs_instance,
 149                     srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
 150                     (uint64_t)(uintptr_t)rsrc);
 151                 if (umapdb == NULL) {
 152                         status = IBT_INSUFF_RESOURCE;
 153                         goto srqalloc_fail3;
 154                 }
 155         }
 156 
 157         /*
 158          * Allocate the doorbell record.  Hermon just needs one for the
 159          * SRQ, and use uarpg (above) as the uar index
 160          */
 161 
 162         status = hermon_dbr_alloc(state, uarpg, &srq->srq_wq_dbr_acchdl,
 163             &srq->srq_wq_vdbr, &srq->srq_wq_pdbr, &srq->srq_rdbr_mapoffset);
 164         if (status != DDI_SUCCESS) {
 165                 status = IBT_INSUFF_RESOURCE;
 166                 goto srqalloc_fail4;
 167         }
 168 
 169         /*
 170          * Calculate the appropriate size for the SRQ.
 171          * Note:  All Hermon SRQs must be a power-of-2 in size.  Also
 172          * they may not be any smaller than HERMON_SRQ_MIN_SIZE.  This step
 173          * is to round the requested size up to the next highest power-of-2
 174          */
 175         srq_wr_sz = max(sizes->srq_wr_sz + 1, HERMON_SRQ_MIN_SIZE);
 176         log_srq_size = highbit(srq_wr_sz);
 177         if ((srq_wr_sz & (srq_wr_sz - 1)) == 0) {
 178                 log_srq_size = log_srq_size - 1;
 179         }
 180 
 181         /*
 182          * Next we verify that the rounded-up size is valid (i.e. consistent
 183          * with the device limits and/or software-configured limits).  If not,
 184          * then obviously we have a lot of cleanup to do before returning.
 185          */
 186         if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
 187                 status = IBT_HCA_WR_EXCEEDED;
 188                 goto srqalloc_fail4a;
 189         }
 190 
 191         /*
 192          * Next we verify that the requested number of SGL is valid (i.e.
 193          * consistent with the device limits and/or software-configured
 194          * limits).  If not, then obviously the same cleanup needs to be done.
 195          */
 196         max_sgl = state->hs_ibtfinfo.hca_attr->hca_max_srq_sgl;
 197         if (sizes->srq_sgl_sz > max_sgl) {
 198                 status = IBT_HCA_SGL_EXCEEDED;
 199                 goto srqalloc_fail4a;
 200         }
 201 
 202         /*
 203          * Determine the SRQ's WQE sizes.  This depends on the requested
 204          * number of SGLs.  Note: This also has the side-effect of
 205          * calculating the real number of SGLs (for the calculated WQE size)
 206          */
 207         hermon_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
 208             HERMON_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
 209             &srq->srq_wq_sgl);
 210 
 211         /*
 212          * Allocate the memory for SRQ work queues.  Note:  The location from
 213          * which we will allocate these work queues is always
 214          * QUEUE_LOCATION_NORMAL.  Since Hermon work queues are not
 215          * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
 216          * queue memory is very important.  We used to allocate work queues
 217          * (the combined receive and send queues) so that they would be aligned
 218          * on their combined size.  That alignment guaranteed that they would
 219          * never cross the 4GB boundary (Hermon work queues are on the order of
 220          * MBs at maximum).  Now we are able to relax this alignment constraint
 221          * by ensuring that the IB address assigned to the queue memory (as a
 222          * result of the hermon_mr_register() call) is offset from zero.
 223          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 224          * guarantee the alignment, but when attempting to use IOMMU bypass
 225          * mode we found that we were not allowed to specify any alignment that
 226          * was more restrictive than the system page size.  So we avoided this
 227          * constraint by passing two alignment values, one for the memory
 228          * allocation itself and the other for the DMA handle (for later bind).
 229          * This used to cause more memory than necessary to be allocated (in
 230          * order to guarantee the more restrictive alignment contraint).  But
 231          * be guaranteeing the zero-based IB virtual address for the queue, we
 232          * are able to conserve this memory.
 233          *
 234          * Note: If SRQ is not user-mappable, then it may come from either
 235          * kernel system memory or from HCA-attached local DDR memory.
 236          *
 237          * Note2: We align this queue on a pagesize boundary.  This is required
 238          * to make sure that all the resulting IB addresses will start at 0, for
 239          * a zero-based queue.  By making sure we are aligned on at least a
 240          * page, any offset we use into our queue will be the same as when we
 241          * perform hermon_srq_modify() operations later.
 242          */
 243         wqesz = (1 << srq->srq_wq_log_wqesz);
 244         srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
 245         srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
 246         srq->srq_wqinfo.qa_bind_align = PAGESIZE;
 247         if (srq_is_umap) {
 248                 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 249         } else {
 250                 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 251         }
 252         status = hermon_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
 253         if (status != DDI_SUCCESS) {
 254                 status = IBT_INSUFF_RESOURCE;
 255                 goto srqalloc_fail4a;
 256         }
 257         buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
 258         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
 259 
 260         /*
 261          * Register the memory for the SRQ work queues.  The memory for the SRQ
 262          * must be registered in the Hermon cMPT tables.  This gives us the LKey
 263          * to specify in the SRQ context later.  Note: If the work queue is to
 264          * be allocated from DDR memory, then only a "bypass" mapping is
 265          * appropriate.  And if the SRQ memory is user-mappable, then we force
 266          * DDI_DMA_CONSISTENT mapping.  Also, in order to meet the alignment
 267          * restriction, we pass the "mro_bind_override_addr" flag in the call
 268          * to hermon_mr_register().  This guarantees that the resulting IB vaddr
 269          * will be zero-based (modulo the offset into the first page).  If we
 270          * fail here, we still have the bunch of resource and reference count
 271          * cleanup to do.
 272          */
 273         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
 274             IBT_MR_NOSLEEP;
 275         mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
 276         mr_attr.mr_len   = srq->srq_wqinfo.qa_size;
 277         mr_attr.mr_as    = NULL;
 278         mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
 279         mr_op.mro_bind_type   = state->hs_cfg_profile->cp_iommu_bypass;
 280         mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
 281         mr_op.mro_bind_override_addr = 1;
 282         status = hermon_mr_register(state, pd, &mr_attr, &mr,
 283             &mr_op, HERMON_SRQ_CMPT);
 284         if (status != DDI_SUCCESS) {
 285                 status = IBT_INSUFF_RESOURCE;
 286                 goto srqalloc_fail5;
 287         }
 288         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
 289 
 290         /*
 291          * Calculate the offset between the kernel virtual address space
 292          * and the IB virtual address space.  This will be used when
 293          * posting work requests to properly initialize each WQE.
 294          */
 295         srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
 296             (uint64_t)mr->mr_bindinfo.bi_addr;
 297 
 298         srq->srq_wq_wqhdr = hermon_wrid_wqhdr_create(1 << log_srq_size);
 299 
 300         /*
 301          * Fill in all the return arguments (if necessary).  This includes
 302          * real queue size and real SGLs.
 303          */
 304         if (real_sizes != NULL) {
 305                 real_sizes->srq_wr_sz = (1 << log_srq_size) - 1;
 306                 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
 307         }
 308 
 309         /*
 310          * Fill in the SRQC entry.  This is the final step before passing
 311          * ownership of the SRQC entry to the Hermon hardware.  We use all of
 312          * the information collected/calculated above to fill in the
 313          * requisite portions of the SRQC.  Note: If this SRQ is going to be
 314          * used for userland access, then we need to set the UAR page number
 315          * appropriately (otherwise it's a "don't care")
 316          */
 317         bzero(&srqc_entry, sizeof (hermon_hw_srqc_t));
 318         srqc_entry.state           = HERMON_SRQ_STATE_HW_OWNER;
 319         srqc_entry.log_srq_size    = log_srq_size;
 320         srqc_entry.srqn            = srq->srq_srqnum;
 321         srqc_entry.log_rq_stride   = srq->srq_wq_log_wqesz - 4;
 322                                         /* 16-byte chunks */
 323 
 324         srqc_entry.page_offs       = srq->srq_wqinfo.qa_pgoffs >> 6;
 325         srqc_entry.log2_pgsz       = mr->mr_log2_pgsz;
 326         srqc_entry.mtt_base_addrh  = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
 327         srqc_entry.mtt_base_addrl  = mr->mr_mttaddr >> 3;
 328         srqc_entry.pd              = pd->pd_pdnum;
 329         srqc_entry.dbr_addrh = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 32);
 330         srqc_entry.dbr_addrl = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 2);
 331 
 332         /*
 333          * all others - specifically, xrcd, cqn_xrc, lwm, wqe_cnt, and wqe_cntr
 334          * are zero thanks to the bzero of the structure
 335          */
 336 
 337         /*
 338          * Write the SRQC entry to hardware.  Lastly, we pass ownership of
 339          * the entry to the hardware (using the Hermon SW2HW_SRQ firmware
 340          * command).  Note: In general, this operation shouldn't fail.  But
 341          * if it does, we have to undo everything we've done above before
 342          * returning error.
 343          */
 344         status = hermon_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
 345             sizeof (hermon_hw_srqc_t), srq->srq_srqnum,
 346             sleepflag);
 347         if (status != HERMON_CMD_SUCCESS) {
 348                 cmn_err(CE_CONT, "Hermon: SW2HW_SRQ command failed: %08x\n",
 349                     status);
 350                 if (status == HERMON_CMD_INVALID_STATUS) {
 351                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 352                 }
 353                 status = ibc_get_ci_failure(0);
 354                 goto srqalloc_fail8;
 355         }
 356 
 357         /*
 358          * Fill in the rest of the Hermon SRQ handle.  We can update
 359          * the following fields for use in further operations on the SRQ.
 360          */
 361         srq->srq_srqcrsrcp = srqc;
 362         srq->srq_rsrcp          = rsrc;
 363         srq->srq_mrhdl          = mr;
 364         srq->srq_refcnt         = 0;
 365         srq->srq_is_umap   = srq_is_umap;
 366         srq->srq_uarpg          = uarpg;
 367         srq->srq_umap_dhp  = (devmap_cookie_t)NULL;
 368         srq->srq_pdhdl          = pd;
 369         srq->srq_wq_bufsz  = (1 << log_srq_size);
 370         srq->srq_wq_buf         = buf;
 371         srq->srq_desc_off  = srq_desc_off;
 372         srq->srq_hdlrarg   = (void *)ibt_srqhdl;
 373         srq->srq_state          = 0;
 374         srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
 375         srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;
 376 
 377         /*
 378          * Put SRQ handle in Hermon SRQNum-to-SRQhdl list.  Then fill in the
 379          * "srqhdl" and return success
 380          */
 381         hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, srq);
 382 
 383         /*
 384          * If this is a user-mappable SRQ, then we need to insert the
 385          * previously allocated entry into the "userland resources database".
 386          * This will allow for later lookup during devmap() (i.e. mmap())
 387          * calls.
 388          */
 389         if (srq->srq_is_umap) {
 390                 hermon_umap_db_add(umapdb);
 391         } else {        /* initialize work queue for kernel SRQs */
 392                 int i, len, last;
 393                 uint16_t *desc;
 394 
 395                 desc = (uint16_t *)buf;
 396                 len = wqesz / sizeof (*desc);
 397                 last = srq->srq_wq_bufsz - 1;
 398                 for (i = 0; i < last; i++) {
 399                         desc[1] = htons(i + 1);
 400                         desc += len;
 401                 }
 402                 srq->srq_wq_wqhdr->wq_tail = last;
 403                 srq->srq_wq_wqhdr->wq_head = 0;
 404         }
 405 
 406         *srqhdl = srq;
 407 
 408         return (status);
 409 
 410 /*
 411  * The following is cleanup for all possible failure cases in this routine
 412  */
 413 srqalloc_fail8:
 414         hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
 415 srqalloc_fail7:
 416         if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 417             HERMON_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
 418                 HERMON_WARNING(state, "failed to deregister SRQ memory");
 419         }
 420 srqalloc_fail5:
 421         hermon_queue_free(&srq->srq_wqinfo);
 422 srqalloc_fail4a:
 423         hermon_dbr_free(state, uarpg, srq->srq_wq_vdbr);
 424 srqalloc_fail4:
 425         if (srq_is_umap) {
 426                 hermon_umap_db_free(umapdb);
 427         }
 428 srqalloc_fail3:
 429         hermon_rsrc_free(state, &rsrc);
 430 srqalloc_fail2:
 431         hermon_rsrc_free(state, &srqc);
 432 srqalloc_fail1:
 433         hermon_pd_refcnt_dec(pd);
 434 srqalloc_fail:
 435         return (status);
 436 }
 437 
 438 
 439 /*
 440  * hermon_srq_free()
 441  *    Context: Can be called only from user or kernel context.
 442  */
 443 /* ARGSUSED */
 444 int
 445 hermon_srq_free(hermon_state_t *state, hermon_srqhdl_t *srqhdl,
 446     uint_t sleepflag)
 447 {
 448         hermon_rsrc_t           *srqc, *rsrc;
 449         hermon_umap_db_entry_t  *umapdb;
 450         uint64_t                value;
 451         hermon_srqhdl_t         srq;
 452         hermon_mrhdl_t          mr;
 453         hermon_pdhdl_t          pd;
 454         hermon_hw_srqc_t        srqc_entry;
 455         uint32_t                srqnum;
 456         uint_t                  maxprot;
 457         int                     status;
 458 
 459         /*
 460          * Pull all the necessary information from the Hermon Shared Receive
 461          * Queue handle.  This is necessary here because the resource for the
 462          * SRQ handle is going to be freed up as part of this operation.
 463          */
 464         srq     = *srqhdl;
 465         mutex_enter(&srq->srq_lock);
 466         srqc    = srq->srq_srqcrsrcp;
 467         rsrc    = srq->srq_rsrcp;
 468         pd      = srq->srq_pdhdl;
 469         mr      = srq->srq_mrhdl;
 470         srqnum  = srq->srq_srqnum;
 471 
 472         /*
 473          * If there are work queues still associated with the SRQ, then return
 474          * an error.  Otherwise, we will be holding the SRQ lock.
 475          */
 476         if (srq->srq_refcnt != 0) {
 477                 mutex_exit(&srq->srq_lock);
 478                 return (IBT_SRQ_IN_USE);
 479         }
 480 
 481         /*
 482          * If this was a user-mappable SRQ, then we need to remove its entry
 483          * from the "userland resources database".  If it is also currently
 484          * mmap()'d out to a user process, then we need to call
 485          * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
 486          * We also need to invalidate the SRQ tracking information for the
 487          * user mapping.
 488          */
 489         if (srq->srq_is_umap) {
 490                 status = hermon_umap_db_find(state->hs_instance,
 491                     srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, &value,
 492                     HERMON_UMAP_DB_REMOVE, &umapdb);
 493                 if (status != DDI_SUCCESS) {
 494                         mutex_exit(&srq->srq_lock);
 495                         HERMON_WARNING(state, "failed to find in database");
 496                         return (ibc_get_ci_failure(0));
 497                 }
 498                 hermon_umap_db_free(umapdb);
 499                 if (srq->srq_umap_dhp != NULL) {
 500                         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 501                         status = devmap_devmem_remap(srq->srq_umap_dhp,
 502                             state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size,
 503                             maxprot, DEVMAP_MAPPING_INVALID, NULL);
 504                         if (status != DDI_SUCCESS) {
 505                                 mutex_exit(&srq->srq_lock);
 506                                 HERMON_WARNING(state, "failed in SRQ memory "
 507                                     "devmap_devmem_remap()");
 508                                 return (ibc_get_ci_failure(0));
 509                         }
 510                         srq->srq_umap_dhp = (devmap_cookie_t)NULL;
 511                 }
 512         }
 513 
 514         /*
 515          * Put NULL into the Hermon SRQNum-to-SRQHdl list.  This will allow any
 516          * in-progress events to detect that the SRQ corresponding to this
 517          * number has been freed.
 518          */
 519         hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, NULL);
 520 
 521         mutex_exit(&srq->srq_lock);
 522         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
 523 
 524         /*
 525          * Reclaim SRQC entry from hardware (using the Hermon HW2SW_SRQ
 526          * firmware command).  If the ownership transfer fails for any reason,
 527          * then it is an indication that something (either in HW or SW) has
 528          * gone seriously wrong.
 529          */
 530         status = hermon_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
 531             sizeof (hermon_hw_srqc_t), srqnum, sleepflag);
 532         if (status != HERMON_CMD_SUCCESS) {
 533                 HERMON_WARNING(state, "failed to reclaim SRQC ownership");
 534                 cmn_err(CE_CONT, "Hermon: HW2SW_SRQ command failed: %08x\n",
 535                     status);
 536                 if (status == HERMON_CMD_INVALID_STATUS) {
 537                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 538                 }
 539                 return (ibc_get_ci_failure(0));
 540         }
 541 
 542         /*
 543          * Deregister the memory for the Shared Receive Queue.  If this fails
 544          * for any reason, then it is an indication that something (either
 545          * in HW or SW) has gone seriously wrong.  So we print a warning
 546          * message and return.
 547          */
 548         status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 549             sleepflag);
 550         if (status != DDI_SUCCESS) {
 551                 HERMON_WARNING(state, "failed to deregister SRQ memory");
 552                 return (IBT_FAILURE);
 553         }
 554 
 555         hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
 556 
 557         /* Free the memory for the SRQ */
 558         hermon_queue_free(&srq->srq_wqinfo);
 559 
 560         /* Free the dbr */
 561         hermon_dbr_free(state, srq->srq_uarpg, srq->srq_wq_vdbr);
 562 
 563         /* Free the Hermon SRQ Handle */
 564         hermon_rsrc_free(state, &rsrc);
 565 
 566         /* Free the SRQC entry resource */
 567         hermon_rsrc_free(state, &srqc);
 568 
 569         /* Decrement the reference count on the protection domain (PD) */
 570         hermon_pd_refcnt_dec(pd);
 571 
 572         /* Set the srqhdl pointer to NULL and return success */
 573         *srqhdl = NULL;
 574 
 575         return (DDI_SUCCESS);
 576 }
 577 
 578 
 579 /*
 580  * hermon_srq_modify()
 581  *    Context: Can be called only from user or kernel context.
 582  */
 583 int
 584 hermon_srq_modify(hermon_state_t *state, hermon_srqhdl_t srq, uint_t size,
 585     uint_t *real_size, uint_t sleepflag)
 586 {
 587         hermon_qalloc_info_t    new_srqinfo, old_srqinfo;
 588         hermon_rsrc_t           *mtt, *old_mtt;
 589         hermon_bind_info_t      bind;
 590         hermon_bind_info_t      old_bind;
 591         hermon_mrhdl_t          mr;
 592         hermon_hw_srqc_t        srqc_entry;
 593         hermon_hw_dmpt_t        mpt_entry;
 594         uint64_t                *wre_new, *wre_old;
 595         uint64_t                mtt_addr;
 596         uint64_t                srq_pgoffs;
 597         uint64_t                srq_desc_off;
 598         uint32_t                *buf, srq_old_bufsz;
 599         uint32_t                wqesz;
 600         uint_t                  max_srq_size;
 601         uint_t                  mtt_pgsize_bits;
 602         uint_t                  log_srq_size, maxprot;
 603         int                     status;
 604 
 605         if ((state->hs_devlim.mod_wr_srq == 0) ||
 606             (state->hs_cfg_profile->cp_srq_resize_enabled == 0))
 607                 return (IBT_NOT_SUPPORTED);
 608 
        /*
         * If size requested is larger than device capability, return
         * IBT_HCA_WR_EXCEEDED
         */
 613         max_srq_size = (1 << state->hs_cfg_profile->cp_log_max_srq_sz);
 614         if (size > max_srq_size) {
 615                 return (IBT_HCA_WR_EXCEEDED);
 616         }
 617 
 618         /*
 619          * Calculate the appropriate size for the SRQ.
 620          * Note:  All Hermon SRQs must be a power-of-2 in size.  Also
 621          * they may not be any smaller than HERMON_SRQ_MIN_SIZE.  This step
 622          * is to round the requested size up to the next highest power-of-2
 623          */
 624         size = max(size, HERMON_SRQ_MIN_SIZE);
 625         log_srq_size = highbit(size);
 626         if ((size & (size - 1)) == 0) {
 627                 log_srq_size = log_srq_size - 1;
 628         }
 629 
 630         /*
 631          * Next we verify that the rounded-up size is valid (i.e. consistent
 632          * with the device limits and/or software-configured limits).
 633          */
 634         if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
 635                 status = IBT_HCA_WR_EXCEEDED;
 636                 goto srqmodify_fail;
 637         }
 638 
 639         /*
 640          * Allocate the memory for newly resized Shared Receive Queue.
 641          *
 642          * Note: If SRQ is not user-mappable, then it may come from either
 643          * kernel system memory or from HCA-attached local DDR memory.
 644          *
 645          * Note2: We align this queue on a pagesize boundary.  This is required
 646          * to make sure that all the resulting IB addresses will start at 0,
 647          * for a zero-based queue.  By making sure we are aligned on at least a
 648          * page, any offset we use into our queue will be the same as it was
 649          * when we allocated it at hermon_srq_alloc() time.
 650          */
 651         wqesz = (1 << srq->srq_wq_log_wqesz);
 652         new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
 653         new_srqinfo.qa_alloc_align = PAGESIZE;
 654         new_srqinfo.qa_bind_align  = PAGESIZE;
 655         if (srq->srq_is_umap) {
 656                 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 657         } else {
 658                 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 659         }
 660         status = hermon_queue_alloc(state, &new_srqinfo, sleepflag);
 661         if (status != DDI_SUCCESS) {
 662                 status = IBT_INSUFF_RESOURCE;
 663                 goto srqmodify_fail;
 664         }
 665         buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
 666         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
 667 
 668         /*
 669          * Allocate the memory for the new WRE list.  This will be used later
 670          * when we resize the wridlist based on the new SRQ size.
 671          */
 672         wre_new = kmem_zalloc((1 << log_srq_size) * sizeof (uint64_t),
 673             sleepflag);
 674         if (wre_new == NULL) {
 675                 status = IBT_INSUFF_RESOURCE;
 676                 goto srqmodify_fail;
 677         }
 678 
 679         /*
 680          * Fill in the "bind" struct.  This struct provides the majority
 681          * of the information that will be used to distinguish between an
 682          * "addr" binding (as is the case here) and a "buf" binding (see
 683          * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
 684          * which does most of the "heavy lifting" for the Hermon memory
 685          * registration routines.
 686          */
 687         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
 688         bzero(&bind, sizeof (hermon_bind_info_t));
 689         bind.bi_type  = HERMON_BINDHDL_VADDR;
 690         bind.bi_addr  = (uint64_t)(uintptr_t)buf;
 691         bind.bi_len   = new_srqinfo.qa_size;
 692         bind.bi_as    = NULL;
 693         bind.bi_flags = sleepflag == HERMON_SLEEP ? IBT_MR_SLEEP :
 694             IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
 695         bind.bi_bypass = state->hs_cfg_profile->cp_iommu_bypass;
 696 
 697         status = hermon_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
 698             &mtt_pgsize_bits, 0); /* no relaxed ordering */
 699         if (status != DDI_SUCCESS) {
 700                 status = status;
 701                 kmem_free(wre_new, (1 << log_srq_size) *
 702                     sizeof (uint64_t));
 703                 hermon_queue_free(&new_srqinfo);
 704                 goto srqmodify_fail;
 705         }
 706 
 707         /*
 708          * Calculate the offset between the kernel virtual address space
 709          * and the IB virtual address space.  This will be used when
 710          * posting work requests to properly initialize each WQE.
 711          *
 712          * Note: bind addr is zero-based (from alloc) so we calculate the
 713          * correct new offset here.
 714          */
 715         bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
 716         srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
 717             (uint64_t)bind.bi_addr;
 718         srq_pgoffs   = (uint_t)
 719             ((uintptr_t)new_srqinfo.qa_buf_aligned & HERMON_PAGEOFFSET);
 720 
 721         /*
 722          * Fill in the MPT entry.  This is the final step before passing
 723          * ownership of the MPT entry to the Hermon hardware.  We use all of
 724          * the information collected/calculated above to fill in the
 725          * requisite portions of the MPT.
 726          */
 727         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
 728         mpt_entry.reg_win_len   = bind.bi_len;
 729         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
 730         mpt_entry.mtt_addr_h = mtt_addr >> 32;
 731         mpt_entry.mtt_addr_l = mtt_addr >> 3;
 732 
 733         /*
 734          * for hermon we build up a new srqc and pass that (partially filled
 735          * to resize SRQ instead of modifying the (d)mpt directly
 736          */
 737 
 738 
 739 
 740         /*
 741          * Now we grab the SRQ lock.  Since we will be updating the actual
 742          * SRQ location and the producer/consumer indexes, we should hold
 743          * the lock.
 744          *
 745          * We do a HERMON_NOSLEEP here (and below), though, because we are
 746          * holding the "srq_lock" and if we got raised to interrupt level
 747          * by priority inversion, we would not want to block in this routine
 748          * waiting for success.
 749          */
 750         mutex_enter(&srq->srq_lock);
 751 
 752         /*
 753          * Copy old entries to new buffer
 754          */
 755         srq_old_bufsz = srq->srq_wq_bufsz;
 756         bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);
 757 
 758         /*
 759          * Setup MPT information for use in the MODIFY_MPT command
 760          */
 761         mr = srq->srq_mrhdl;
 762         mutex_enter(&mr->mr_lock);
 763 
 764         /*
 765          * now, setup the srqc information needed for resize - limit the
 766          * values, but use the same structure as the srqc
 767          */
 768 
 769         srqc_entry.log_srq_size   = log_srq_size;
 770         srqc_entry.page_offs      = srq_pgoffs >> 6;
 771         srqc_entry.log2_pgsz      = mr->mr_log2_pgsz;
 772         srqc_entry.mtt_base_addrl = (uint64_t)mtt_addr >> 32;
 773         srqc_entry.mtt_base_addrh = mtt_addr >> 3;
 774 
 775         /*
 776          * RESIZE_SRQ
 777          *
 778          * If this fails for any reason, then it is an indication that
 779          * something (either in HW or SW) has gone seriously wrong.  So we
 780          * print a warning message and return.
 781          */
 782         status = hermon_resize_srq_cmd_post(state, &srqc_entry,
 783             srq->srq_srqnum, sleepflag);
 784         if (status != HERMON_CMD_SUCCESS) {
 785                 cmn_err(CE_CONT, "Hermon: RESIZE_SRQ command failed: %08x\n",
 786                     status);
 787                 if (status == HERMON_CMD_INVALID_STATUS) {
 788                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 789                 }
 790                 (void) hermon_mr_mtt_unbind(state, &bind, mtt);
 791                 kmem_free(wre_new, (1 << log_srq_size) *
 792                     sizeof (uint64_t));
 793                 hermon_queue_free(&new_srqinfo);
 794                 mutex_exit(&mr->mr_lock);
 795                 mutex_exit(&srq->srq_lock);
 796                 return (ibc_get_ci_failure(0));
 797         }
 798         /*
 799          * Update the Hermon Shared Receive Queue handle with all the new
 800          * information.  At the same time, save away all the necessary
 801          * information for freeing up the old resources
 802          */
 803         old_srqinfo        = srq->srq_wqinfo;
 804         old_mtt            = srq->srq_mrhdl->mr_mttrsrcp;
 805         bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
 806             sizeof (hermon_bind_info_t));
 807 
 808         /* Now set the new info */
 809         srq->srq_wqinfo         = new_srqinfo;
 810         srq->srq_wq_buf         = buf;
 811         srq->srq_wq_bufsz  = (1 << log_srq_size);
 812         bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (hermon_bind_info_t));
 813         srq->srq_mrhdl->mr_mttrsrcp = mtt;
 814         srq->srq_desc_off  = srq_desc_off;
 815         srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
 816 
 817         /* Update MR mtt pagesize */
 818         mr->mr_logmttpgsz = mtt_pgsize_bits;
 819         mutex_exit(&mr->mr_lock);
 820 
 821         /*
 822          * Initialize new wridlist, if needed.
 823          *
 824          * If a wridlist already is setup on an SRQ (the QP associated with an
 825          * SRQ has moved "from_reset") then we must update this wridlist based
 826          * on the new SRQ size.  We allocate the new size of Work Request ID
 827          * Entries, copy over the old entries to the new list, and
 828          * re-initialize the srq wridlist in non-umap case
 829          */
 830         wre_old = srq->srq_wq_wqhdr->wq_wrid;
 831 
 832         bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (uint64_t));
 833 
 834         /* Setup new sizes in wre */
 835         srq->srq_wq_wqhdr->wq_wrid = wre_new;
 836 
 837         /*
 838          * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
 839          * to a user process, then we need to call devmap_devmem_remap() to
 840          * invalidate the mapping to the SRQ memory.  We also need to
 841          * invalidate the SRQ tracking information for the user mapping.
 842          *
 843          * Note: On failure, the remap really shouldn't ever happen.  So, if it
 844          * does, it is an indication that something has gone seriously wrong.
 845          * So we print a warning message and return error (knowing, of course,
 846          * that the "old" SRQ memory will be leaked)
 847          */
 848         if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
 849                 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 850                 status = devmap_devmem_remap(srq->srq_umap_dhp,
 851                     state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
 852                     DEVMAP_MAPPING_INVALID, NULL);
 853                 if (status != DDI_SUCCESS) {
 854                         mutex_exit(&srq->srq_lock);
 855                         HERMON_WARNING(state, "failed in SRQ memory "
 856                             "devmap_devmem_remap()");
 857                         /* We can, however, free the memory for old wre */
 858                         kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
 859                         return (ibc_get_ci_failure(0));
 860                 }
 861                 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
 862         }
 863 
 864         /*
 865          * Drop the SRQ lock now.  The only thing left to do is to free up
 866          * the old resources.
 867          */
 868         mutex_exit(&srq->srq_lock);
 869 
 870         /*
 871          * Unbind the MTT entries.
 872          */
 873         status = hermon_mr_mtt_unbind(state, &old_bind, old_mtt);
 874         if (status != DDI_SUCCESS) {
 875                 HERMON_WARNING(state, "failed to unbind old SRQ memory");
 876                 status = ibc_get_ci_failure(0);
 877                 goto srqmodify_fail;
 878         }
 879 
 880         /* Free the memory for old wre */
 881         kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
 882 
 883         /* Free the memory for the old SRQ */
 884         hermon_queue_free(&old_srqinfo);
 885 
 886         /*
 887          * Fill in the return arguments (if necessary).  This includes the
 888          * real new completion queue size.
 889          */
 890         if (real_size != NULL) {
 891                 *real_size = (1 << log_srq_size);
 892         }
 893 
 894         return (DDI_SUCCESS);
 895 
 896 srqmodify_fail:
 897         return (status);
 898 }
 899 
 900 
 901 /*
 902  * hermon_srq_refcnt_inc()
 903  *    Context: Can be called from interrupt or base context.
 904  */
 905 void
 906 hermon_srq_refcnt_inc(hermon_srqhdl_t srq)
 907 {
 908         mutex_enter(&srq->srq_lock);
 909         srq->srq_refcnt++;
 910         mutex_exit(&srq->srq_lock);
 911 }
 912 
 913 
 914 /*
 915  * hermon_srq_refcnt_dec()
 916  *    Context: Can be called from interrupt or base context.
 917  */
 918 void
 919 hermon_srq_refcnt_dec(hermon_srqhdl_t srq)
 920 {
 921         mutex_enter(&srq->srq_lock);
 922         srq->srq_refcnt--;
 923         mutex_exit(&srq->srq_lock);
 924 }
 925 
 926 
 927 /*
 928  * hermon_srqhdl_from_srqnum()
 929  *    Context: Can be called from interrupt or base context.
 930  *
 931  *    This routine is important because changing the unconstrained
 932  *    portion of the SRQ number is critical to the detection of a
 933  *    potential race condition in the SRQ handler code (i.e. the case
 934  *    where a SRQ is freed and alloc'd again before an event for the
 935  *    "old" SRQ can be handled).
 936  *
 937  *    While this is not a perfect solution (not sure that one exists)
 938  *    it does help to mitigate the chance that this race condition will
 939  *    cause us to deliver a "stale" event to the new SRQ owner.  Note:
 940  *    this solution does not scale well because the number of constrained
 941  *    bits increases (and, hence, the number of unconstrained bits
 942  *    decreases) as the number of supported SRQ grows.  For small and
 943  *    intermediate values, it should hopefully provide sufficient
 944  *    protection.
 945  */
 946 hermon_srqhdl_t
 947 hermon_srqhdl_from_srqnum(hermon_state_t *state, uint_t srqnum)
 948 {
 949         uint_t  srqindx, srqmask;
 950 
 951         /* Calculate the SRQ table index from the srqnum */
 952         srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1;
 953         srqindx = srqnum & srqmask;
 954         return (hermon_icm_num_to_hdl(state, HERMON_SRQC, srqindx));
 955 }
 956 
 957 
 958 /*
 959  * hermon_srq_sgl_to_logwqesz()
 960  *    Context: Can be called from interrupt or base context.
 961  */
 962 static void
 963 hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
 964     hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
 965 {
 966         uint_t  max_size, log2, actual_sgl;
 967 
 968         switch (wq_type) {
 969         case HERMON_QP_WQ_TYPE_RECVQ:
 970                 /*
 971                  * Use requested maximum SGL to calculate max descriptor size
 972                  * (while guaranteeing that the descriptor size is a
 973                  * power-of-2 cachelines).
 974                  */
 975                 max_size = (HERMON_QP_WQE_MLX_SRQ_HDRS + (num_sgl << 4));
 976                 log2 = highbit(max_size);
 977                 if ((max_size & (max_size - 1)) == 0) {
 978                         log2 = log2 - 1;
 979                 }
 980 
 981                 /* Make sure descriptor is at least the minimum size */
 982                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
 983 
 984                 /* Calculate actual number of SGL (given WQE size) */
 985                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SRQ_HDRS) >> 4;
 986                 break;
 987 
 988         default:
 989                 HERMON_WARNING(state, "unexpected work queue type");
 990                 break;
 991         }
 992 
 993         /* Fill in the return values */
 994         *logwqesz = log2;
 995         *max_sgl  = min(state->hs_cfg_profile->cp_srq_max_sgl, actual_sgl);
 996 }