1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * hermon_srq.c 28 * Hermon Shared Receive Queue Processing Routines 29 * 30 * Implements all the routines necessary for allocating, freeing, querying, 31 * modifying and posting shared receive queues. 32 */ 33 34 #include <sys/types.h> 35 #include <sys/conf.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/modctl.h> 39 #include <sys/bitmap.h> 40 41 #include <sys/ib/adapters/hermon/hermon.h> 42 43 static void hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl, 44 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl); 45 46 /* 47 * hermon_srq_alloc() 48 * Context: Can be called only from user or kernel context. 49 */ 50 int 51 hermon_srq_alloc(hermon_state_t *state, hermon_srq_info_t *srqinfo, 52 uint_t sleepflag) 53 { 54 ibt_srq_hdl_t ibt_srqhdl; 55 hermon_pdhdl_t pd; 56 ibt_srq_sizes_t *sizes; 57 ibt_srq_sizes_t *real_sizes; 58 hermon_srqhdl_t *srqhdl; 59 ibt_srq_flags_t flags; 60 hermon_rsrc_t *srqc, *rsrc; 61 hermon_hw_srqc_t srqc_entry; 62 uint32_t *buf; 63 hermon_srqhdl_t srq; 64 hermon_umap_db_entry_t *umapdb; 65 ibt_mr_attr_t mr_attr; 66 hermon_mr_options_t mr_op; 67 hermon_mrhdl_t mr; 68 uint64_t value, srq_desc_off; 69 uint32_t log_srq_size; 70 uint32_t uarpg; 71 uint_t srq_is_umap; 72 int flag, status; 73 uint_t max_sgl; 74 uint_t wqesz; 75 uint_t srq_wr_sz; 76 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes)) 77 78 /* 79 * options-->wq_location used to be for location, now explicitly 80 * LOCATION_NORMAL 81 */ 82 83 /* 84 * Extract the necessary info from the hermon_srq_info_t structure 85 */ 86 real_sizes = srqinfo->srqi_real_sizes; 87 sizes = srqinfo->srqi_sizes; 88 pd = srqinfo->srqi_pd; 89 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl; 90 flags = srqinfo->srqi_flags; 91 srqhdl = srqinfo->srqi_srqhdl; 92 93 /* 94 * Determine whether SRQ is being allocated for userland access or 95 * whether it is being allocated for kernel access. If the SRQ is 96 * being allocated for userland access, then lookup the UAR doorbell 97 * page number for the current process. Note: If this is not found 98 * (e.g. if the process has not previously open()'d the Hermon driver), 99 * then an error is returned. 100 */ 101 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0; 102 if (srq_is_umap) { 103 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(), 104 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 105 if (status != DDI_SUCCESS) { 106 status = IBT_INVALID_PARAM; 107 goto srqalloc_fail3; 108 } 109 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx; 110 } else { 111 uarpg = state->hs_kernel_uar_index; 112 } 113 114 /* Increase PD refcnt */ 115 hermon_pd_refcnt_inc(pd); 116 117 /* Allocate an SRQ context entry */ 118 status = hermon_rsrc_alloc(state, HERMON_SRQC, 1, sleepflag, &srqc); 119 if (status != DDI_SUCCESS) { 120 status = IBT_INSUFF_RESOURCE; 121 goto srqalloc_fail1; 122 } 123 124 /* Allocate the SRQ Handle entry */ 125 status = hermon_rsrc_alloc(state, HERMON_SRQHDL, 1, sleepflag, &rsrc); 126 if (status != DDI_SUCCESS) { 127 status = IBT_INSUFF_RESOURCE; 128 goto srqalloc_fail2; 129 } 130 131 srq = (hermon_srqhdl_t)rsrc->hr_addr; 132 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)) 133 134 bzero(srq, sizeof (struct hermon_sw_srq_s)); 135 /* Calculate the SRQ number */ 136 137 /* just use the index, implicit in Hermon */ 138 srq->srq_srqnum = srqc->hr_indx; 139 140 /* 141 * If this will be a user-mappable SRQ, then allocate an entry for 142 * the "userland resources database". This will later be added to 143 * the database (after all further SRQ operations are successful). 144 * If we fail here, we must undo the reference counts and the 145 * previous resource allocation. 146 */ 147 if (srq_is_umap) { 148 umapdb = hermon_umap_db_alloc(state->hs_instance, 149 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, 150 (uint64_t)(uintptr_t)rsrc); 151 if (umapdb == NULL) { 152 status = IBT_INSUFF_RESOURCE; 153 goto srqalloc_fail3; 154 } 155 } 156 157 /* 158 * Allocate the doorbell record. Hermon just needs one for the 159 * SRQ, and use uarpg (above) as the uar index 160 */ 161 162 status = hermon_dbr_alloc(state, uarpg, &srq->srq_wq_dbr_acchdl, 163 &srq->srq_wq_vdbr, &srq->srq_wq_pdbr, &srq->srq_rdbr_mapoffset); 164 if (status != DDI_SUCCESS) { 165 status = IBT_INSUFF_RESOURCE; 166 goto srqalloc_fail4; 167 } 168 169 /* 170 * Calculate the appropriate size for the SRQ. 171 * Note: All Hermon SRQs must be a power-of-2 in size. Also 172 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step 173 * is to round the requested size up to the next highest power-of-2 174 */ 175 srq_wr_sz = max(sizes->srq_wr_sz + 1, HERMON_SRQ_MIN_SIZE); 176 log_srq_size = highbit(srq_wr_sz); 177 if ((srq_wr_sz & (srq_wr_sz - 1)) == 0) { 178 log_srq_size = log_srq_size - 1; 179 } 180 181 /* 182 * Next we verify that the rounded-up size is valid (i.e. consistent 183 * with the device limits and/or software-configured limits). If not, 184 * then obviously we have a lot of cleanup to do before returning. 185 */ 186 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) { 187 status = IBT_HCA_WR_EXCEEDED; 188 goto srqalloc_fail4a; 189 } 190 191 /* 192 * Next we verify that the requested number of SGL is valid (i.e. 193 * consistent with the device limits and/or software-configured 194 * limits). If not, then obviously the same cleanup needs to be done. 195 */ 196 max_sgl = state->hs_ibtfinfo.hca_attr->hca_max_srq_sgl; 197 if (sizes->srq_sgl_sz > max_sgl) { 198 status = IBT_HCA_SGL_EXCEEDED; 199 goto srqalloc_fail4a; 200 } 201 202 /* 203 * Determine the SRQ's WQE sizes. This depends on the requested 204 * number of SGLs. Note: This also has the side-effect of 205 * calculating the real number of SGLs (for the calculated WQE size) 206 */ 207 hermon_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz, 208 HERMON_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz, 209 &srq->srq_wq_sgl); 210 211 /* 212 * Allocate the memory for SRQ work queues. Note: The location from 213 * which we will allocate these work queues is always 214 * QUEUE_LOCATION_NORMAL. Since Hermon work queues are not 215 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work 216 * queue memory is very important. We used to allocate work queues 217 * (the combined receive and send queues) so that they would be aligned 218 * on their combined size. That alignment guaranteed that they would 219 * never cross the 4GB boundary (Hermon work queues are on the order of 220 * MBs at maximum). Now we are able to relax this alignment constraint 221 * by ensuring that the IB address assigned to the queue memory (as a 222 * result of the hermon_mr_register() call) is offset from zero. 223 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 224 * guarantee the alignment, but when attempting to use IOMMU bypass 225 * mode we found that we were not allowed to specify any alignment that 226 * was more restrictive than the system page size. So we avoided this 227 * constraint by passing two alignment values, one for the memory 228 * allocation itself and the other for the DMA handle (for later bind). 229 * This used to cause more memory than necessary to be allocated (in 230 * order to guarantee the more restrictive alignment contraint). But 231 * be guaranteeing the zero-based IB virtual address for the queue, we 232 * are able to conserve this memory. 233 * 234 * Note: If SRQ is not user-mappable, then it may come from either 235 * kernel system memory or from HCA-attached local DDR memory. 236 * 237 * Note2: We align this queue on a pagesize boundary. This is required 238 * to make sure that all the resulting IB addresses will start at 0, for 239 * a zero-based queue. By making sure we are aligned on at least a 240 * page, any offset we use into our queue will be the same as when we 241 * perform hermon_srq_modify() operations later. 242 */ 243 wqesz = (1 << srq->srq_wq_log_wqesz); 244 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz; 245 srq->srq_wqinfo.qa_alloc_align = PAGESIZE; 246 srq->srq_wqinfo.qa_bind_align = PAGESIZE; 247 if (srq_is_umap) { 248 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND; 249 } else { 250 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; 251 } 252 status = hermon_queue_alloc(state, &srq->srq_wqinfo, sleepflag); 253 if (status != DDI_SUCCESS) { 254 status = IBT_INSUFF_RESOURCE; 255 goto srqalloc_fail4a; 256 } 257 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned; 258 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 259 260 /* 261 * Register the memory for the SRQ work queues. The memory for the SRQ 262 * must be registered in the Hermon cMPT tables. This gives us the LKey 263 * to specify in the SRQ context later. Note: If the work queue is to 264 * be allocated from DDR memory, then only a "bypass" mapping is 265 * appropriate. And if the SRQ memory is user-mappable, then we force 266 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment 267 * restriction, we pass the "mro_bind_override_addr" flag in the call 268 * to hermon_mr_register(). This guarantees that the resulting IB vaddr 269 * will be zero-based (modulo the offset into the first page). If we 270 * fail here, we still have the bunch of resource and reference count 271 * cleanup to do. 272 */ 273 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : 274 IBT_MR_NOSLEEP; 275 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; 276 mr_attr.mr_len = srq->srq_wqinfo.qa_size; 277 mr_attr.mr_as = NULL; 278 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; 279 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; 280 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl; 281 mr_op.mro_bind_override_addr = 1; 282 status = hermon_mr_register(state, pd, &mr_attr, &mr, 283 &mr_op, HERMON_SRQ_CMPT); 284 if (status != DDI_SUCCESS) { 285 status = IBT_INSUFF_RESOURCE; 286 goto srqalloc_fail5; 287 } 288 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 289 290 /* 291 * Calculate the offset between the kernel virtual address space 292 * and the IB virtual address space. This will be used when 293 * posting work requests to properly initialize each WQE. 294 */ 295 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned - 296 (uint64_t)mr->mr_bindinfo.bi_addr; 297 298 srq->srq_wq_wqhdr = hermon_wrid_wqhdr_create(1 << log_srq_size); 299 300 /* 301 * Fill in all the return arguments (if necessary). This includes 302 * real queue size and real SGLs. 303 */ 304 if (real_sizes != NULL) { 305 real_sizes->srq_wr_sz = (1 << log_srq_size) - 1; 306 real_sizes->srq_sgl_sz = srq->srq_wq_sgl; 307 } 308 309 /* 310 * Fill in the SRQC entry. This is the final step before passing 311 * ownership of the SRQC entry to the Hermon hardware. We use all of 312 * the information collected/calculated above to fill in the 313 * requisite portions of the SRQC. Note: If this SRQ is going to be 314 * used for userland access, then we need to set the UAR page number 315 * appropriately (otherwise it's a "don't care") 316 */ 317 bzero(&srqc_entry, sizeof (hermon_hw_srqc_t)); 318 srqc_entry.state = HERMON_SRQ_STATE_HW_OWNER; 319 srqc_entry.log_srq_size = log_srq_size; 320 srqc_entry.srqn = srq->srq_srqnum; 321 srqc_entry.log_rq_stride = srq->srq_wq_log_wqesz - 4; 322 /* 16-byte chunks */ 323 324 srqc_entry.page_offs = srq->srq_wqinfo.qa_pgoffs >> 6; 325 srqc_entry.log2_pgsz = mr->mr_log2_pgsz; 326 srqc_entry.mtt_base_addrh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF); 327 srqc_entry.mtt_base_addrl = mr->mr_mttaddr >> 3; 328 srqc_entry.pd = pd->pd_pdnum; 329 srqc_entry.dbr_addrh = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 32); 330 srqc_entry.dbr_addrl = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 2); 331 332 /* 333 * all others - specifically, xrcd, cqn_xrc, lwm, wqe_cnt, and wqe_cntr 334 * are zero thanks to the bzero of the structure 335 */ 336 337 /* 338 * Write the SRQC entry to hardware. Lastly, we pass ownership of 339 * the entry to the hardware (using the Hermon SW2HW_SRQ firmware 340 * command). Note: In general, this operation shouldn't fail. But 341 * if it does, we have to undo everything we've done above before 342 * returning error. 343 */ 344 status = hermon_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry, 345 sizeof (hermon_hw_srqc_t), srq->srq_srqnum, 346 sleepflag); 347 if (status != HERMON_CMD_SUCCESS) { 348 cmn_err(CE_CONT, "Hermon: SW2HW_SRQ command failed: %08x\n", 349 status); 350 if (status == HERMON_CMD_INVALID_STATUS) { 351 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 352 } 353 status = ibc_get_ci_failure(0); 354 goto srqalloc_fail8; 355 } 356 357 /* 358 * Fill in the rest of the Hermon SRQ handle. We can update 359 * the following fields for use in further operations on the SRQ. 360 */ 361 srq->srq_srqcrsrcp = srqc; 362 srq->srq_rsrcp = rsrc; 363 srq->srq_mrhdl = mr; 364 srq->srq_refcnt = 0; 365 srq->srq_is_umap = srq_is_umap; 366 srq->srq_uarpg = uarpg; 367 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 368 srq->srq_pdhdl = pd; 369 srq->srq_wq_bufsz = (1 << log_srq_size); 370 srq->srq_wq_buf = buf; 371 srq->srq_desc_off = srq_desc_off; 372 srq->srq_hdlrarg = (void *)ibt_srqhdl; 373 srq->srq_state = 0; 374 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 375 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl; 376 377 /* 378 * Put SRQ handle in Hermon SRQNum-to-SRQhdl list. Then fill in the 379 * "srqhdl" and return success 380 */ 381 hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, srq); 382 383 /* 384 * If this is a user-mappable SRQ, then we need to insert the 385 * previously allocated entry into the "userland resources database". 386 * This will allow for later lookup during devmap() (i.e. mmap()) 387 * calls. 388 */ 389 if (srq->srq_is_umap) { 390 hermon_umap_db_add(umapdb); 391 } else { /* initialize work queue for kernel SRQs */ 392 int i, len, last; 393 uint16_t *desc; 394 395 desc = (uint16_t *)buf; 396 len = wqesz / sizeof (*desc); 397 last = srq->srq_wq_bufsz - 1; 398 for (i = 0; i < last; i++) { 399 desc[1] = htons(i + 1); 400 desc += len; 401 } 402 srq->srq_wq_wqhdr->wq_tail = last; 403 srq->srq_wq_wqhdr->wq_head = 0; 404 } 405 406 *srqhdl = srq; 407 408 return (status); 409 410 /* 411 * The following is cleanup for all possible failure cases in this routine 412 */ 413 srqalloc_fail8: 414 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr); 415 srqalloc_fail7: 416 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL, 417 HERMON_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) { 418 HERMON_WARNING(state, "failed to deregister SRQ memory"); 419 } 420 srqalloc_fail5: 421 hermon_queue_free(&srq->srq_wqinfo); 422 srqalloc_fail4a: 423 hermon_dbr_free(state, uarpg, srq->srq_wq_vdbr); 424 srqalloc_fail4: 425 if (srq_is_umap) { 426 hermon_umap_db_free(umapdb); 427 } 428 srqalloc_fail3: 429 hermon_rsrc_free(state, &rsrc); 430 srqalloc_fail2: 431 hermon_rsrc_free(state, &srqc); 432 srqalloc_fail1: 433 hermon_pd_refcnt_dec(pd); 434 srqalloc_fail: 435 return (status); 436 } 437 438 439 /* 440 * hermon_srq_free() 441 * Context: Can be called only from user or kernel context. 442 */ 443 /* ARGSUSED */ 444 int 445 hermon_srq_free(hermon_state_t *state, hermon_srqhdl_t *srqhdl, 446 uint_t sleepflag) 447 { 448 hermon_rsrc_t *srqc, *rsrc; 449 hermon_umap_db_entry_t *umapdb; 450 uint64_t value; 451 hermon_srqhdl_t srq; 452 hermon_mrhdl_t mr; 453 hermon_pdhdl_t pd; 454 hermon_hw_srqc_t srqc_entry; 455 uint32_t srqnum; 456 uint_t maxprot; 457 int status; 458 459 /* 460 * Pull all the necessary information from the Hermon Shared Receive 461 * Queue handle. This is necessary here because the resource for the 462 * SRQ handle is going to be freed up as part of this operation. 463 */ 464 srq = *srqhdl; 465 mutex_enter(&srq->srq_lock); 466 srqc = srq->srq_srqcrsrcp; 467 rsrc = srq->srq_rsrcp; 468 pd = srq->srq_pdhdl; 469 mr = srq->srq_mrhdl; 470 srqnum = srq->srq_srqnum; 471 472 /* 473 * If there are work queues still associated with the SRQ, then return 474 * an error. Otherwise, we will be holding the SRQ lock. 475 */ 476 if (srq->srq_refcnt != 0) { 477 mutex_exit(&srq->srq_lock); 478 return (IBT_SRQ_IN_USE); 479 } 480 481 /* 482 * If this was a user-mappable SRQ, then we need to remove its entry 483 * from the "userland resources database". If it is also currently 484 * mmap()'d out to a user process, then we need to call 485 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping. 486 * We also need to invalidate the SRQ tracking information for the 487 * user mapping. 488 */ 489 if (srq->srq_is_umap) { 490 status = hermon_umap_db_find(state->hs_instance, 491 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, &value, 492 HERMON_UMAP_DB_REMOVE, &umapdb); 493 if (status != DDI_SUCCESS) { 494 mutex_exit(&srq->srq_lock); 495 HERMON_WARNING(state, "failed to find in database"); 496 return (ibc_get_ci_failure(0)); 497 } 498 hermon_umap_db_free(umapdb); 499 if (srq->srq_umap_dhp != NULL) { 500 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 501 status = devmap_devmem_remap(srq->srq_umap_dhp, 502 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, 503 maxprot, DEVMAP_MAPPING_INVALID, NULL); 504 if (status != DDI_SUCCESS) { 505 mutex_exit(&srq->srq_lock); 506 HERMON_WARNING(state, "failed in SRQ memory " 507 "devmap_devmem_remap()"); 508 return (ibc_get_ci_failure(0)); 509 } 510 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 511 } 512 } 513 514 /* 515 * Put NULL into the Hermon SRQNum-to-SRQHdl list. This will allow any 516 * in-progress events to detect that the SRQ corresponding to this 517 * number has been freed. 518 */ 519 hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, NULL); 520 521 mutex_exit(&srq->srq_lock); 522 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)); 523 524 /* 525 * Reclaim SRQC entry from hardware (using the Hermon HW2SW_SRQ 526 * firmware command). If the ownership transfer fails for any reason, 527 * then it is an indication that something (either in HW or SW) has 528 * gone seriously wrong. 529 */ 530 status = hermon_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry, 531 sizeof (hermon_hw_srqc_t), srqnum, sleepflag); 532 if (status != HERMON_CMD_SUCCESS) { 533 HERMON_WARNING(state, "failed to reclaim SRQC ownership"); 534 cmn_err(CE_CONT, "Hermon: HW2SW_SRQ command failed: %08x\n", 535 status); 536 if (status == HERMON_CMD_INVALID_STATUS) { 537 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 538 } 539 return (ibc_get_ci_failure(0)); 540 } 541 542 /* 543 * Deregister the memory for the Shared Receive Queue. If this fails 544 * for any reason, then it is an indication that something (either 545 * in HW or SW) has gone seriously wrong. So we print a warning 546 * message and return. 547 */ 548 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL, 549 sleepflag); 550 if (status != DDI_SUCCESS) { 551 HERMON_WARNING(state, "failed to deregister SRQ memory"); 552 return (IBT_FAILURE); 553 } 554 555 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr); 556 557 /* Free the memory for the SRQ */ 558 hermon_queue_free(&srq->srq_wqinfo); 559 560 /* Free the dbr */ 561 hermon_dbr_free(state, srq->srq_uarpg, srq->srq_wq_vdbr); 562 563 /* Free the Hermon SRQ Handle */ 564 hermon_rsrc_free(state, &rsrc); 565 566 /* Free the SRQC entry resource */ 567 hermon_rsrc_free(state, &srqc); 568 569 /* Decrement the reference count on the protection domain (PD) */ 570 hermon_pd_refcnt_dec(pd); 571 572 /* Set the srqhdl pointer to NULL and return success */ 573 *srqhdl = NULL; 574 575 return (DDI_SUCCESS); 576 } 577 578 579 /* 580 * hermon_srq_modify() 581 * Context: Can be called only from user or kernel context. 582 */ 583 int 584 hermon_srq_modify(hermon_state_t *state, hermon_srqhdl_t srq, uint_t size, 585 uint_t *real_size, uint_t sleepflag) 586 { 587 hermon_qalloc_info_t new_srqinfo, old_srqinfo; 588 hermon_rsrc_t *mtt, *old_mtt; 589 hermon_bind_info_t bind; 590 hermon_bind_info_t old_bind; 591 hermon_mrhdl_t mr; 592 hermon_hw_srqc_t srqc_entry; 593 hermon_hw_dmpt_t mpt_entry; 594 uint64_t *wre_new, *wre_old; 595 uint64_t mtt_addr; 596 uint64_t srq_pgoffs; 597 uint64_t srq_desc_off; 598 uint32_t *buf, srq_old_bufsz; 599 uint32_t wqesz; 600 uint_t max_srq_size; 601 uint_t mtt_pgsize_bits; 602 uint_t log_srq_size, maxprot; 603 int status; 604 605 if ((state->hs_devlim.mod_wr_srq == 0) || 606 (state->hs_cfg_profile->cp_srq_resize_enabled == 0)) 607 return (IBT_NOT_SUPPORTED); 608 609 /* 610 * If size requested is larger than device capability, return 611 * Insufficient Resources 612 */ 613 max_srq_size = (1 << state->hs_cfg_profile->cp_log_max_srq_sz); 614 if (size > max_srq_size) { 615 return (IBT_HCA_WR_EXCEEDED); 616 } 617 618 /* 619 * Calculate the appropriate size for the SRQ. 620 * Note: All Hermon SRQs must be a power-of-2 in size. Also 621 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step 622 * is to round the requested size up to the next highest power-of-2 623 */ 624 size = max(size, HERMON_SRQ_MIN_SIZE); 625 log_srq_size = highbit(size); 626 if ((size & (size - 1)) == 0) { 627 log_srq_size = log_srq_size - 1; 628 } 629 630 /* 631 * Next we verify that the rounded-up size is valid (i.e. consistent 632 * with the device limits and/or software-configured limits). 633 */ 634 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) { 635 status = IBT_HCA_WR_EXCEEDED; 636 goto srqmodify_fail; 637 } 638 639 /* 640 * Allocate the memory for newly resized Shared Receive Queue. 641 * 642 * Note: If SRQ is not user-mappable, then it may come from either 643 * kernel system memory or from HCA-attached local DDR memory. 644 * 645 * Note2: We align this queue on a pagesize boundary. This is required 646 * to make sure that all the resulting IB addresses will start at 0, 647 * for a zero-based queue. By making sure we are aligned on at least a 648 * page, any offset we use into our queue will be the same as it was 649 * when we allocated it at hermon_srq_alloc() time. 650 */ 651 wqesz = (1 << srq->srq_wq_log_wqesz); 652 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz; 653 new_srqinfo.qa_alloc_align = PAGESIZE; 654 new_srqinfo.qa_bind_align = PAGESIZE; 655 if (srq->srq_is_umap) { 656 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND; 657 } else { 658 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; 659 } 660 status = hermon_queue_alloc(state, &new_srqinfo, sleepflag); 661 if (status != DDI_SUCCESS) { 662 status = IBT_INSUFF_RESOURCE; 663 goto srqmodify_fail; 664 } 665 buf = (uint32_t *)new_srqinfo.qa_buf_aligned; 666 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 667 668 /* 669 * Allocate the memory for the new WRE list. This will be used later 670 * when we resize the wridlist based on the new SRQ size. 671 */ 672 wre_new = kmem_zalloc((1 << log_srq_size) * sizeof (uint64_t), 673 sleepflag); 674 if (wre_new == NULL) { 675 status = IBT_INSUFF_RESOURCE; 676 goto srqmodify_fail; 677 } 678 679 /* 680 * Fill in the "bind" struct. This struct provides the majority 681 * of the information that will be used to distinguish between an 682 * "addr" binding (as is the case here) and a "buf" binding (see 683 * below). The "bind" struct is later passed to hermon_mr_mem_bind() 684 * which does most of the "heavy lifting" for the Hermon memory 685 * registration routines. 686 */ 687 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind)) 688 bzero(&bind, sizeof (hermon_bind_info_t)); 689 bind.bi_type = HERMON_BINDHDL_VADDR; 690 bind.bi_addr = (uint64_t)(uintptr_t)buf; 691 bind.bi_len = new_srqinfo.qa_size; 692 bind.bi_as = NULL; 693 bind.bi_flags = sleepflag == HERMON_SLEEP ? IBT_MR_SLEEP : 694 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 695 bind.bi_bypass = state->hs_cfg_profile->cp_iommu_bypass; 696 697 status = hermon_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt, 698 &mtt_pgsize_bits, 0); /* no relaxed ordering */ 699 if (status != DDI_SUCCESS) { 700 status = status; 701 kmem_free(wre_new, (1 << log_srq_size) * 702 sizeof (uint64_t)); 703 hermon_queue_free(&new_srqinfo); 704 goto srqmodify_fail; 705 } 706 707 /* 708 * Calculate the offset between the kernel virtual address space 709 * and the IB virtual address space. This will be used when 710 * posting work requests to properly initialize each WQE. 711 * 712 * Note: bind addr is zero-based (from alloc) so we calculate the 713 * correct new offset here. 714 */ 715 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1); 716 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned - 717 (uint64_t)bind.bi_addr; 718 srq_pgoffs = (uint_t) 719 ((uintptr_t)new_srqinfo.qa_buf_aligned & HERMON_PAGEOFFSET); 720 721 /* 722 * Fill in the MPT entry. This is the final step before passing 723 * ownership of the MPT entry to the Hermon hardware. We use all of 724 * the information collected/calculated above to fill in the 725 * requisite portions of the MPT. 726 */ 727 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 728 mpt_entry.reg_win_len = bind.bi_len; 729 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 730 mpt_entry.mtt_addr_h = mtt_addr >> 32; 731 mpt_entry.mtt_addr_l = mtt_addr >> 3; 732 733 /* 734 * for hermon we build up a new srqc and pass that (partially filled 735 * to resize SRQ instead of modifying the (d)mpt directly 736 */ 737 738 739 740 /* 741 * Now we grab the SRQ lock. Since we will be updating the actual 742 * SRQ location and the producer/consumer indexes, we should hold 743 * the lock. 744 * 745 * We do a HERMON_NOSLEEP here (and below), though, because we are 746 * holding the "srq_lock" and if we got raised to interrupt level 747 * by priority inversion, we would not want to block in this routine 748 * waiting for success. 749 */ 750 mutex_enter(&srq->srq_lock); 751 752 /* 753 * Copy old entries to new buffer 754 */ 755 srq_old_bufsz = srq->srq_wq_bufsz; 756 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz); 757 758 /* 759 * Setup MPT information for use in the MODIFY_MPT command 760 */ 761 mr = srq->srq_mrhdl; 762 mutex_enter(&mr->mr_lock); 763 764 /* 765 * now, setup the srqc information needed for resize - limit the 766 * values, but use the same structure as the srqc 767 */ 768 769 srqc_entry.log_srq_size = log_srq_size; 770 srqc_entry.page_offs = srq_pgoffs >> 6; 771 srqc_entry.log2_pgsz = mr->mr_log2_pgsz; 772 srqc_entry.mtt_base_addrl = (uint64_t)mtt_addr >> 32; 773 srqc_entry.mtt_base_addrh = mtt_addr >> 3; 774 775 /* 776 * RESIZE_SRQ 777 * 778 * If this fails for any reason, then it is an indication that 779 * something (either in HW or SW) has gone seriously wrong. So we 780 * print a warning message and return. 781 */ 782 status = hermon_resize_srq_cmd_post(state, &srqc_entry, 783 srq->srq_srqnum, sleepflag); 784 if (status != HERMON_CMD_SUCCESS) { 785 cmn_err(CE_CONT, "Hermon: RESIZE_SRQ command failed: %08x\n", 786 status); 787 if (status == HERMON_CMD_INVALID_STATUS) { 788 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 789 } 790 (void) hermon_mr_mtt_unbind(state, &bind, mtt); 791 kmem_free(wre_new, (1 << log_srq_size) * 792 sizeof (uint64_t)); 793 hermon_queue_free(&new_srqinfo); 794 mutex_exit(&mr->mr_lock); 795 mutex_exit(&srq->srq_lock); 796 return (ibc_get_ci_failure(0)); 797 } 798 /* 799 * Update the Hermon Shared Receive Queue handle with all the new 800 * information. At the same time, save away all the necessary 801 * information for freeing up the old resources 802 */ 803 old_srqinfo = srq->srq_wqinfo; 804 old_mtt = srq->srq_mrhdl->mr_mttrsrcp; 805 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind, 806 sizeof (hermon_bind_info_t)); 807 808 /* Now set the new info */ 809 srq->srq_wqinfo = new_srqinfo; 810 srq->srq_wq_buf = buf; 811 srq->srq_wq_bufsz = (1 << log_srq_size); 812 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (hermon_bind_info_t)); 813 srq->srq_mrhdl->mr_mttrsrcp = mtt; 814 srq->srq_desc_off = srq_desc_off; 815 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 816 817 /* Update MR mtt pagesize */ 818 mr->mr_logmttpgsz = mtt_pgsize_bits; 819 mutex_exit(&mr->mr_lock); 820 821 /* 822 * Initialize new wridlist, if needed. 823 * 824 * If a wridlist already is setup on an SRQ (the QP associated with an 825 * SRQ has moved "from_reset") then we must update this wridlist based 826 * on the new SRQ size. We allocate the new size of Work Request ID 827 * Entries, copy over the old entries to the new list, and 828 * re-initialize the srq wridlist in non-umap case 829 */ 830 wre_old = srq->srq_wq_wqhdr->wq_wrid; 831 832 bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (uint64_t)); 833 834 /* Setup new sizes in wre */ 835 srq->srq_wq_wqhdr->wq_wrid = wre_new; 836 837 /* 838 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out 839 * to a user process, then we need to call devmap_devmem_remap() to 840 * invalidate the mapping to the SRQ memory. We also need to 841 * invalidate the SRQ tracking information for the user mapping. 842 * 843 * Note: On failure, the remap really shouldn't ever happen. So, if it 844 * does, it is an indication that something has gone seriously wrong. 845 * So we print a warning message and return error (knowing, of course, 846 * that the "old" SRQ memory will be leaked) 847 */ 848 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) { 849 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 850 status = devmap_devmem_remap(srq->srq_umap_dhp, 851 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot, 852 DEVMAP_MAPPING_INVALID, NULL); 853 if (status != DDI_SUCCESS) { 854 mutex_exit(&srq->srq_lock); 855 HERMON_WARNING(state, "failed in SRQ memory " 856 "devmap_devmem_remap()"); 857 /* We can, however, free the memory for old wre */ 858 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t)); 859 return (ibc_get_ci_failure(0)); 860 } 861 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 862 } 863 864 /* 865 * Drop the SRQ lock now. The only thing left to do is to free up 866 * the old resources. 867 */ 868 mutex_exit(&srq->srq_lock); 869 870 /* 871 * Unbind the MTT entries. 872 */ 873 status = hermon_mr_mtt_unbind(state, &old_bind, old_mtt); 874 if (status != DDI_SUCCESS) { 875 HERMON_WARNING(state, "failed to unbind old SRQ memory"); 876 status = ibc_get_ci_failure(0); 877 goto srqmodify_fail; 878 } 879 880 /* Free the memory for old wre */ 881 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t)); 882 883 /* Free the memory for the old SRQ */ 884 hermon_queue_free(&old_srqinfo); 885 886 /* 887 * Fill in the return arguments (if necessary). This includes the 888 * real new completion queue size. 889 */ 890 if (real_size != NULL) { 891 *real_size = (1 << log_srq_size); 892 } 893 894 return (DDI_SUCCESS); 895 896 srqmodify_fail: 897 return (status); 898 } 899 900 901 /* 902 * hermon_srq_refcnt_inc() 903 * Context: Can be called from interrupt or base context. 904 */ 905 void 906 hermon_srq_refcnt_inc(hermon_srqhdl_t srq) 907 { 908 mutex_enter(&srq->srq_lock); 909 srq->srq_refcnt++; 910 mutex_exit(&srq->srq_lock); 911 } 912 913 914 /* 915 * hermon_srq_refcnt_dec() 916 * Context: Can be called from interrupt or base context. 917 */ 918 void 919 hermon_srq_refcnt_dec(hermon_srqhdl_t srq) 920 { 921 mutex_enter(&srq->srq_lock); 922 srq->srq_refcnt--; 923 mutex_exit(&srq->srq_lock); 924 } 925 926 927 /* 928 * hermon_srqhdl_from_srqnum() 929 * Context: Can be called from interrupt or base context. 930 * 931 * This routine is important because changing the unconstrained 932 * portion of the SRQ number is critical to the detection of a 933 * potential race condition in the SRQ handler code (i.e. the case 934 * where a SRQ is freed and alloc'd again before an event for the 935 * "old" SRQ can be handled). 936 * 937 * While this is not a perfect solution (not sure that one exists) 938 * it does help to mitigate the chance that this race condition will 939 * cause us to deliver a "stale" event to the new SRQ owner. Note: 940 * this solution does not scale well because the number of constrained 941 * bits increases (and, hence, the number of unconstrained bits 942 * decreases) as the number of supported SRQ grows. For small and 943 * intermediate values, it should hopefully provide sufficient 944 * protection. 945 */ 946 hermon_srqhdl_t 947 hermon_srqhdl_from_srqnum(hermon_state_t *state, uint_t srqnum) 948 { 949 uint_t srqindx, srqmask; 950 951 /* Calculate the SRQ table index from the srqnum */ 952 srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1; 953 srqindx = srqnum & srqmask; 954 return (hermon_icm_num_to_hdl(state, HERMON_SRQC, srqindx)); 955 } 956 957 958 /* 959 * hermon_srq_sgl_to_logwqesz() 960 * Context: Can be called from interrupt or base context. 961 */ 962 static void 963 hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl, 964 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl) 965 { 966 uint_t max_size, log2, actual_sgl; 967 968 switch (wq_type) { 969 case HERMON_QP_WQ_TYPE_RECVQ: 970 /* 971 * Use requested maximum SGL to calculate max descriptor size 972 * (while guaranteeing that the descriptor size is a 973 * power-of-2 cachelines). 974 */ 975 max_size = (HERMON_QP_WQE_MLX_SRQ_HDRS + (num_sgl << 4)); 976 log2 = highbit(max_size); 977 if ((max_size & (max_size - 1)) == 0) { 978 log2 = log2 - 1; 979 } 980 981 /* Make sure descriptor is at least the minimum size */ 982 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM); 983 984 /* Calculate actual number of SGL (given WQE size) */ 985 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SRQ_HDRS) >> 4; 986 break; 987 988 default: 989 HERMON_WARNING(state, "unexpected work queue type"); 990 break; 991 } 992 993 /* Fill in the return values */ 994 *logwqesz = log2; 995 *max_sgl = min(state->hs_cfg_profile->cp_srq_max_sgl, actual_sgl); 996 }