1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 26 * 27 * This software is available to you under a choice of one of two 28 * licenses. You may choose to be licensed under the terms of the GNU 29 * General Public License (GPL) Version 2, available from the file 30 * COPYING in the main directory of this source tree, or the 31 * OpenIB.org BSD license below: 32 * 33 * Redistribution and use in source and binary forms, with or 34 * without modification, are permitted provided that the following 35 * conditions are met: 36 * 37 * - Redistributions of source code must retain the above 38 * copyright notice, this list of conditions and the following 39 * disclaimer. 40 * 41 * - Redistributions in binary form must reproduce the above 42 * copyright notice, this list of conditions and the following 43 * disclaimer in the documentation and/or other materials 44 * provided with the distribution. 
45 * 46 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 47 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 48 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 49 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 50 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 51 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 52 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 53 * SOFTWARE. 54 * 55 */ 56 /* 57 * Sun elects to include this software in Sun product 58 * under the OpenIB BSD license. 59 * 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 62 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 65 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 66 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 67 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 68 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 69 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 70 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 71 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ib/clients/rds/rdsib_cm.h>
#include <sys/ib/clients/rds/rdsib_ib.h>
#include <sys/ib/clients/rds/rdsib_buf.h>
#include <sys/ib/clients/rds/rdsib_ep.h>
#include <sys/ib/clients/rds/rds_kstat.h>

/* Forward declaration: async event handler registered via rds_ib_modinfo */
static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event);

/* IBTF client registration information for the RDS module */
static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
	IBTI_V_CURR,
	IBT_NETWORK,
	rds_async_handler,
	NULL,
	"RDS"
};

/* performance tunables */
uint_t rds_no_interrupts = 0;	/* non-zero limits send CQ draining loops */
uint_t rds_poll_percent_full = 25;
uint_t rds_wc_signal = IBT_NEXT_SOLICITED; /* notify type for recv CQ re-arm */
uint_t rds_waittime_ms = 100; /* ms */

extern dev_info_t *rdsib_dev_info;
extern void rds_close_sessions();

/*
 * Clamp the configured send/recv buffer counts so the resulting SQ/RQ
 * and CQ sizes, and the total receive memory, never exceed the limits
 * the HCA reports (hca_max_chan_sz, hca_max_cq_sz, hca_max_memr_len).
 */
static void
rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
{
	/* The SQ size should not be more than that supported by the HCA */
	if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
	    ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater "
		    "than that supported by the HCA driver "
		    "(%d + %d > %d or %d), lowering it to a supported value.",
		    RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		/* lower to the smaller of the two HCA limits */
		MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ?
		    hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
		    hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxDataRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The SQ size should not be more than that supported by the HCA */
	if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlSendBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The MaxRecvMemory should be less than that supported by the HCA */
	if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) {
		RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that "
		    "supported by the HCA driver (%d > %d), lowering it to %d",
		    NDataRX * RdsPktSize, hattrp->hca_max_memr_len,
		    hattrp->hca_max_memr_len);

		NDataRX = hattrp->hca_max_memr_len/RdsPktSize;
	}
}

/* Return hcap, given the hca guid; NULL if not on the global HCA list */
rds_hca_t *
rds_lkup_hca(ib_guid_t hca_guid)
{
	rds_hca_t	*hcap;

	RDS_DPRINTF4("rds_lkup_hca", "Enter: statep: 0x%p "
	    "guid: %llx", rdsib_statep, hca_guid);

	/* walk the global HCA list under the reader lock */
	rw_enter(&rdsib_statep->rds_hca_lock, RW_READER);

	hcap = rdsib_statep->rds_hcalistp;
	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
		hcap = hcap->hca_nextp;
	}

	rw_exit(&rdsib_statep->rds_hca_lock);

	RDS_DPRINTF4("rds_lkup_hca", "return");

	return (hcap);
}

void rds_randomize_qps(rds_hca_t *hcap);

/*
 * Open the HCA identified by hca_guid, query its attributes and ports,
 * allocate its protection domain, and (for a new HCA) link it onto
 * rdsib_statep->rds_hcalistp. Returns the rds_hca_t on success, or
 * NULL on any failure or if the HCA is already open.
 */
static rds_hca_t *
rdsib_init_hca(ib_guid_t hca_guid)
{
	rds_hca_t	*hcap;
	boolean_t	alloc = B_FALSE;	/* hcap allocated here? */
	int		ret;

	RDS_DPRINTF2("rdsib_init_hca", "enter: HCA 0x%llx", hca_guid);

	/* Do a HCA lookup */
	hcap = rds_lkup_hca(hca_guid);

	if (hcap != NULL && hcap->hca_hdl != NULL) {
		/*
		 * This can happen if we get IBT_HCA_ATTACH_EVENT on an HCA
		 * that we have already opened. Just return NULL so that
		 * we'll not end up reinitializing the HCA again.
		 */
		RDS_DPRINTF2("rdsib_init_hca", "HCA already initialized");
		return (NULL);
	}

	if (hcap == NULL) {
		RDS_DPRINTF2("rdsib_init_hca", "New HCA is added");
		hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);
		alloc = B_TRUE;
	}

	hcap->hca_guid = hca_guid;
	ret = ibt_open_hca(rdsib_statep->rds_ibhdl, hca_guid,
	    &hcap->hca_hdl);
	if (ret != IBT_SUCCESS) {
		if (ret == IBT_HCA_IN_USE) {
			RDS_DPRINTF2("rdsib_init_hca",
			    "ibt_open_hca: 0x%llx returned IBT_HCA_IN_USE",
			    hca_guid);
		} else {
			RDS_DPRINTF2("rdsib_init_hca",
			    "ibt_open_hca: 0x%llx failed: %d", hca_guid, ret);
		}
		/* only free the struct if this call allocated it */
		if (alloc == B_TRUE) {
			kmem_free(hcap, sizeof (rds_hca_t));
		}
		return (NULL);
	}

	ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2("rdsib_init_hca",
		    "Query HCA: 0x%llx failed: %d", hca_guid, ret);
		ret = ibt_close_hca(hcap->hca_hdl);
		ASSERT(ret == IBT_SUCCESS);
		if (alloc == B_TRUE) {
			kmem_free(hcap, sizeof (rds_hca_t));
		} else {
			/* keep the existing list entry, mark it closed */
			hcap->hca_hdl = NULL;
		}
		return (NULL);
	}

	/* port 0 means: return info for all ports of this HCA */
	ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
	    &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2("rdsib_init_hca",
		    "Query HCA 0x%llx ports failed: %d", hca_guid,
		    ret);
		ret = ibt_close_hca(hcap->hca_hdl);
		hcap->hca_hdl = NULL;
		ASSERT(ret == IBT_SUCCESS);
		if (alloc == B_TRUE) {
			kmem_free(hcap, sizeof (rds_hca_t));
		} else {
			hcap->hca_hdl = NULL;
		}
		return (NULL);
	}

	/* Only one PD per HCA is allocated, so do it here */
	ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
	    &hcap->hca_pdhdl);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2("rdsib_init_hca",
		    "ibt_alloc_pd 0x%llx failed: %d", hca_guid, ret);
		(void) ibt_free_portinfo(hcap->hca_pinfop,
		    hcap->hca_pinfo_sz);
		ret = ibt_close_hca(hcap->hca_hdl);
		ASSERT(ret == IBT_SUCCESS);
		hcap->hca_hdl = NULL;
		if (alloc == B_TRUE) {
			kmem_free(hcap, sizeof (rds_hca_t));
		} else {
			hcap->hca_hdl = NULL;
		}
		return (NULL);
	}

	rdsib_validate_chan_sizes(&hcap->hca_attr);

	/* To minimize stale connections after ungraceful reboots */
	rds_randomize_qps(hcap);

	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
	hcap->hca_state = RDS_HCA_STATE_OPEN;
	if (alloc == B_TRUE) {
		/* this is a new HCA, add it to the list */
		rdsib_statep->rds_nhcas++;
		hcap->hca_nextp = rdsib_statep->rds_hcalistp;
		rdsib_statep->rds_hcalistp = hcap;
	}
	rw_exit(&rdsib_statep->rds_hca_lock);

	RDS_DPRINTF2("rdsib_init_hca", "return: HCA 0x%llx", hca_guid);

	return (hcap);
}

/*
 * Called from attach: registers RDS with IBTF and opens every HCA in
 * the system. Returns 0 if at least one HCA initialized, -1 otherwise.
 */
int
rdsib_initialize_ib()
{
	ib_guid_t	*guidp;
	rds_hca_t	*hcap;
	uint_t		ix, hcaix, nhcas;
	int		ret;

	RDS_DPRINTF2("rdsib_initialize_ib", "enter: statep %p", rdsib_statep);

	ASSERT(rdsib_statep != NULL);
	if (rdsib_statep == NULL) {
		RDS_DPRINTF1("rdsib_initialize_ib",
		    "RDS Statep not initialized");
		return (-1);
	}

	/* How many hcas are there? */
	nhcas = ibt_get_hca_list(&guidp);
	if (nhcas == 0) {
		RDS_DPRINTF2("rdsib_initialize_ib", "No IB HCAs Available");
		return (-1);
	}

	RDS_DPRINTF3("rdsib_initialize_ib", "Number of HCAs: %d", nhcas);

	/* Register with IBTF */
	ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
	    &rdsib_statep->rds_ibhdl);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2("rdsib_initialize_ib", "ibt_attach failed: %d",
		    ret);
		(void) ibt_free_hca_list(guidp, nhcas);
		return (-1);
	}

	/*
	 * Open each HCA and gather its information. Don't care about HCAs
	 * that cannot be opened. It is OK as long as at least one HCA can be
	 * opened.
	 * Initialize a HCA only if all the information is available.
	 */
	for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
		RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);

		hcap = rdsib_init_hca(guidp[ix]);
		if (hcap != NULL) hcaix++;
	}

	/* free the HCA list, we are done with it */
	(void) ibt_free_hca_list(guidp, nhcas);

	if (hcaix == 0) {
		/* Failed to Initialize even one HCA */
		RDS_DPRINTF2("rdsib_initialize_ib", "No HCAs are initialized");
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl = NULL;
		return (-1);
	}

	if (hcaix < nhcas) {
		RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
		    (nhcas - hcaix), nhcas);
	}

	RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep);

	return (0);
}

/*
 * Called from detach: tears down all sessions, releases every HCA's
 * resources (PD, port info, handle) and deregisters from IBTF.
 */
void
rdsib_deinitialize_ib()
{
	rds_hca_t	*hcap, *nextp;
	int		ret;

	RDS_DPRINTF2("rdsib_deinitialize_ib", "enter: statep %p", rdsib_statep);

	/* close and destroy all the sessions */
	rds_close_sessions(NULL);

	/* Release all HCA resources */
	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
	RDS_DPRINTF2("rdsib_deinitialize_ib", "HCA List: %p, NHCA: %d",
	    rdsib_statep->rds_hcalistp, rdsib_statep->rds_nhcas);
	/* detach the list under the lock, then free it outside the lock */
	hcap = rdsib_statep->rds_hcalistp;
	rdsib_statep->rds_hcalistp = NULL;
	rdsib_statep->rds_nhcas = 0;
	rw_exit(&rdsib_statep->rds_hca_lock);

	while (hcap != NULL) {
		nextp = hcap->hca_nextp;

		if (hcap->hca_hdl != NULL) {
			ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
			ASSERT(ret == IBT_SUCCESS);

			(void) ibt_free_portinfo(hcap->hca_pinfop,
			    hcap->hca_pinfo_sz);

			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
		}

		kmem_free(hcap, sizeof (rds_hca_t));
		hcap = nextp;
	}

	/* Deregister with IBTF */
	if (rdsib_statep->rds_ibhdl != NULL) {
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl =
		    NULL;
	}

	RDS_DPRINTF2("rdsib_deinitialize_ib", "return: statep %p",
	    rdsib_statep);
}

/*
 * Called on open of first RDS socket: registers and binds the RDS
 * connection service so incoming connection requests are accepted.
 */
int
rdsib_open_ib()
{
	int	ret;

	RDS_DPRINTF2("rdsib_open_ib", "enter: statep %p", rdsib_statep);

	/* Enable incoming connection requests */
	if (rdsib_statep->rds_srvhdl == NULL) {
		rdsib_statep->rds_srvhdl =
		    rds_register_service(rdsib_statep->rds_ibhdl);
		if (rdsib_statep->rds_srvhdl == NULL) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "Service registration failed");
			return (-1);
		} else {
			/* bind the service on all available ports */
			ret = rds_bind_service(rdsib_statep);
			if (ret != 0) {
				/* bind failure is logged but not fatal */
				RDS_DPRINTF2("rdsib_open_ib",
				    "Bind service failed: %d", ret);
			}
		}
	}

	RDS_DPRINTF2("rdsib_open_ib", "return: statep %p", rdsib_statep);

	return (0);
}

/*
 * Called when all ports are closed: unbinds and deregisters the RDS
 * connection service so no new connections are accepted.
 */
void
rdsib_close_ib()
{
	int	ret;

	RDS_DPRINTF2("rdsib_close_ib", "enter: statep %p", rdsib_statep);

	/* Disable incoming connection requests */
	if (rdsib_statep->rds_srvhdl != NULL) {
		ret = ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_close_ib",
			    "ibt_unbind_all_services failed: %d\n", ret);
		}
		ret = ibt_deregister_service(rdsib_statep->rds_ibhdl,
		    rdsib_statep->rds_srvhdl);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_close_ib",
			    "ibt_deregister_service failed: %d\n", ret);
		} else {
			/* only clear the handle once deregistration worked */
			rdsib_statep->rds_srvhdl = NULL;
		}
	}

	RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep);
}

/*
 * Return hcap, given the hca guid. Unlike rds_lkup_hca(), this only
 * returns an HCA whose RECV memory registration has completed
 * (RDS_HCA_STATE_MEM_REGISTERED); otherwise NULL.
 */
rds_hca_t *
rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
{
	rds_hca_t	*hcap;

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
	    "guid: %llx", statep, hca_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
		hcap = hcap->hca_nextp;
	}

	/*
	 * don't let anyone use this HCA until the RECV memory
	 * is registered with this HCA
	 */
	if ((hcap != NULL) &&
	    (hcap->hca_state == RDS_HCA_STATE_MEM_REGISTERED)) {
		ASSERT(hcap->hca_mrhdl != NULL);
		rw_exit(&statep->rds_hca_lock);
		return (hcap);
	}

	RDS_DPRINTF2("rds_get_hcap",
	    "HCA (0x%p, 0x%llx) is not initialized", hcap, hca_guid);
	rw_exit(&statep->rds_hca_lock);

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");

	return (NULL);
}

/*
 * Return hcap, given a gid. Matches the gid against sgid table entry 0
 * of every port of every memory-registered HCA; NULL if no match.
 */
rds_hca_t *
rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
{
	rds_hca_t	*hcap;
	uint_t		ix;

	RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
	    statep, gid.gid_prefix, gid.gid_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while (hcap != NULL) {

		/*
		 * don't let anyone use this HCA until the RECV memory
		 * is registered with this HCA
		 */
		if (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED) {
			RDS_DPRINTF3("rds_gid_to_hcap",
			    "HCA (0x%p, 0x%llx) is not initialized",
			    hcap, gid.gid_guid);
			hcap = hcap->hca_nextp;
			continue;
		}

		/* compare against sgid[0] of each port on this HCA */
		for (ix = 0; ix < hcap->hca_nports; ix++) {
			if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
			    gid.gid_prefix) &&
			    (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
			    gid.gid_guid)) {
				RDS_DPRINTF4("rds_gid_to_hcap",
				    "gid found in hcap: 0x%p", hcap);
				rw_exit(&statep->rds_hca_lock);
				return (hcap);
			}
		}
		hcap = hcap->hca_nextp;
	}

	rw_exit(&statep->rds_hca_lock);

	return (NULL);
}

/*
 * This is called from the send CQ handler. If more messages arrived
 * since the last ACK was posted, post the next ACK (an RDMA write
 * carrying ep_rbufid); otherwise drop the outstanding-ACK count.
 */
void
rds_send_acknowledgement(rds_ep_t *ep)
{
	int	ret;
	uint_t	ix;

	RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);

	mutex_enter(&ep->ep_lock);

	ASSERT(ep->ep_rdmacnt != 0);

	/*
	 * The previous ACK completed successfully, send the next one
	 * if more messages were received after sending the last ACK
	 */
	if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
		/* record the buffer id being ACKed in the ACK buffer */
		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
		mutex_exit(&ep->ep_lock);

		/* send acknowledgement */
		RDS_INCR_TXACKS();
		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_send_acknowledgement",
			    "EP(%p): ibt_post_send for acknowledgement "
			    "failed: %d, SQ depth: %d",
			    ep, ret, ep->ep_sndpool.pool_nbusy);
			/* post failed: undo the outstanding-ACK count */
			mutex_enter(&ep->ep_lock);
			ep->ep_rdmacnt--;
			mutex_exit(&ep->ep_lock);
		}
	} else {
		/* ACKed all messages, no more to ACK */
		ep->ep_rdmacnt--;
		mutex_exit(&ep->ep_lock);
		return;
	}

	RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
}

/*
 * Poll one completion off a control-channel recv CQ, maintain the RQ
 * level (scheduling a taskq refill below the low-water mark), and hand
 * the control packet to rds_handle_control_message(). Returns the
 * ibt_poll_cq() status; IBT_CQ_EMPTY means the CQ is drained.
 */
static int
rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	uint_t		npolled;
	rds_buf_t	*bp;
	rds_ctrl_pkt_t	*cpkt;
	rds_qp_t	*recvqp;
	int		ret = IBT_SUCCESS;

	RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	/* wc_id carries the rds_buf_t posted with the recv WR */
	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* Free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure (flushes are expected) */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_ctrl_completions",
			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
			    ep, cq, wc.wc_id, wc.wc_status);
		}
		return (ret);
	}

	/* there is one less in the RQ */
	recvqp = &ep->ep_recvqp;
	mutex_enter(&recvqp->qp_lock);
	recvqp->qp_level--;
	if ((recvqp->qp_taskqpending == B_FALSE) &&
	    (recvqp->qp_level <= recvqp->qp_lwm)) {
		/* Time to post more buffers into the RQ */
		recvqp->qp_taskqpending = B_TRUE;
		mutex_exit(&recvqp->qp_lock);

		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			/* dispatch failed: clear the flag so a later
			 * completion can retry the refill */
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		mutex_exit(&recvqp->qp_lock);
	}

	cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
	rds_handle_control_message(ep->ep_sp, cpkt);

	bp->buf_state = RDS_RCVBUF_FREE;
	rds_free_recv_buf(bp, 1);

	RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep);

	return (ret);
}

#define	RDS_POST_FEW_ATATIME	100
/* Post recv WRs into the RQ. 
Assumes the ep->refcnt is already incremented */
void
rds_post_recv_buf(void *arg)
{
	ibt_channel_hdl_t	chanhdl;
	rds_ep_t		*ep;
	rds_session_t		*sp;
	rds_qp_t		*recvqp;
	rds_bufpool_t		*gp;	/* pool to draw buffers from */
	rds_buf_t		*bp, *bp1;
	ibt_recv_wr_t		*wrp, wr[RDS_POST_FEW_ATATIME];
	rds_hca_t		*hcap;
	uint_t			npost, nspace, rcv_len;
	uint_t			ix, jx, kx;
	int			ret;

	chanhdl = (ibt_channel_hdl_t)arg;
	RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
	RDS_INCR_POST_RCV_BUF_CALLS();

	ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
	ASSERT(ep != NULL);
	sp = ep->ep_sp;
	recvqp = &ep->ep_recvqp;

	RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);

	/* get the hcap for the HCA hosting this channel */
	hcap = rds_lkup_hca(ep->ep_hca_guid);
	if (hcap == NULL) {
		RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
		    ep->ep_hca_guid);
		return;
	}

	/* Make sure the session is still connected */
	rw_enter(&sp->session_lock, RW_READER);
	if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
	    (sp->session_state != RDS_SESSION_STATE_CONNECTED) &&
	    (sp->session_state != RDS_SESSION_STATE_HCA_CLOSING)) {
		RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
		    "in active state (%d)", ep, sp->session_state);
		rw_exit(&sp->session_lock);
		return;
	}
	rw_exit(&sp->session_lock);

	/* how many can be posted */
	mutex_enter(&recvqp->qp_lock);
	nspace = recvqp->qp_depth - recvqp->qp_level;
	if (nspace == 0) {
		RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
		return;
	}
	mutex_exit(&recvqp->qp_lock);

	/* data and control endpoints use separate pools and packet sizes */
	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		gp = &rds_dpool;
		rcv_len = RdsPktSize;
	} else {
		gp = &rds_cpool;
		rcv_len = RDS_CTRLPKT_SIZE;
	}

	bp = rds_get_buf(gp, nspace, &jx);
	if (bp == NULL) {
		RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
		/* try again later */
		ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
		    (void *)chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
		return;
	}

	/* jx is how many buffers rds_get_buf actually returned */
	if (jx != nspace) {
		RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
		    "needed: %d available: %d", ep, nspace, jx);
		nspace = jx;
	}

	/* prepare each buffer: owner EP, state, lkey and length */
	bp1 = bp;
	for (ix = 0; ix < nspace; ix++) {
		bp1->buf_ep = ep;
		ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
		bp1->buf_state = RDS_RCVBUF_POSTED;
		bp1->buf_ds.ds_key = hcap->hca_lkey;
		bp1->buf_ds.ds_len = rcv_len;
		bp1 = bp1->buf_nextp;
	}

#if 0
	wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
	    KM_SLEEP);
#else
	/* stack WR array is used instead of the kmem allocation above */
	wrp = &wr[0];
#endif

	npost = nspace;
	while (npost) {
		/* post in batches of at most RDS_POST_FEW_ATATIME WRs */
		jx = (npost > RDS_POST_FEW_ATATIME) ?
		    RDS_POST_FEW_ATATIME : npost;
		for (ix = 0; ix < jx; ix++) {
			wrp[ix].wr_id = (uintptr_t)bp;
			wrp[ix].wr_nds = 1;
			wrp[ix].wr_sgl = &bp->buf_ds;
			bp = bp->buf_nextp;
		}

		ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
		if ((ret != IBT_SUCCESS) || (kx != jx)) {
			/* kx is the count actually posted before failure */
			RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: "
			    "%d", npost, ret);
			npost -= kx;
			break;
		}

		npost -= jx;
	}

	mutex_enter(&recvqp->qp_lock);
	if (npost != 0) {
		RDS_DPRINTF2("rds_post_recv_buf",
		    "EP(%p) Failed to post %d WRs", ep, npost);
		/* only count the WRs that actually made it into the RQ */
		recvqp->qp_level += (nspace - npost);
	} else {
		recvqp->qp_level += nspace;
	}

	/*
	 * sometimes, the recv WRs can get consumed as soon as they are
	 * posted. In that case, taskq thread to post more WRs to the RQ will
	 * not be scheduled as the taskqpending flag is still set.
	 */
	if (recvqp->qp_level == 0) {
		mutex_exit(&recvqp->qp_lock);
		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2("rds_post_recv_buf",
			    "ddi_taskq_dispatch failed: %d", ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
	}

#if 0
	kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
#endif

	RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
}

/*
 * Poll one completion off a data-channel recv CQ, maintain the RQ
 * level (refilling below the low-water mark), and assemble incoming
 * packets into messages: single-packet messages are delivered at once,
 * multi-packet messages are chained via ep_segfbp/ep_seglbp and
 * delivered when the last packet (dh_npkts == 1, dh_psn != 0) arrives.
 * Returns the ibt_poll_cq() status; IBT_CQ_EMPTY means drained.
 */
static int
rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	rds_buf_t	*bp;
	rds_data_hdr_t	*pktp;
	rds_qp_t	*recvqp;
	uint_t		npolled;
	int		ret = IBT_SUCCESS;


	RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	/* wc_id carries the rds_buf_t posted with the recv WR */
	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
	ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
	bp->buf_state = RDS_RCVBUF_ONSOCKQ;
	bp->buf_nextp = NULL;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure (flushes are expected) */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_data_completions",
			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
			    ep, cq, wc.wc_id, wc.wc_status);
			RDS_INCR_RXERRS();
		}
		return (ret);
	}

	/* there is one less in the RQ */
	recvqp = &ep->ep_recvqp;
	mutex_enter(&recvqp->qp_lock);
	recvqp->qp_level--;
	if ((recvqp->qp_taskqpending == B_FALSE) &&
	    (recvqp->qp_level <= recvqp->qp_lwm)) {
		/* Time to post more buffers into the RQ */
		recvqp->qp_taskqpending = B_TRUE;
		mutex_exit(&recvqp->qp_lock);

		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		mutex_exit(&recvqp->qp_lock);
	}

	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
	ASSERT(pktp->dh_datalen != 0);

	RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
	    pktp->dh_npkts, pktp->dh_psn);

	RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
	    pktp->dh_npkts, pktp->dh_psn);

	if (pktp->dh_npkts == 1) {
		/* single pkt or last packet */
		if (pktp->dh_psn != 0) {
			/* last packet of a segmented message */
			ASSERT(ep->ep_seglbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
			rds_received_msg(ep, ep->ep_segfbp);
			ep->ep_segfbp = NULL;
			ep->ep_seglbp = NULL;
		} else {
			/* single packet */
			rds_received_msg(ep, bp);
		}
	} else {
		/* multi-pkt msg */
		if (pktp->dh_psn == 0) {
			/* first packet */
			ASSERT(ep->ep_segfbp == NULL);
			ep->ep_segfbp = bp;
			ep->ep_seglbp = bp;
		} else {
			/* intermediate packet */
			ASSERT(ep->ep_segfbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
		}
	}

	RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);

	return (ret);
}

void
rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret = IBT_SUCCESS;
	int		(*func)(ibt_cq_hdl_t, rds_ep_t *);

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);

	/* pick the poller matching this endpoint's channel type */
	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		func = rds_poll_data_completions;
	} else {
		func = rds_poll_ctrl_completions;
	}

	/* drain the CQ completely */
	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, rds_wc_signal);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	/*
	 * drain once more after re-arming so completions that arrived
	 * between the first drain and the notify enable are not missed
	 */
	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
}

/*
 * Drain the send CQ: free completed send buffers back to the pool,
 * process RDMA-write (ACK) completions, and on a non-flush error mark
 * the session RDS_SESSION_STATE_ERROR and trigger send-error handling.
 * 'lock' is passed through to rds_free_send_buf().
 */
void
rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
{
	ibt_wc_t	wc[RDS_NUM_DATA_SEND_WCS];
	uint_t		npolled, nret, send_error = 0;
	rds_buf_t	*headp, *tailp, *bp;
	int		ret, ix;

	RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);

	headp = NULL;
	tailp = NULL;
	npolled = 0;
	do {
		ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
		if (ret != IBT_SUCCESS) {
			if (ret != IBT_CQ_EMPTY) {
				RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: %d", ep, cq, ret);
			} else {
				RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: IBT_CQ_EMPTY",
				    ep, cq);
			}

			break;
		}

		for (ix = 0; ix < nret; ix++) {
			if (wc[ix].wc_status == IBT_WC_SUCCESS) {
				if (wc[ix].wc_type == IBT_WRC_RDMAW) {
					/* RDMA write (ACK) completed */
					rds_send_acknowledgement(ep);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			} else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
				RDS_INCR_TXERRS();
				RDS_DPRINTF5("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);

				send_error = 1;

				/* RDMA-write WRs carry a sentinel wc_id */
				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			} else {
				RDS_INCR_TXERRS();
				RDS_DPRINTF2("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);
				/* first hard error: put session in error */
				if (send_error == 0) {
					rds_session_t	*sp = ep->ep_sp;

					/* don't let anyone send anymore */
					rw_enter(&sp->session_lock, RW_WRITER);
					if (sp->session_state !=
					    RDS_SESSION_STATE_ERROR) {
						sp->session_state =
						    RDS_SESSION_STATE_ERROR;
						/* Make this the active end */
						sp->session_type =
						    RDS_SESSION_ACTIVE;
					}
					rw_exit(&sp->session_lock);
				}

				send_error = 1;

				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			}

			/* chain freed buffers; returned in one call below */
			bp->buf_nextp = NULL;
			if (headp) {
				tailp->buf_nextp = bp;
				tailp = bp;
			} else {
				headp = bp;
				tailp = bp;
			}

			npolled++;
		}

		if (rds_no_interrupts && (npolled > 100)) {
			break;
		}

		/*
		 * NOTE(review): when rds_no_interrupts == 1 this break
		 * fires unconditionally, making the npolled > 100 check
		 * above reachable only for values > 1 — confirm intended.
		 */
		if (rds_no_interrupts == 1) {
			break;
		}
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
	    npolled, send_error);

	/* put the buffers to the pool */
	if (npolled != 0) {
		rds_free_send_buf(ep, headp, tailp, npolled, lock);
	}

	if (send_error != 0) {
		rds_handle_send_error(ep);
	}

	RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
}

/*
 * Send CQ handler: re-arm the CQ for the next completion, then drain
 * whatever is already present.
 */
void
rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret;

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	rds_poll_send_completions(cq, ep, B_FALSE);

	RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
}

/*
 * Free the endpoint's RC channel and its send/recv CQs. Caller must
 * hold ep->ep_lock (asserted below). Each resource is NULLed after it
 * is freed so repeated calls are harmless.
 */
void
rds_ep_free_rc_channel(rds_ep_t *ep)
{
	int ret;

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);

	ASSERT(mutex_owned(&ep->ep_lock));

	/* free the QP */
	if (ep->ep_chanhdl != NULL) {
		/* wait until the RQ is empty */
		(void) ibt_flush_channel(ep->ep_chanhdl);
		(void) rds_is_recvq_empty(ep, B_TRUE);
		ret = ibt_free_channel(ep->ep_chanhdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) "
			    "ibt_free_channel returned: %d", ep, ret);
		}
		ep->ep_chanhdl = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) Channel is ALREADY FREE", ep);
	}

	/* free the Send CQ */
	if (ep->ep_sendcq != NULL) {
		ret = ibt_free_cq(ep->ep_sendcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_ep_free_rc_channel",
			    "EP(%p) - for sendcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_sendcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) SendCQ is ALREADY FREE", ep);
	}

	/* free the Recv CQ */
	if (ep->ep_recvcq != NULL) {
		ret = ibt_free_cq(ep->ep_recvcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_ep_free_rc_channel",
			    "EP(%p) - for recvcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_recvcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) RecvCQ is ALREADY FREE", ep);
	}

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
}

/* Allocate resources for RC channel */
ibt_channel_hdl_t
rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
{
	int				ret = IBT_SUCCESS;
	ibt_cq_attr_t			scqattr, rcqattr;
	ibt_rc_chan_alloc_args_t	chanargs;
	ibt_channel_hdl_t		chanhdl;
	rds_session_t			*sp;
	rds_hca_t			*hcap;

	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
	    ep, hca_port);

	/* Update the EP with the right IP address and HCA guid */
	sp = ep->ep_sp;
	ASSERT(sp != NULL);
	rw_enter(&sp->session_lock, RW_READER);
	mutex_enter(&ep->ep_lock);
	ep->ep_myip = sp->session_myip;
	ep->ep_remip = sp->session_remip;
	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
	ep->ep_hca_guid = hcap->hca_guid;
	mutex_exit(&ep->ep_lock);
	rw_exit(&sp->session_lock);

	/* reset taskqpending flag here */
	ep->ep_recvqp.qp_taskqpending = B_FALSE;

	/* control and data channels size their CQs/queues differently */
	if (ep->ep_type == RDS_EP_TYPE_CTRL) {
		scqattr.cq_size = MaxCtrlSendBuffers;
		scqattr.cq_sched = NULL;
		scqattr.cq_flags = IBT_CQ_NO_FLAGS;

		rcqattr.cq_size = MaxCtrlRecvBuffers;
		rcqattr.cq_sched = NULL;
		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;

		chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers;
		chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers;
		chanargs.rc_sizes.cs_sq_sgl = 1;
		chanargs.rc_sizes.cs_rq_sgl = 1;
	} else {
		/* data SQ/CQ reserve extra room for ACK sends */
		scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS;
		scqattr.cq_sched = NULL;
		scqattr.cq_flags = IBT_CQ_NO_FLAGS;

		rcqattr.cq_size = MaxDataRecvBuffers;
		rcqattr.cq_sched = NULL;
		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;

		chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS;
		chanargs.rc_sizes.cs_rq = 
MaxDataRecvBuffers; 1281 chanargs.rc_sizes.cs_sq_sgl = 1; 1282 chanargs.rc_sizes.cs_rq_sgl = 1; 1283 } 1284 1285 mutex_enter(&ep->ep_lock); 1286 if (ep->ep_sendcq == NULL) { 1287 /* returned size is always greater than the requested size */ 1288 ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr, 1289 &ep->ep_sendcq, NULL); 1290 if (ret != IBT_SUCCESS) { 1291 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ " 1292 "failed, size = %d: %d", scqattr.cq_size, ret); 1293 mutex_exit(&ep->ep_lock); 1294 return (NULL); 1295 } 1296 1297 (void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler, 1298 ep); 1299 1300 if (rds_no_interrupts == 0) { 1301 ret = ibt_enable_cq_notify(ep->ep_sendcq, 1302 IBT_NEXT_COMPLETION); 1303 if (ret != IBT_SUCCESS) { 1304 RDS_DPRINTF2(LABEL, 1305 "ibt_enable_cq_notify failed: %d", ret); 1306 (void) ibt_free_cq(ep->ep_sendcq); 1307 ep->ep_sendcq = NULL; 1308 mutex_exit(&ep->ep_lock); 1309 return (NULL); 1310 } 1311 } 1312 } 1313 1314 if (ep->ep_recvcq == NULL) { 1315 /* returned size is always greater than the requested size */ 1316 ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr, 1317 &ep->ep_recvcq, NULL); 1318 if (ret != IBT_SUCCESS) { 1319 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ " 1320 "failed, size = %d: %d", rcqattr.cq_size, ret); 1321 (void) ibt_free_cq(ep->ep_sendcq); 1322 ep->ep_sendcq = NULL; 1323 mutex_exit(&ep->ep_lock); 1324 return (NULL); 1325 } 1326 1327 (void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler, 1328 ep); 1329 1330 ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal); 1331 if (ret != IBT_SUCCESS) { 1332 RDS_DPRINTF2(LABEL, 1333 "ibt_enable_cq_notify failed: %d", ret); 1334 (void) ibt_free_cq(ep->ep_recvcq); 1335 ep->ep_recvcq = NULL; 1336 (void) ibt_free_cq(ep->ep_sendcq); 1337 ep->ep_sendcq = NULL; 1338 mutex_exit(&ep->ep_lock); 1339 return (NULL); 1340 } 1341 } 1342 1343 chanargs.rc_flags = IBT_ALL_SIGNALED; 1344 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | 1345 IBT_CEP_ATOMIC; 1346 
chanargs.rc_hca_port_num = hca_port; 1347 chanargs.rc_scq = ep->ep_sendcq; 1348 chanargs.rc_rcq = ep->ep_recvcq; 1349 chanargs.rc_pd = hcap->hca_pdhdl; 1350 chanargs.rc_srq = NULL; 1351 1352 ret = ibt_alloc_rc_channel(hcap->hca_hdl, 1353 IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL); 1354 if (ret != IBT_SUCCESS) { 1355 RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d", 1356 ret); 1357 (void) ibt_free_cq(ep->ep_recvcq); 1358 ep->ep_recvcq = NULL; 1359 (void) ibt_free_cq(ep->ep_sendcq); 1360 ep->ep_sendcq = NULL; 1361 mutex_exit(&ep->ep_lock); 1362 return (NULL); 1363 } 1364 mutex_exit(&ep->ep_lock); 1365 1366 /* Chan private should contain the ep */ 1367 (void) ibt_set_chan_private(chanhdl, ep); 1368 1369 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl); 1370 1371 return (chanhdl); 1372 } 1373 1374 1375 #if 0 1376 1377 /* Return node guid given a port gid */ 1378 ib_guid_t 1379 rds_gid_to_node_guid(ib_gid_t gid) 1380 { 1381 ibt_node_info_t nodeinfo; 1382 int ret; 1383 1384 RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx", 1385 gid.gid_prefix, gid.gid_guid); 1386 1387 ret = ibt_gid_to_node_info(gid, &nodeinfo); 1388 if (ret != IBT_SUCCESS) { 1389 RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx " 1390 "failed", gid.gid_prefix, gid.gid_guid); 1391 return (0LL); 1392 } 1393 1394 RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx", 1395 nodeinfo.n_node_guid); 1396 1397 return (nodeinfo.n_node_guid); 1398 } 1399 1400 #endif 1401 1402 static void 1403 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, 1404 ibt_async_event_t *event) 1405 { 1406 rds_hca_t *hcap; 1407 ibt_hca_portinfo_t *newpinfop, *oldpinfop; 1408 uint_t newsize, oldsize, nport; 1409 ib_gid_t gid; 1410 int ret; 1411 1412 RDS_DPRINTF2("rds_handle_portup_event", 1413 "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); 1414 1415 rw_enter(&statep->rds_hca_lock, RW_WRITER); 1416 1417 hcap = statep->rds_hcalistp; 1418 while ((hcap != NULL) 
&& (hcap->hca_guid != event->ev_hca_guid)) { 1419 hcap = hcap->hca_nextp; 1420 } 1421 1422 if (hcap == NULL) { 1423 RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " 1424 "not in our list", event->ev_hca_guid); 1425 rw_exit(&statep->rds_hca_lock); 1426 return; 1427 } 1428 1429 ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); 1430 if (ret != IBT_SUCCESS) { 1431 RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret); 1432 rw_exit(&statep->rds_hca_lock); 1433 return; 1434 } 1435 1436 oldpinfop = hcap->hca_pinfop; 1437 oldsize = hcap->hca_pinfo_sz; 1438 hcap->hca_pinfop = newpinfop; 1439 hcap->hca_pinfo_sz = newsize; 1440 1441 (void) ibt_free_portinfo(oldpinfop, oldsize); 1442 1443 /* If RDS service is not registered then no bind is needed */ 1444 if (statep->rds_srvhdl == NULL) { 1445 RDS_DPRINTF2("rds_handle_portup_event", 1446 "RDS Service is not registered, so no action needed"); 1447 rw_exit(&statep->rds_hca_lock); 1448 return; 1449 } 1450 1451 /* 1452 * If the service was previously bound on this port and 1453 * if this port has changed state down and now up, we do not 1454 * need to bind the service again. The bind is expected to 1455 * persist across state changes. If the service was never bound 1456 * before then we bind it this time. 
1457 */ 1458 if (hcap->hca_bindhdl[event->ev_port - 1] == NULL) { 1459 1460 /* structure copy */ 1461 gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; 1462 1463 /* bind RDS service on the port, pass statep as cm_private */ 1464 ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, 1465 &hcap->hca_bindhdl[event->ev_port - 1]); 1466 if (ret != IBT_SUCCESS) { 1467 RDS_DPRINTF2("rds_handle_portup_event", 1468 "Bind service for HCA: 0x%llx Port: %d " 1469 "gid %llx:%llx returned: %d", event->ev_hca_guid, 1470 event->ev_port, gid.gid_prefix, gid.gid_guid, ret); 1471 } 1472 } 1473 1474 rw_exit(&statep->rds_hca_lock); 1475 1476 RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx", 1477 event->ev_hca_guid); 1478 } 1479 1480 static void 1481 rdsib_add_hca(ib_guid_t hca_guid) 1482 { 1483 rds_hca_t *hcap; 1484 ibt_mr_attr_t mem_attr; 1485 ibt_mr_desc_t mem_desc; 1486 int ret; 1487 1488 RDS_DPRINTF2("rdsib_add_hca", "Enter: GUID: 0x%llx", hca_guid); 1489 1490 hcap = rdsib_init_hca(hca_guid); 1491 if (hcap == NULL) 1492 return; 1493 1494 /* register the recv memory with this hca */ 1495 mutex_enter(&rds_dpool.pool_lock); 1496 if (rds_dpool.pool_memp == NULL) { 1497 /* no memory to register */ 1498 RDS_DPRINTF2("rdsib_add_hca", "No memory to register"); 1499 mutex_exit(&rds_dpool.pool_lock); 1500 return; 1501 } 1502 1503 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)rds_dpool.pool_memp; 1504 mem_attr.mr_len = rds_dpool.pool_memsize; 1505 mem_attr.mr_as = NULL; 1506 mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 1507 1508 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr, 1509 &hcap->hca_mrhdl, &mem_desc); 1510 1511 mutex_exit(&rds_dpool.pool_lock); 1512 1513 if (ret != IBT_SUCCESS) { 1514 RDS_DPRINTF2("rdsib_add_hca", "ibt_register_mr failed: %d", 1515 ret); 1516 } else { 1517 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 1518 hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED; 1519 hcap->hca_lkey = mem_desc.md_lkey; 1520 hcap->hca_rkey = 
mem_desc.md_rkey; 1521 rw_exit(&rdsib_statep->rds_hca_lock); 1522 } 1523 1524 RDS_DPRINTF2("rdsib_add_hca", "Retrun: GUID: 0x%llx", hca_guid); 1525 } 1526 1527 void rds_close_this_session(rds_session_t *sp, uint8_t wait); 1528 int rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port); 1529 1530 static void 1531 rdsib_del_hca(rds_state_t *statep, ib_guid_t hca_guid) 1532 { 1533 rds_session_t *sp; 1534 rds_hca_t *hcap; 1535 rds_hca_state_t saved_state; 1536 int ret, ix; 1537 1538 RDS_DPRINTF2("rdsib_del_hca", "Enter: GUID: 0x%llx", hca_guid); 1539 1540 /* 1541 * This should be a write lock as we don't want anyone to get access 1542 * to the hcap while we are modifing its contents 1543 */ 1544 rw_enter(&statep->rds_hca_lock, RW_WRITER); 1545 1546 hcap = statep->rds_hcalistp; 1547 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 1548 hcap = hcap->hca_nextp; 1549 } 1550 1551 /* Prevent initiating any new activity on this HCA */ 1552 ASSERT(hcap != NULL); 1553 saved_state = hcap->hca_state; 1554 hcap->hca_state = RDS_HCA_STATE_STOPPING; 1555 1556 rw_exit(&statep->rds_hca_lock); 1557 1558 /* 1559 * stop the outgoing traffic and close any active sessions on this hca. 1560 * Any pending messages in the SQ will be allowed to complete. 1561 */ 1562 rw_enter(&statep->rds_sessionlock, RW_READER); 1563 sp = statep->rds_sessionlistp; 1564 while (sp) { 1565 if (sp->session_hca_guid != hca_guid) { 1566 sp = sp->session_nextp; 1567 continue; 1568 } 1569 1570 rw_enter(&sp->session_lock, RW_WRITER); 1571 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 1572 sp->session_state); 1573 /* 1574 * We are changing the session state in advance. This prevents 1575 * further messages to be posted to the SQ. We then 1576 * send a control message to the remote and tell it close 1577 * the session. 
1578 */ 1579 sp->session_state = RDS_SESSION_STATE_HCA_CLOSING; 1580 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 1581 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 1582 rw_exit(&sp->session_lock); 1583 1584 /* 1585 * wait until the sendq is empty then tell the remote to 1586 * close this session. This enables for graceful shutdown of 1587 * the session 1588 */ 1589 (void) rds_is_sendq_empty(&sp->session_dataep, 2); 1590 (void) rds_post_control_message(sp, 1591 RDS_CTRL_CODE_CLOSE_SESSION, 0); 1592 1593 sp = sp->session_nextp; 1594 } 1595 1596 /* wait until all the sessions are off this HCA */ 1597 sp = statep->rds_sessionlistp; 1598 while (sp) { 1599 if (sp->session_hca_guid != hca_guid) { 1600 sp = sp->session_nextp; 1601 continue; 1602 } 1603 1604 rw_enter(&sp->session_lock, RW_READER); 1605 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 1606 sp->session_state); 1607 1608 while ((sp->session_state == RDS_SESSION_STATE_HCA_CLOSING) || 1609 (sp->session_state == RDS_SESSION_STATE_ERROR) || 1610 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING) || 1611 (sp->session_state == RDS_SESSION_STATE_CLOSED)) { 1612 rw_exit(&sp->session_lock); 1613 delay(drv_usectohz(1000000)); 1614 rw_enter(&sp->session_lock, RW_READER); 1615 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 1616 sp->session_state); 1617 } 1618 1619 rw_exit(&sp->session_lock); 1620 1621 sp = sp->session_nextp; 1622 } 1623 rw_exit(&statep->rds_sessionlock); 1624 1625 /* 1626 * if rdsib_close_ib was called before this, then that would have 1627 * unbound the service on all ports. In that case, the HCA structs 1628 * will contain stale bindhdls. Hence, we do not call unbind unless 1629 * the service is still registered. 
1630 */ 1631 if (statep->rds_srvhdl != NULL) { 1632 /* unbind RDS service on all ports on this HCA */ 1633 for (ix = 0; ix < hcap->hca_nports; ix++) { 1634 if (hcap->hca_bindhdl[ix] == NULL) { 1635 continue; 1636 } 1637 1638 RDS_DPRINTF2("rdsib_del_hca", 1639 "Unbinding Service: port: %d, bindhdl: %p", 1640 ix + 1, hcap->hca_bindhdl[ix]); 1641 (void) ibt_unbind_service(rdsib_statep->rds_srvhdl, 1642 hcap->hca_bindhdl[ix]); 1643 hcap->hca_bindhdl[ix] = NULL; 1644 } 1645 } 1646 1647 RDS_DPRINTF2("rdsib_del_hca", "HCA(%p) State: %d", hcap, 1648 hcap->hca_state); 1649 1650 switch (saved_state) { 1651 case RDS_HCA_STATE_MEM_REGISTERED: 1652 ASSERT(hcap->hca_mrhdl != NULL); 1653 ret = ibt_deregister_mr(hcap->hca_hdl, hcap->hca_mrhdl); 1654 if (ret != IBT_SUCCESS) { 1655 RDS_DPRINTF2("rdsib_del_hca", 1656 "ibt_deregister_mr failed: %d", ret); 1657 return; 1658 } 1659 hcap->hca_mrhdl = NULL; 1660 /* FALLTHRU */ 1661 case RDS_HCA_STATE_OPEN: 1662 ASSERT(hcap->hca_hdl != NULL); 1663 ASSERT(hcap->hca_pdhdl != NULL); 1664 1665 1666 ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); 1667 if (ret != IBT_SUCCESS) { 1668 RDS_DPRINTF2("rdsib_del_hca", 1669 "ibt_free_pd failed: %d", ret); 1670 } 1671 1672 (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz); 1673 1674 ret = ibt_close_hca(hcap->hca_hdl); 1675 if (ret != IBT_SUCCESS) { 1676 RDS_DPRINTF2("rdsib_del_hca", 1677 "ibt_close_hca failed: %d", ret); 1678 } 1679 1680 hcap->hca_hdl = NULL; 1681 hcap->hca_pdhdl = NULL; 1682 hcap->hca_lkey = 0; 1683 hcap->hca_rkey = 0; 1684 } 1685 1686 /* 1687 * This should be a write lock as we don't want anyone to get access 1688 * to the hcap while we are modifing its contents 1689 */ 1690 rw_enter(&statep->rds_hca_lock, RW_WRITER); 1691 hcap->hca_state = RDS_HCA_STATE_REMOVED; 1692 rw_exit(&statep->rds_hca_lock); 1693 1694 RDS_DPRINTF2("rdsib_del_hca", "Return: GUID: 0x%llx", hca_guid); 1695 } 1696 1697 static void 1698 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, 
ibt_async_code_t code, 1699 ibt_async_event_t *event) 1700 { 1701 rds_state_t *statep = (rds_state_t *)clntp; 1702 1703 RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); 1704 1705 switch (code) { 1706 case IBT_EVENT_PORT_UP: 1707 rds_handle_portup_event(statep, hdl, event); 1708 break; 1709 case IBT_HCA_ATTACH_EVENT: 1710 /* 1711 * NOTE: In some error recovery paths, it is possible to 1712 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs. 1713 */ 1714 (void) rdsib_add_hca(event->ev_hca_guid); 1715 break; 1716 case IBT_HCA_DETACH_EVENT: 1717 (void) rdsib_del_hca(statep, event->ev_hca_guid); 1718 break; 1719 1720 default: 1721 RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); 1722 } 1723 1724 RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code); 1725 } 1726 1727 /* 1728 * This routine exists to minimize stale connections across ungraceful 1729 * reboots of nodes in a cluster. 1730 */ 1731 void 1732 rds_randomize_qps(rds_hca_t *hcap) 1733 { 1734 ibt_cq_attr_t cqattr; 1735 ibt_rc_chan_alloc_args_t chanargs; 1736 ibt_channel_hdl_t qp1, qp2; 1737 ibt_cq_hdl_t cq_hdl; 1738 hrtime_t nsec; 1739 uint8_t i, j, rand1, rand2; 1740 int ret; 1741 1742 bzero(&cqattr, sizeof (ibt_cq_attr_t)); 1743 cqattr.cq_size = 1; 1744 cqattr.cq_sched = NULL; 1745 cqattr.cq_flags = IBT_CQ_NO_FLAGS; 1746 ret = ibt_alloc_cq(hcap->hca_hdl, &cqattr, &cq_hdl, NULL); 1747 if (ret != IBT_SUCCESS) { 1748 RDS_DPRINTF2("rds_randomize_qps", 1749 "ibt_alloc_cq failed: %d", ret); 1750 return; 1751 } 1752 1753 bzero(&chanargs, sizeof (ibt_rc_chan_alloc_args_t)); 1754 chanargs.rc_flags = IBT_ALL_SIGNALED; 1755 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | 1756 IBT_CEP_ATOMIC; 1757 chanargs.rc_hca_port_num = 1; 1758 chanargs.rc_scq = cq_hdl; 1759 chanargs.rc_rcq = cq_hdl; 1760 chanargs.rc_pd = hcap->hca_pdhdl; 1761 chanargs.rc_srq = NULL; 1762 1763 nsec = gethrtime(); 1764 rand1 = (nsec & 0xF); 1765 rand2 = (nsec >> 4) & 0xF; 1766 RDS_DPRINTF2("rds_randomize_qps", 
"rand1: %d rand2: %d", 1767 rand1, rand2); 1768 1769 for (i = 0; i < rand1 + 3; i++) { 1770 if (ibt_alloc_rc_channel(hcap->hca_hdl, 1771 IBT_ACHAN_NO_FLAGS, &chanargs, &qp1, NULL) != 1772 IBT_SUCCESS) { 1773 RDS_DPRINTF2("rds_randomize_qps", 1774 "Bailing at i: %d", i); 1775 (void) ibt_free_cq(cq_hdl); 1776 return; 1777 } 1778 for (j = 0; j < rand2 + 3; j++) { 1779 if (ibt_alloc_rc_channel(hcap->hca_hdl, 1780 IBT_ACHAN_NO_FLAGS, &chanargs, &qp2, 1781 NULL) != IBT_SUCCESS) { 1782 RDS_DPRINTF2("rds_randomize_qps", 1783 "Bailing at i: %d j: %d", i, j); 1784 (void) ibt_free_channel(qp1); 1785 (void) ibt_free_cq(cq_hdl); 1786 return; 1787 } 1788 (void) ibt_free_channel(qp2); 1789 } 1790 (void) ibt_free_channel(qp1); 1791 } 1792 1793 (void) ibt_free_cq(cq_hdl); 1794 }