Print this page
XXXX introduce drv_sectohz
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c
+++ new/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27 27 *
28 28 * This software is available to you under a choice of one of two
29 29 * licenses. You may choose to be licensed under the terms of the GNU
30 30 * General Public License (GPL) Version 2, available from the file
31 31 * COPYING in the main directory of this source tree, or the
32 32 * OpenIB.org BSD license below:
33 33 *
34 34 * Redistribution and use in source and binary forms, with or
35 35 * without modification, are permitted provided that the following
36 36 * conditions are met:
37 37 *
38 38 * - Redistributions of source code must retain the above
39 39 * copyright notice, this list of conditions and the following
40 40 * disclaimer.
41 41 *
42 42 * - Redistributions in binary form must reproduce the above
43 43 * copyright notice, this list of conditions and the following
44 44 * disclaimer in the documentation and/or other materials
45 45 * provided with the distribution.
46 46 *
47 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54 54 * SOFTWARE.
55 55 *
56 56 */
57 57 /*
58 58 * Sun elects to include this software in Sun product
59 59 * under the OpenIB BSD license.
60 60 *
61 61 *
62 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72 72 * POSSIBILITY OF SUCH DAMAGE.
73 73 */
74 74
75 75 #include <sys/ib/clients/rds/rdsib_cm.h>
76 76 #include <sys/ib/clients/rds/rdsib_ib.h>
77 77 #include <sys/ib/clients/rds/rdsib_buf.h>
78 78 #include <sys/ib/clients/rds/rdsib_ep.h>
79 79
80 80 /*
81 81 * This file contains CM related work:
82 82 *
83 83 * Service registration/deregistration
84 84 * Path lookup
85 85 * CM connection callbacks
86 86 * CM active and passive connection establishment
87 87 * Connection failover
88 88 */
89 89
/*
 * Shorthand for the IPv4 source/destination address members of an
 * ibt_ip_cm_info_t; used in this file as ipcm_info.SRCIP / ipcm_info.DSTIP.
 */
90 90 #define SRCIP src_addr.un.ip4addr
91 91 #define DSTIP dst_addr.un.ip4addr
92 92
93 93 /*
94 94 * Handle an incoming CM REQ
 *
 * statep   - RDS state; used to look up or create the session.
 * evp      - CM event carrying the REQ and its private data.
 * rargsp   - filled in with the REP arguments when the REQ is accepted.
 * rcmp     - buffer that receives the private data returned in the REP.
 * rcmp_len - size of rcmp (not referenced by this function).
 *
 * Returns IBT_CM_REJECT when:
 *   - the IP data cannot be extracted from the private data,
 *   - the RDS version, address family, architecture, endpoint type or
 *     user buffer size in the REQ does not match this node,
 *   - no session exists and none can be created,
 *   - session (re)initialization fails,
 *   - the session is in an unexpected state,
 *   - this node has the larger port GUID in a peer-to-peer race, or
 *   - the RC channel cannot be allocated.
 *
 * Otherwise the endpoint selected by cmp_eptype is used: a channel is
 * allocated on it, receive buffers are posted, this node's last buffer id
 * and ack address/rkey are copied into rcmp, and IBT_CM_ACCEPT is returned.
95 95 */
96 96 /* ARGSUSED */
97 97 static ibt_cm_status_t
98 98 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp,
99 99 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
100 100 {
101 101 ibt_cm_req_rcv_t *reqp;
102 102 ib_gid_t lgid, rgid;
103 103 rds_cm_private_data_t cmp;
104 104 rds_session_t *sp;
105 105 rds_ep_t *ep;
106 106 ibt_channel_hdl_t chanhdl;
107 107 ibt_ip_cm_info_t ipcm_info;
108 108 uint8_t save_state, save_type;
109 109 int ret;
110 110
111 111 RDS_DPRINTF2("rds_handle_cm_req", "Enter");
112 112
113 113 reqp = &evp->cm_event.req;
114 114 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */
115 115 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */
116 116
117 117 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx",
118 118 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid);
119 119
120 120 /*
121 121 * CM private data brings IP information
122 122 * Private data received is a stream of bytes and may not be properly
123 123 * aligned. So, bcopy the data onto the stack before accessing it.
124 124 */
/*
 * NOTE(review): a full sizeof (rds_cm_private_data_t) is copied without
 * comparing it against evp->cm_priv_data_len -- confirm the CM always
 * supplies a buffer at least that large.
 */
125 125 bcopy((uint8_t *)evp->cm_priv_data, &cmp,
126 126 sizeof (rds_cm_private_data_t));
127 127
128 128 /* extract the CM IP info */
129 129 ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data,
130 130 &ipcm_info);
131 131 if (ret != IBT_SUCCESS) {
132 132 RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d",
133 133 ret);
134 134 return (IBT_CM_REJECT);
135 135 }
136 136
137 137 RDS_DPRINTF2("rds_handle_cm_req",
138 138 "REQ Received: From IP: 0x%x To IP: 0x%x type: %d",
139 139 ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype);
140 140
/* The checks below reject a REQ whose parameters do not match this node. */
141 141 if (cmp.cmp_version != RDS_VERSION) {
142 142 RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d "
143 143 "Remote version: %d", RDS_VERSION, cmp.cmp_version);
144 144 return (IBT_CM_REJECT);
145 145 }
146 146
147 147 /* RDS supports V4 addresses only */
148 148 if ((ipcm_info.src_addr.family != AF_INET) ||
149 149 (ipcm_info.dst_addr.family != AF_INET)) {
150 150 RDS_DPRINTF2(LABEL, "Unsupported Address Family: "
151 151 "src: %d dst: %d", ipcm_info.src_addr.family,
152 152 ipcm_info.dst_addr.family);
153 153 return (IBT_CM_REJECT);
154 154 }
155 155
156 156 if (cmp.cmp_arch != RDS_THIS_ARCH) {
157 157 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)",
158 158 cmp.cmp_arch, RDS_THIS_ARCH);
159 159 return (IBT_CM_REJECT);
160 160 }
161 161
162 162 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) &&
163 163 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) {
164 164 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype);
165 165 return (IBT_CM_REJECT);
166 166 }
167 167
168 168 /* user_buffer_size should be same on all nodes */
169 169 if (cmp.cmp_user_buffer_size != UserBufferSize) {
170 170 RDS_DPRINTF2(LABEL,
171 171 "UserBufferSize Mismatch, this node: %d remote node: %d",
172 172 UserBufferSize, cmp.cmp_user_buffer_size);
173 173 return (IBT_CM_REJECT);
174 174 }
175 175
176 176 /*
177 177 * RDS needs more time to process a failover REQ so send an MRA.
178 178 * Otherwise, the remote may retry the REQ and fail the connection.
179 179 */
180 180 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) {
181 181 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA");
182 182 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
183 183 10000000 /* 10 sec */, NULL, 0);
184 184 }
185 185
186 186 /* Is there a session to the destination node? */
187 187 rw_enter(&statep->rds_sessionlock, RW_READER);
188 188 sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid);
189 189 rw_exit(&statep->rds_sessionlock);
190 190
191 191 if (sp == NULL) {
192 192 /*
193 193 * currently there is no session to the destination
194 194 * remote ip in the private data is the local ip and vice
195 195 * versa
196 196 */
197 197 sp = rds_session_create(statep, ipcm_info.DSTIP,
198 198 ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE);
199 199 if (sp == NULL) {
200 200 /* Check the list anyway. */
201 201 rw_enter(&statep->rds_sessionlock, RW_READER);
202 202 sp = rds_session_lkup(statep, ipcm_info.SRCIP,
203 203 rgid.gid_guid);
204 204 rw_exit(&statep->rds_sessionlock);
205 205 if (sp == NULL) {
206 206 /*
207 207 * The only way this can fail is due to lack
208 208 * of kernel resources
209 209 */
210 210 return (IBT_CM_REJECT);
211 211 }
212 212 }
213 213 }
214 214
215 215 rw_enter(&sp->session_lock, RW_WRITER);
216 216
217 217 /* catch peer-to-peer case as soon as possible */
218 218 if ((sp->session_state == RDS_SESSION_STATE_CREATED) ||
219 219 (sp->session_state == RDS_SESSION_STATE_INIT)) {
220 220 /* Check possible peer-to-peer case here */
221 221 if (sp->session_type != RDS_SESSION_PASSIVE) {
222 222 RDS_DPRINTF2("rds_handle_cm_req",
223 223 "SP(%p) Peer-peer connection handling", sp);
224 224 if (lgid.gid_guid > rgid.gid_guid) {
225 225 /* this node is active so reject this request */
226 226 rw_exit(&sp->session_lock);
227 227 return (IBT_CM_REJECT);
228 228 } else {
229 229 /* this node is passive, change the session */
230 230 sp->session_type = RDS_SESSION_PASSIVE;
231 231 sp->session_lgid = lgid;
232 232 sp->session_rgid = rgid;
233 233 }
234 234 }
235 235 }
236 236
237 237 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state);
/*
 * Remember the state and type seen on entry; the CONNECTED/ERROR/
 * PASSIVE_CLOSING path below changes both before it consults them.
 */
238 238 save_state = sp->session_state;
239 239 save_type = sp->session_type;
240 240
241 241 switch (sp->session_state) {
242 242 case RDS_SESSION_STATE_CONNECTED:
243 243 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp);
244 244 sp->session_state = RDS_SESSION_STATE_ERROR;
245 245 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
246 246 "RDS_SESSION_STATE_ERROR", sp);
247 247
248 248 /* FALLTHRU */
249 249 case RDS_SESSION_STATE_ERROR:
250 250 case RDS_SESSION_STATE_PASSIVE_CLOSING:
251 251 /*
252 252 * Some other thread must be processing this session,
253 253 * this thread must wait until the other thread finishes.
254 254 */
255 255 sp->session_type = RDS_SESSION_PASSIVE;
256 256 rw_exit(&sp->session_lock);
257 257
258 258 /* Handling this will take some time, so send an MRA */
259 259 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
260 260 10000000 /* 10 sec */, NULL, 0);
261 261
262 262 /*
263 263 * Any pending completions don't get flushed until the channel
264 264 * is closed. So, passing 0 here will not wait for pending
265 265 * completions in rds_session_close before closing the channel
266 266 */
267 267 rds_session_close(sp, IBT_NOCALLBACKS, 0);
268 268
269 269 rw_enter(&sp->session_lock, RW_WRITER);
270 270
271 271 /*
272 272 * If the session was in ERROR, then either a failover thread
273 273 * or event_failure thread would be processing this session.
274 274 * This thread should wait for event_failure thread to
275 275 * complete. This need not wait for failover thread.
276 276 */
/*
 * NOTE(review): the three lines in the middle of the condition below
 * ("open down", "lines elided", "open up") look like code-review page
 * navigation text rather than source; the line numbering continues
 * from 277 to 278 across them.
 */
277 277 if ((save_state != RDS_SESSION_STATE_CONNECTED) &&
↓ open down ↓ |
277 lines elided |
↑ open up ↑ |
278 278 (save_type == RDS_SESSION_PASSIVE)) {
279 279 /*
280 280 * The other thread is event_failure thread,
281 281 * wait until it finishes.
282 282 */
283 283 while (!((sp->session_state ==
284 284 RDS_SESSION_STATE_FAILED) ||
285 285 (sp->session_state ==
286 286 RDS_SESSION_STATE_FINI))) {
287 287 rw_exit(&sp->session_lock);
/*
 * The next two lines are the removed ("-") and added ("+") forms of the
 * same one-second sleep in the diff under review; the session lock is
 * dropped around the sleep and re-taken before the state is re-tested.
 */
288 - delay(drv_usectohz(1000000));
288 + delay(drv_sectohz(1));
289 289 rw_enter(&sp->session_lock, RW_WRITER);
290 290 }
291 291 }
292 292
293 293 /* move the session to init state */
294 294 if ((sp->session_state == RDS_SESSION_STATE_ERROR) ||
295 295 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING)) {
296 296 ret = rds_session_reinit(sp, lgid);
297 297 sp->session_myip = ipcm_info.DSTIP;
298 298 sp->session_lgid = lgid;
299 299 sp->session_rgid = rgid;
300 300 if (ret != 0) {
301 301 rds_session_fini(sp);
302 302 sp->session_state = RDS_SESSION_STATE_FAILED;
303 303 RDS_DPRINTF3("rds_handle_cm_req",
304 304 "SP(%p) State RDS_SESSION_STATE_FAILED",
305 305 sp);
306 306 rw_exit(&sp->session_lock);
307 307 return (IBT_CM_REJECT);
308 308 } else {
309 309 sp->session_state = RDS_SESSION_STATE_INIT;
310 310 RDS_DPRINTF3("rds_handle_cm_req",
311 311 "SP(%p) State RDS_SESSION_STATE_INIT", sp);
312 312 }
313 313
314 314 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) {
315 315 ep = &sp->session_ctrlep;
316 316 } else {
317 317 ep = &sp->session_dataep;
318 318 }
319 319 break;
320 320 }
321 321
/*
 * Reached when the session is no longer in ERROR or PASSIVE_CLOSING after
 * the close/wait above; it is then initialized from scratch below.
 */
322 322 /* FALLTHRU */
323 323 case RDS_SESSION_STATE_CREATED:
324 324 case RDS_SESSION_STATE_FAILED:
325 325 case RDS_SESSION_STATE_FINI:
326 326 /*
327 327 * Initialize both channels, we accept this connection
328 328 * only if both channels are initialized
329 329 */
330 330 sp->session_type = RDS_SESSION_PASSIVE;
331 331 sp->session_lgid = lgid;
332 332 sp->session_rgid = rgid;
333 333 sp->session_state = RDS_SESSION_STATE_CREATED;
334 334 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
335 335 "RDS_SESSION_STATE_CREATED", sp);
336 336 ret = rds_session_init(sp);
337 337 if (ret != 0) {
338 338 /* Seems like there are not enough resources */
339 339 sp->session_state = RDS_SESSION_STATE_FAILED;
340 340 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
341 341 "RDS_SESSION_STATE_FAILED", sp);
342 342 rw_exit(&sp->session_lock);
343 343 return (IBT_CM_REJECT);
344 344 }
345 345 sp->session_state = RDS_SESSION_STATE_INIT;
346 346 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
347 347 "RDS_SESSION_STATE_INIT", sp);
348 348
349 349 /* FALLTHRU */
350 350 case RDS_SESSION_STATE_INIT:
351 351 /*
352 352 * When re-using an existing session, make sure the
353 353 * session is still through the same HCA. Otherwise, the
354 354 * memory registrations have to moved to the new HCA.
355 355 */
356 356 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) {
357 357 if (sp->session_lgid.gid_guid != lgid.gid_guid) {
358 358 RDS_DPRINTF2("rds_handle_cm_req",
359 359 "Existing Session but different gid "
360 360 "existing: 0x%llx, new: 0x%llx, "
361 361 "sending an MRA",
362 362 sp->session_lgid.gid_guid, lgid.gid_guid);
363 363 (void) ibt_cm_delay(IBT_CM_DELAY_REQ,
364 364 evp->cm_session_id, 10000000 /* 10 sec */,
365 365 NULL, 0);
366 366 ret = rds_session_reinit(sp, lgid);
367 367 if (ret != 0) {
368 368 rds_session_fini(sp);
369 369 sp->session_state =
370 370 RDS_SESSION_STATE_FAILED;
371 371 sp->session_failover = 0;
/*
 * NOTE(review): the label below reads "rds_failover_session" although
 * this is rds_handle_cm_req -- looks like a copy/paste leftover.
 */
372 372 RDS_DPRINTF3("rds_failover_session",
373 373 "SP(%p) State "
374 374 "RDS_SESSION_STATE_FAILED", sp);
375 375 rw_exit(&sp->session_lock);
376 376 return (IBT_CM_REJECT);
377 377 }
378 378 }
379 379 ep = &sp->session_dataep;
380 380 } else {
381 381 ep = &sp->session_ctrlep;
382 382 }
383 383
384 384 break;
385 385 default:
386 386 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected "
387 387 "state: %d", sp, sp->session_state);
388 388 rw_exit(&sp->session_lock);
389 389 return (IBT_CM_REJECT);
390 390 }
391 391
/* From here on sp->session_lock is held as writer and ep is set. */
392 392 sp->session_failover = 0; /* reset any previous value */
393 393 if (cmp.cmp_failover) {
394 394 RDS_DPRINTF2("rds_handle_cm_req",
395 395 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid);
396 396 sp->session_failover = 1;
397 397 }
398 398
/*
 * ep_lock is taken while session_lock is still held; every branch below
 * drops session_lock, and ep_lock is dropped further down (or on reject).
 */
399 399 mutex_enter(&ep->ep_lock);
400 400 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
401 401 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
402 402 sp->session_type = RDS_SESSION_PASSIVE;
403 403 rw_exit(&sp->session_lock);
404 404 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
405 405 rw_exit(&sp->session_lock);
406 406 /*
407 407 * Peer to peer connection. There is an active
408 408 * connection pending on this ep. The one with
409 409 * greater port guid becomes active and the
410 410 * other becomes passive.
411 411 */
412 412 RDS_DPRINTF2("rds_handle_cm_req",
413 413 "EP(%p) Peer-peer connection handling", ep);
414 414 if (lgid.gid_guid > rgid.gid_guid) {
415 415 /* this node is active so reject this request */
416 416 mutex_exit(&ep->ep_lock);
417 417 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): "
418 418 "Rejecting passive in favor of active", sp, ep);
419 419 return (IBT_CM_REJECT);
420 420 } else {
421 421 /*
422 422 * This session is not the active end, change it
423 423 * to passive end.
424 424 */
425 425 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
426 426
/*
 * NOTE(review): session_lock is re-acquired here while ep_lock is still
 * held, the reverse of the session_lock -> ep_lock order used just
 * above -- confirm this cannot deadlock against another thread.
 */
427 427 rw_enter(&sp->session_lock, RW_WRITER);
428 428 sp->session_type = RDS_SESSION_PASSIVE;
429 429 sp->session_lgid = lgid;
430 430 sp->session_rgid = rgid;
431 431 rw_exit(&sp->session_lock);
432 432 }
433 433 } else {
434 434 rw_exit(&sp->session_lock);
435 435 }
436 436
/*
 * Exchange per-endpoint parameters: store the requester's values in the
 * endpoint, then overwrite the same cmp fields with this node's values so
 * that cmp can be sent back as the REP private data.
 */
437 437 ep->ep_lbufid = cmp.cmp_last_bufid;
438 438 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
439 439 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
440 440 cmp.cmp_last_bufid = ep->ep_rbufid;
441 441 cmp.cmp_ack_addr = ep->ep_ack_addr;
442 442 cmp.cmp_ack_rkey = ep->ep_ack_rkey;
443 443 mutex_exit(&ep->ep_lock);
444 444
445 445 /* continue with accepting the connection request for this channel */
446 446 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port);
447 447 if (chanhdl == NULL) {
448 448 mutex_enter(&ep->ep_lock);
449 449 ep->ep_state = RDS_EP_STATE_UNCONNECTED;
450 450 mutex_exit(&ep->ep_lock);
451 451 return (IBT_CM_REJECT);
452 452 }
453 453
454 454 /* pre-post recv buffers in the RQ */
455 455 rds_post_recv_buf((void *)chanhdl);
456 456
/*
 * NOTE(review): rcmp_len is not compared against
 * sizeof (rds_cm_private_data_t) before the copy into rcmp below.
 */
457 457 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t);
458 458 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t));
459 459 rargsp->cm_ret.rep.cm_channel = chanhdl;
460 460 rargsp->cm_ret.rep.cm_rdma_ra_out = 4;
461 461 rargsp->cm_ret.rep.cm_rdma_ra_in = 4;
462 462 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry;
463 463
464 464 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)",
465 465 sp, ep, chanhdl);
466 466
467 467 return (IBT_CM_ACCEPT);
468 468 }
469 469
470 470 /*
471 471 * Handle an incoming CM REP
472 472 * Pre-post recv buffers for the QP
473 473 */
474 474 /* ARGSUSED */
475 475 static ibt_cm_status_t
476 476 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp,
477 477 void *rcmp, ibt_priv_data_len_t rcmp_len)
478 478 {
479 479 rds_ep_t *ep;
480 480 rds_cm_private_data_t cmp;
481 481
482 482 RDS_DPRINTF2("rds_handle_cm_rep", "Enter");
483 483
484 484 /* pre-post recv buffers in the RQ */
485 485 rds_post_recv_buf((void *)evp->cm_channel);
486 486
487 487 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
488 488 bcopy((uint8_t *)evp->cm_priv_data, &cmp,
489 489 sizeof (rds_cm_private_data_t));
490 490 ep->ep_lbufid = cmp.cmp_last_bufid;
491 491 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
492 492 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
493 493
494 494 rargsp->cm_ret_len = 0;
495 495
496 496 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid);
497 497
498 498 return (IBT_CM_ACCEPT);
499 499 }
500 500
501 501 /*
502 502 * Handle CONN EST
503 503 */
504 504 static ibt_cm_status_t
505 505 rds_handle_cm_conn_est(ibt_cm_event_t *evp)
506 506 {
507 507 rds_session_t *sp;
508 508 rds_ep_t *ep;
509 509
510 510 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
511 511
512 512 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep,
513 513 ep->ep_state);
514 514
515 515 mutex_enter(&ep->ep_lock);
516 516 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) ||
517 517 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING));
518 518 ep->ep_state = RDS_EP_STATE_CONNECTED;
519 519 ep->ep_chanhdl = evp->cm_channel;
520 520 sp = ep->ep_sp;
521 521 mutex_exit(&ep->ep_lock);
522 522
523 523 (void) rds_session_active(sp);
524 524
525 525 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return");
526 526 return (IBT_CM_ACCEPT);
527 527 }
528 528
529 529 /*
530 530 * Handle CONN CLOSED
531 531 */
532 532 static ibt_cm_status_t
533 533 rds_handle_cm_conn_closed(ibt_cm_event_t *evp)
534 534 {
535 535 rds_ep_t *ep;
536 536 rds_session_t *sp;
537 537
538 538 /* Catch DREQs but ignore DREPs */
539 539 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) {
540 540 RDS_DPRINTF2("rds_handle_cm_conn_closed",
541 541 "Ignoring Event: %d received", evp->cm_event.closed);
542 542 return (IBT_CM_ACCEPT);
543 543 }
544 544
545 545 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
546 546 sp = ep->ep_sp;
547 547 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Chan(%p) Enter",
548 548 ep, evp->cm_channel);
549 549
550 550 mutex_enter(&ep->ep_lock);
551 551 if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
552 552 /* Ignore this DREQ */
553 553 RDS_DPRINTF2("rds_handle_cm_conn_closed",
554 554 "EP(%p) not connected, state: %d", ep, ep->ep_state);
555 555 mutex_exit(&ep->ep_lock);
556 556 return (IBT_CM_ACCEPT);
557 557 }
558 558 ep->ep_state = RDS_EP_STATE_CLOSING;
559 559 mutex_exit(&ep->ep_lock);
560 560
561 561 rw_enter(&sp->session_lock, RW_WRITER);
562 562 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp,
563 563 sp->session_state);
564 564
565 565 switch (sp->session_state) {
566 566 case RDS_SESSION_STATE_CONNECTED:
567 567 case RDS_SESSION_STATE_HCA_CLOSING:
568 568 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING;
569 569 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
570 570 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
571 571 break;
572 572
573 573 case RDS_SESSION_STATE_PASSIVE_CLOSING:
574 574 sp->session_state = RDS_SESSION_STATE_CLOSED;
575 575 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
576 576 "RDS_SESSION_STATE_CLOSED", sp);
577 577 rds_passive_session_fini(sp);
578 578 sp->session_state = RDS_SESSION_STATE_FINI;
579 579 RDS_DPRINTF3("rds_handle_cm_conn_closed",
580 580 "SP(%p) State RDS_SESSION_STATE_FINI", sp);
581 581 break;
582 582
583 583 case RDS_SESSION_STATE_ACTIVE_CLOSING:
584 584 case RDS_SESSION_STATE_ERROR:
585 585 case RDS_SESSION_STATE_CLOSED:
586 586 break;
587 587
588 588 case RDS_SESSION_STATE_INIT:
589 589 sp->session_state = RDS_SESSION_STATE_ERROR;
590 590 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
591 591 "RDS_SESSION_STATE_ERROR", sp);
592 592 rds_passive_session_fini(sp);
593 593 sp->session_state = RDS_SESSION_STATE_FAILED;
594 594 RDS_DPRINTF3("rds_handle_cm_conn_closed",
595 595 "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
596 596 break;
597 597
598 598 default:
599 599 RDS_DPRINTF2("rds_handle_cm_conn_closed",
600 600 "SP(%p) - Unexpected state: %d", sp, sp->session_state);
601 601 rds_passive_session_fini(sp);
602 602 sp->session_state = RDS_SESSION_STATE_FAILED;
603 603 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
604 604 "RDS_SESSION_STATE_FAILED", sp);
605 605 }
606 606 rw_exit(&sp->session_lock);
607 607
608 608 mutex_enter(&ep->ep_lock);
609 609 ep->ep_state = RDS_EP_STATE_CLOSED;
610 610 mutex_exit(&ep->ep_lock);
611 611
612 612 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp);
613 613 return (IBT_CM_ACCEPT);
614 614 }
615 615
616 616 /*
617 617 * Handle EVENT FAILURE
618 618 */
619 619 static ibt_cm_status_t
620 620 rds_handle_cm_event_failure(ibt_cm_event_t *evp)
621 621 {
622 622 rds_ep_t *ep;
623 623 rds_session_t *sp;
624 624 int ret;
625 625
626 626 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p "
627 627 "Code: %d msg: %d reason: %d", evp->cm_channel,
628 628 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg,
629 629 evp->cm_event.failed.cf_reason);
630 630
631 631 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) {
632 632 RDS_DPRINTF2(LABEL,
633 633 "Received REJ with reason IBT_CM_INVALID_SID: "
634 634 "RDS may not be loaded on the remote system");
635 635 }
636 636
637 637 if (evp->cm_channel == NULL) {
638 638 return (IBT_CM_ACCEPT);
639 639 }
640 640
641 641 if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) &&
642 642 (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) {
643 643 /*
644 644 * This end is active, just ignore, ibt_open_rc_channel()
645 645 * caller will take care of cleanup.
646 646 */
647 647 RDS_DPRINTF2("rds_handle_cm_event_failure",
648 648 "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel);
649 649 return (IBT_CM_ACCEPT);
650 650 }
651 651
652 652 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
653 653 sp = ep->ep_sp;
654 654
655 655 rw_enter(&sp->session_lock, RW_WRITER);
656 656 if (sp->session_type == RDS_SESSION_PASSIVE) {
657 657 RDS_DPRINTF2("rds_handle_cm_event_failure",
658 658 "SP(%p) - state: %d", sp, sp->session_state);
659 659 if ((sp->session_state == RDS_SESSION_STATE_INIT) ||
660 660 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) {
661 661 sp->session_state = RDS_SESSION_STATE_ERROR;
662 662 RDS_DPRINTF3("rds_handle_cm_event_failure",
663 663 "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
664 664
665 665 /*
666 666 * Store the cm_channel for freeing later
667 667 * Active side frees it on ibt_open_rc_channel
668 668 * failure
669 669 */
670 670 if (ep->ep_chanhdl == NULL) {
671 671 ep->ep_chanhdl = evp->cm_channel;
672 672 }
673 673 rw_exit(&sp->session_lock);
674 674
675 675 /*
676 676 * rds_passive_session_fini should not be called
677 677 * directly in the CM handler. It will cause a deadlock.
678 678 */
679 679 ret = ddi_taskq_dispatch(rds_taskq,
680 680 rds_cleanup_passive_session, (void *)sp,
681 681 DDI_NOSLEEP);
682 682 if (ret != DDI_SUCCESS) {
683 683 RDS_DPRINTF2("rds_handle_cm_event_failure",
684 684 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret);
685 685 }
686 686 return (IBT_CM_ACCEPT);
687 687 }
688 688 }
689 689 rw_exit(&sp->session_lock);
690 690
691 691 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp);
692 692 return (IBT_CM_ACCEPT);
693 693 }
694 694
695 695 /*
696 696 * CM Handler
697 697 *
698 698 * Called by IBCM
699 699 * The cm_private type differs for active and passive events.
700 700 */
701 701 ibt_cm_status_t
702 702 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp,
703 703 ibt_cm_return_args_t *ret_args, void *ret_priv_data,
704 704 ibt_priv_data_len_t ret_len_max)
705 705 {
706 706 ibt_cm_status_t ret = IBT_CM_ACCEPT;
707 707
708 708 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type);
709 709
710 710 switch (eventp->cm_type) {
711 711 case IBT_CM_EVENT_REQ_RCV:
712 712 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp,
713 713 ret_args, ret_priv_data, ret_len_max);
714 714 break;
715 715 case IBT_CM_EVENT_REP_RCV:
716 716 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data,
717 717 ret_len_max);
718 718 break;
719 719 case IBT_CM_EVENT_MRA_RCV:
720 720 /* Not supported */
721 721 break;
722 722 case IBT_CM_EVENT_CONN_EST:
723 723 ret = rds_handle_cm_conn_est(eventp);
724 724 break;
725 725 case IBT_CM_EVENT_CONN_CLOSED:
726 726 ret = rds_handle_cm_conn_closed(eventp);
727 727 break;
728 728 case IBT_CM_EVENT_FAILURE:
729 729 ret = rds_handle_cm_event_failure(eventp);
730 730 break;
731 731 case IBT_CM_EVENT_LAP_RCV:
732 732 /* Not supported */
733 733 RDS_DPRINTF2(LABEL, "LAP message received");
734 734 break;
735 735 case IBT_CM_EVENT_APR_RCV:
736 736 /* Not supported */
737 737 RDS_DPRINTF2(LABEL, "APR message received");
738 738 break;
739 739 default:
740 740 break;
741 741 }
742 742
743 743 RDS_DPRINTF2("rds_cm_handler", "Return");
744 744
745 745 return (ret);
746 746 }
747 747
748 748 /* This is based on OFED Linux RDS */
/*
 * Port number handed to ibt_get_ip_sid() to derive the RDS service id, and
 * placed in src_port of the IP CM private data when opening a channel.
 */
749 749 #define RDS_PORT_NUM 6556
750 750
751 751 /*
752 752 * Register the wellknown service with service id: RDS_SERVICE_ID
753 753 * Incoming connection requests should arrive on this service id.
754 754 */
755 755 ibt_srv_hdl_t
756 756 rds_register_service(ibt_clnt_hdl_t rds_ibhdl)
757 757 {
758 758 ibt_srv_hdl_t srvhdl;
759 759 ibt_srv_desc_t srvdesc;
760 760 int ret;
761 761
762 762 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl);
763 763
764 764 bzero(&srvdesc, sizeof (ibt_srv_desc_t));
765 765 srvdesc.sd_handler = rds_cm_handler;
766 766 srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
767 767
768 768 /*
769 769 * This is the new service id as per:
770 770 * Annex A11: RDMA IP CM Service
771 771 */
772 772 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP,
773 773 RDS_PORT_NUM);
774 774 ret = ibt_register_service(rds_ibhdl, &srvdesc,
775 775 rdsib_statep->rds_service_id, 1, &srvhdl, NULL);
776 776 if (ret != IBT_SUCCESS) {
777 777 RDS_DPRINTF2(LABEL,
778 778 "RDS Service (0x%llx) Registration Failed: %d",
779 779 rdsib_statep->rds_service_id, ret);
780 780 return (NULL);
781 781 }
782 782
783 783 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl);
784 784 return (srvhdl);
785 785 }
786 786
787 787 /* Bind the RDS service on all ports */
788 788 int
789 789 rds_bind_service(rds_state_t *statep)
790 790 {
791 791 rds_hca_t *hcap;
792 792 ib_gid_t gid;
793 793 uint_t jx, nbinds = 0, nports = 0;
794 794 int ret;
795 795
796 796 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep);
797 797
798 798 rw_enter(&statep->rds_hca_lock, RW_READER);
799 799
800 800 hcap = statep->rds_hcalistp;
801 801 while (hcap != NULL) {
802 802
803 803 /* skip the HCAs that are not fully online */
804 804 if ((hcap->hca_state != RDS_HCA_STATE_OPEN) &&
805 805 (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED)) {
806 806 RDS_DPRINTF2("rds_bind_service",
807 807 "Skipping HCA: 0x%llx, state: %d",
808 808 hcap->hca_guid, hcap->hca_state);
809 809 hcap = hcap->hca_nextp;
810 810 continue;
811 811 }
812 812
813 813 /* currently, we have space for only 4 bindhdls */
814 814 ASSERT(hcap->hca_nports < 4);
815 815 for (jx = 0; jx < hcap->hca_nports; jx++) {
816 816 nports++;
817 817 if (hcap->hca_pinfop[jx].p_linkstate !=
818 818 IBT_PORT_ACTIVE) {
819 819 /*
820 820 * service bind will be called in the async
821 821 * handler when the port comes up. Clear any
822 822 * stale bind handle.
823 823 */
824 824 hcap->hca_bindhdl[jx] = NULL;
825 825 continue;
826 826 }
827 827
828 828 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0];
829 829 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d "
830 830 "gid: %llx:%llx", hcap->hca_guid,
831 831 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix,
832 832 gid.gid_guid);
833 833
834 834 /* pass statep as cm_private */
835 835 ret = ibt_bind_service(statep->rds_srvhdl, gid,
836 836 NULL, statep, &hcap->hca_bindhdl[jx]);
837 837 if (ret != IBT_SUCCESS) {
838 838 RDS_DPRINTF2(LABEL, "Bind service for "
839 839 "HCA: 0x%llx Port: %d gid %llx:%llx "
840 840 "failed: %d", hcap->hca_guid,
841 841 hcap->hca_pinfop[jx].p_port_num,
842 842 gid.gid_prefix, gid.gid_guid, ret);
843 843 continue;
844 844 }
845 845
846 846 nbinds++;
847 847 }
848 848 hcap = hcap->hca_nextp;
849 849 }
850 850
851 851 rw_exit(&statep->rds_hca_lock);
852 852
853 853 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports",
854 854 nbinds, nports);
855 855
856 856 #if 0
857 857 if (nbinds == 0) {
858 858 return (-1);
859 859 }
860 860 #endif
861 861
862 862 RDS_DPRINTF2("rds_bind_service", "Return");
863 863
864 864 return (0);
865 865 }
866 866
867 867 /* Open an RC connection */
868 868 int
869 869 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
870 870 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl)
871 871 {
872 872 rds_session_t *sp;
873 873 ibt_chan_open_args_t ocargs;
874 874 ibt_rc_returns_t ocrets;
875 875 rds_cm_private_data_t cmp;
876 876 uint8_t hca_port;
877 877 ibt_channel_hdl_t hdl;
878 878 ibt_status_t ret = 0;
879 879 ibt_ip_cm_info_t ipcm_info;
880 880
881 881 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode);
882 882
883 883 sp = ep->ep_sp;
884 884
885 885 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
886 886 ipcm_info.src_addr.family = AF_INET;
887 887 ipcm_info.SRCIP = sp->session_myip;
888 888 ipcm_info.dst_addr.family = AF_INET;
889 889 ipcm_info.DSTIP = sp->session_remip;
890 890 ipcm_info.src_port = RDS_PORT_NUM;
891 891 ret = ibt_format_ip_private_data(&ipcm_info,
892 892 sizeof (rds_cm_private_data_t), &cmp);
893 893 if (ret != IBT_SUCCESS) {
894 894 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data "
895 895 "failed: %d", sp, ep, ret);
896 896 return (-1);
897 897 }
898 898
899 899 hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num;
900 900
901 901 hdl = rds_ep_alloc_rc_channel(ep, hca_port);
902 902 if (hdl == NULL) {
903 903 return (-1);
904 904 }
905 905
906 906 cmp.cmp_version = RDS_VERSION;
907 907 cmp.cmp_arch = RDS_THIS_ARCH;
908 908 cmp.cmp_eptype = ep->ep_type;
909 909 cmp.cmp_failover = sp->session_failover;
910 910 cmp.cmp_last_bufid = ep->ep_rbufid;
911 911 cmp.cmp_user_buffer_size = UserBufferSize;
912 912 cmp.cmp_ack_addr = ep->ep_ack_addr;
913 913 cmp.cmp_ack_rkey = ep->ep_ack_rkey;
914 914
915 915 bzero(&ocargs, sizeof (ibt_chan_open_args_t));
916 916 bzero(&ocrets, sizeof (ibt_rc_returns_t));
917 917 ocargs.oc_path = pinfo;
918 918 ocargs.oc_cm_handler = rds_cm_handler;
919 919 ocargs.oc_cm_clnt_private = NULL;
920 920 ocargs.oc_rdma_ra_out = 4;
921 921 ocargs.oc_rdma_ra_in = 4;
922 922 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t);
923 923 ocargs.oc_priv_data = &cmp;
924 924 ocargs.oc_path_retry_cnt = IBPathRetryCount;
925 925 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry;
926 926 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS,
927 927 mode, &ocargs, &ocrets);
928 928 if (ret != IBT_SUCCESS) {
929 929 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel "
930 930 "failed: %d", sp, ep, ret);
931 931 (void) ibt_flush_channel(hdl);
932 932 (void) ibt_free_channel(hdl);
933 933
934 934 mutex_enter(&ep->ep_lock);
935 935 /* don't cleanup if this failure is due to peer-peer race */
936 936 if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
937 937 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */
938 938 ep->ep_state = RDS_EP_STATE_ERROR;
939 939 rds_ep_free_rc_channel(ep);
940 940 }
941 941 mutex_exit(&ep->ep_lock);
942 942
943 943 return (-1);
944 944 }
945 945
946 946 *chanhdl = hdl;
947 947
948 948 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep,
949 949 *chanhdl);
950 950
951 951 return (0);
952 952 }
953 953
954 954 int
955 955 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode)
956 956 {
957 957 int ret;
958 958
959 959 RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)",
960 960 chanhdl, mode);
961 961
962 962 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0);
963 963
964 964 RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl);
965 965
966 966 return (ret);
967 967 }
↓ open down ↓ |
669 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX