5045 use atomic_{inc,dec}_* instead of atomic_add_*
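The change is mechanical: calls of the form atomic_add_32(&x, 1) and
atomic_add_32(&x, -1) become atomic_inc_32(&x) and atomic_dec_32(&x)
(see atomic_ops(3C)). The semantics are identical; the dedicated
entry points simply state the intent directly. A minimal sketch of
the pattern, assuming a hypothetical "counter" variable that is not
part of the driver:

    #include <sys/types.h>
    #include <sys/atomic.h>

    static volatile uint32_t counter;	/* hypothetical example */

    static void
    old_style(void)
    {
    	atomic_add_32(&counter, 1);	/* increment via explicit delta */
    	atomic_add_32(&counter, -1);	/* decrement via explicit delta */
    }

    static void
    new_style(void)
    {
    	atomic_inc_32(&counter);	/* same effect, clearer intent */
    	atomic_dec_32(&counter);
    }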
--- old/usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c
+++ new/usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /* Copyright (c) 1990 Mentat Inc. */
26 26
27 27 /*
28 28 * An implementation of the IPoIB-CM standard based on PSARC 2009/593.
29 29 */
30 30 #include <sys/types.h>
31 31 #include <sys/conf.h>
32 32 #include <sys/ddi.h>
33 33 #include <sys/sunddi.h>
34 34 #include <sys/modctl.h>
35 35 #include <sys/stropts.h>
36 36 #include <sys/stream.h>
37 37 #include <sys/strsun.h>
38 38 #include <sys/strsubr.h>
39 39 #include <sys/dlpi.h>
40 40 #include <sys/mac_provider.h>
41 41
42 42 #include <sys/pattr.h> /* for HCK_FULLCKSUM */
43 43 #include <sys/atomic.h> /* for atomic_add*() */
44 44 #include <sys/ethernet.h> /* for ETHERTYPE_IP */
45 45 #include <netinet/in.h> /* for netinet/ip.h below */
46 46 #include <netinet/ip.h> /* for struct ip */
47 47 #include <inet/common.h> /* for inet/ip.h below */
48 48 #include <inet/ip.h> /* for ipha_t */
49 49 #include <inet/ip_if.h> /* for ETHERTYPE_IPV6 */
50 50 #include <inet/ip6.h> /* for ip6_t */
51 51 #include <netinet/icmp6.h> /* for icmp6_t */
52 52
53 53 #include <sys/ib/clients/ibd/ibd.h>
54 54
55 55 extern ibd_global_state_t ibd_gstate;
56 56 extern int ibd_rc_conn_timeout;
57 57 uint_t ibd_rc_tx_softintr = 1;
58 58 /*
59 59 * If the number of WRs in the receive queue of an RC connection drops
60 60 * below IBD_RC_RX_WR_THRESHOLD, we will post more receive WRs into it.
61 61 */
62 62 #define IBD_RC_RX_WR_THRESHOLD 0x20
63 63
64 64 /*
65 65 * If the number of free SWQEs (or large Tx bufs) is larger than or equal to
66 66 * IBD_RC_TX_FREE_THRESH, we will call mac_tx_update to notify GLD to continue
67 67 * transmitting packets.
68 68 */
69 69 #define IBD_RC_TX_FREE_THRESH 8
70 70
71 71 #define IBD_RC_QPN_TO_SID(qpn) \
72 72 ((uint64_t)(IBD_RC_SERVICE_ID | ((qpn) & 0xffffff)))
73 73
74 74 /* For interop with legacy OFED */
75 75 #define IBD_RC_QPN_TO_SID_OFED_INTEROP(qpn) \
76 76 ((uint64_t)(IBD_RC_SERVICE_ID_OFED_INTEROP | ((qpn) & 0xffffff)))
77 77
78 78 /* Internet Header + 64 bits of Data Datagram. Refer to RFC 792 */
79 79 #define IBD_RC_IP_ICMP_RETURN_DATA_BYTES 64
80 80
81 81
82 82 /* Functions for Reliable Connected Mode */
83 83 /* Connection Setup/Close Functions */
84 84 static ibt_cm_status_t ibd_rc_dispatch_pass_mad(void *,
85 85 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
86 86 static ibt_cm_status_t ibd_rc_dispatch_actv_mad(void *,
87 87 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
88 88 static void ibd_rc_act_close(ibd_rc_chan_t *, boolean_t);
89 89
90 90 static inline void ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *,
91 91 ibd_rc_chan_t *);
92 92 static inline ibd_rc_chan_t *ibd_rc_rm_header_chan_list(
93 93 ibd_rc_chan_list_t *);
94 94 static inline ibd_rc_chan_t *ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *,
95 95 ibd_rc_chan_t *);
96 96
97 97 /* CQ handlers */
98 98 static void ibd_rc_rcq_handler(ibt_cq_hdl_t, void *);
99 99 static void ibd_rc_scq_handler(ibt_cq_hdl_t, void *);
100 100 static void ibd_rc_poll_rcq(ibd_rc_chan_t *, ibt_cq_hdl_t);
101 101
102 102 /* Receive Functions */
103 103 static int ibd_rc_post_srq(ibd_state_t *, ibd_rwqe_t *);
104 104 static void ibd_rc_srq_freemsg_cb(char *);
105 105 static void ibd_rc_srq_free_rwqe(ibd_state_t *, ibd_rwqe_t *);
106 106
107 107 static int ibd_rc_post_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *);
108 108 static void ibd_rc_freemsg_cb(char *);
109 109 static void ibd_rc_process_rx(ibd_rc_chan_t *, ibd_rwqe_t *, ibt_wc_t *);
110 110 static void ibd_rc_free_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *);
111 111 static void ibd_rc_fini_rxlist(ibd_rc_chan_t *);
112 112
113 113
114 114 /* Send Functions */
115 115 static void ibd_rc_release_swqe(ibd_rc_chan_t *, ibd_swqe_t *);
116 116 static int ibd_rc_init_txlist(ibd_rc_chan_t *);
117 117 static void ibd_rc_fini_txlist(ibd_rc_chan_t *);
118 118 static uint_t ibd_rc_tx_recycle(caddr_t);
119 119
120 120
121 121 void
122 122 ibd_async_rc_close_act_chan(ibd_state_t *state, ibd_req_t *req)
123 123 {
124 124 ibd_rc_chan_t *rc_chan = req->rq_ptr;
125 125 ibd_ace_t *ace;
126 126
127 127 while (rc_chan != NULL) {
128 128 ace = rc_chan->ace;
129 129 ASSERT(ace != NULL);
130 130 /* Close old RC channel */
131 131 ibd_rc_act_close(rc_chan, B_TRUE);
132 132 mutex_enter(&state->id_ac_mutex);
133 133 ASSERT(ace->ac_ref != 0);
134 134 atomic_dec_32(&ace->ac_ref);
135 135 ace->ac_chan = NULL;
136 136 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
137 137 IBD_ACACHE_INSERT_FREE(state, ace);
138 138 ace->ac_ref = 0;
139 139 } else {
140 140 ace->ac_ref |= CYCLEVAL;
141 141 state->rc_delay_ace_recycle++;
142 142 }
143 143 mutex_exit(&state->id_ac_mutex);
144 144 rc_chan = ibd_rc_rm_header_chan_list(
145 145 &state->rc_obs_act_chan_list);
146 146 }
147 147 }
148 148
149 149 void
150 150 ibd_async_rc_recycle_ace(ibd_state_t *state, ibd_req_t *req)
151 151 {
152 152 ibd_ace_t *ace = req->rq_ptr;
153 153 ibd_rc_chan_t *rc_chan;
154 154
155 155 ASSERT(ace != NULL);
156 156 rc_chan = ace->ac_chan;
157 157 ASSERT(rc_chan != NULL);
158 158 /* Close old RC channel */
159 159 ibd_rc_act_close(rc_chan, B_TRUE);
160 160 mutex_enter(&state->id_ac_mutex);
161 161 ASSERT(ace->ac_ref != 0);
162 162 atomic_dec_32(&ace->ac_ref);
163 163 ace->ac_chan = NULL;
164 164 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
165 165 IBD_ACACHE_INSERT_FREE(state, ace);
166 166 ace->ac_ref = 0;
167 167 } else {
168 168 ace->ac_ref |= CYCLEVAL;
169 169 state->rc_delay_ace_recycle++;
170 170 }
171 171 mutex_exit(&state->id_ac_mutex);
172 172 mutex_enter(&state->rc_ace_recycle_lock);
173 173 state->rc_ace_recycle = NULL;
174 174 mutex_exit(&state->rc_ace_recycle_lock);
175 175 }
176 176
177 177 /* Simple ICMP IP Header Template */
178 178 static const ipha_t icmp_ipha = {
179 179 IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
180 180 };
181 181
182 182 /* Packet is too big. Send ICMP packet to GLD to request a smaller MTU */
183 183 void
184 184 ibd_async_rc_process_too_big(ibd_state_t *state, ibd_req_t *req)
185 185 {
186 186 mblk_t *mp = req->rq_ptr;
187 187 ibd_ace_t *ace = req->rq_ptr2;
188 188 uint16_t mtu = state->id_mtu - IPOIB_HDRSIZE;
189 189 uint_t len_needed;
190 190 size_t msg_len;
191 191 mblk_t *pmtu_mp;
192 192 ushort_t sap;
193 193 ib_header_info_t *ibha; /* ib header for pmtu_pkt */
194 194 /*
195 195 * ipha: IP header for pmtu_pkt
196 196 * old_ipha: IP header for old packet
197 197 */
198 198 ipha_t *ipha, *old_ipha;
199 199 icmph_t *icmph;
200 200
201 201 sap = ntohs(((ipoib_hdr_t *)mp->b_rptr)->ipoib_type);
202 202
203 203 if (!pullupmsg(mp, -1)) {
204 204 DPRINT(40, "ibd_async_rc_process_too_big: pullupmsg fail");
205 205 goto too_big_fail;
206 206 }
207 207 /* move to IP header. */
208 208 mp->b_rptr += IPOIB_HDRSIZE;
209 209 old_ipha = (ipha_t *)mp->b_rptr;
210 210
211 211 len_needed = IPH_HDR_LENGTH(old_ipha);
212 212 if (old_ipha->ipha_protocol == IPPROTO_ENCAP) {
213 213 len_needed += IPH_HDR_LENGTH(((uchar_t *)old_ipha +
214 214 len_needed));
215 215 } else if (old_ipha->ipha_protocol == IPPROTO_IPV6) {
216 216 ip6_t *ip6h = (ip6_t *)((uchar_t *)old_ipha
217 217 + len_needed);
218 218 len_needed += ip_hdr_length_v6(mp, ip6h);
219 219 }
220 220 len_needed += IBD_RC_IP_ICMP_RETURN_DATA_BYTES;
221 221 msg_len = msgdsize(mp);
222 222 if (msg_len > len_needed) {
223 223 (void) adjmsg(mp, len_needed - msg_len);
224 224 msg_len = len_needed;
225 225 }
226 226
227 227 if ((pmtu_mp = allocb(sizeof (ib_header_info_t) + sizeof (ipha_t)
228 228 + sizeof (icmph_t), BPRI_MED)) == NULL) {
229 229 DPRINT(40, "ibd_async_rc_process_too_big: allocb fail");
230 230 goto too_big_fail;
231 231 }
232 232 pmtu_mp->b_cont = mp;
233 233 pmtu_mp->b_wptr = pmtu_mp->b_rptr + sizeof (ib_header_info_t)
234 234 + sizeof (ipha_t) + sizeof (icmph_t);
235 235
236 236 ibha = (ib_header_info_t *)pmtu_mp->b_rptr;
237 237
238 238 /* Fill IB header */
239 239 bcopy(&state->id_macaddr, &ibha->ib_dst, IPOIB_ADDRL);
240 240 /*
241 241 * If the GRH is not valid, indicate to GLDv3 by setting
242 242 * the VerTcFlow field to 0.
243 243 */
244 244 ibha->ib_grh.ipoib_vertcflow = 0;
245 245 ibha->ipib_rhdr.ipoib_type = htons(sap);
246 246 ibha->ipib_rhdr.ipoib_mbz = 0;
247 247
248 248 /* Fill IP header */
249 249 ipha = (ipha_t *)&ibha[1];
250 250 *ipha = icmp_ipha;
251 251 ipha->ipha_src = old_ipha->ipha_dst;
252 252 ipha->ipha_dst = old_ipha->ipha_src;
253 253 ipha->ipha_ttl = old_ipha->ipha_ttl;
254 254 msg_len += sizeof (icmp_ipha) + sizeof (icmph_t);
255 255 if (msg_len > IP_MAXPACKET) {
256 256 ibd_print_warn(state, "ibd_async_rc_process_too_big: msg_len(%d) "
257 257 "> IP_MAXPACKET", (uint32_t)msg_len);
258 258 (void) adjmsg(mp, IP_MAXPACKET - msg_len);
259 259 msg_len = IP_MAXPACKET;
260 260 }
261 261 ipha->ipha_length = htons((uint16_t)msg_len);
262 262 ipha->ipha_hdr_checksum = 0;
263 263 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
264 264
265 265 /* Fill ICMP body */
266 266 icmph = (icmph_t *)&ipha[1];
267 267 bzero(icmph, sizeof (icmph_t));
268 268 icmph->icmph_type = ICMP_DEST_UNREACHABLE;
269 269 icmph->icmph_code = ICMP_FRAGMENTATION_NEEDED;
270 270 icmph->icmph_du_mtu = htons(mtu);
271 271 icmph->icmph_checksum = 0;
272 272 icmph->icmph_checksum = IP_CSUM(pmtu_mp,
273 273 (int32_t)sizeof (ib_header_info_t) + (int32_t)sizeof (ipha_t), 0);
274 274
275 275 (void) hcksum_assoc(pmtu_mp, NULL, NULL, 0, 0, 0, 0,
276 276 HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);
277 277
278 278 DPRINT(30, "ibd_async_rc_process_too_big: sap=0x%x, ip_src=0x%x, "
279 279 "ip_dst=0x%x, ttl=%d, len_needed=%d, msg_len=%d",
280 280 sap, ipha->ipha_src, ipha->ipha_dst, ipha->ipha_ttl,
281 281 len_needed, (uint32_t)msg_len);
282 282
283 283 mac_rx(state->id_mh, state->id_rh, pmtu_mp);
284 284
285 285 mutex_enter(&ace->tx_too_big_mutex);
286 286 ace->tx_too_big_ongoing = B_FALSE;
287 287 mutex_exit(&ace->tx_too_big_mutex);
288 288 return;
289 289
290 290 too_big_fail:
291 291 /* Drop packet */
292 292 freemsg(mp);
293 293 mutex_enter(&ace->tx_too_big_mutex);
294 294 ace->tx_too_big_ongoing = B_FALSE;
295 295 mutex_exit(&ace->tx_too_big_mutex);
296 296 }
297 297
298 298 /*
299 299 * Check all active/passive channels. If any active/passive
300 300 * channel has not been used for a long time, close it.
301 301 */
302 302 void
303 303 ibd_rc_conn_timeout_call(void *carg)
304 304 {
305 305 ibd_state_t *state = carg;
306 306 ibd_ace_t *ace, *pre_ace;
307 307 ibd_rc_chan_t *chan, *pre_chan, *next_chan;
308 308 ibd_req_t *req;
309 309
310 310 /* Check all active channels. If chan->is_used == B_FALSE, close it */
311 311 mutex_enter(&state->id_ac_mutex);
312 312 ace = list_head(&state->id_ah_active);
313 313 while ((pre_ace = ace) != NULL) {
314 314 ace = list_next(&state->id_ah_active, ace);
315 315 if (pre_ace->ac_chan != NULL) {
316 316 chan = pre_ace->ac_chan;
317 317 ASSERT(state->id_enable_rc == B_TRUE);
318 318 if (chan->chan_state == IBD_RC_STATE_ACT_ESTAB) {
319 319 if (chan->is_used == B_FALSE) {
320 320 state->rc_timeout_act++;
321 321 INC_REF(pre_ace, 1);
322 322 IBD_ACACHE_PULLOUT_ACTIVE(state,
323 323 pre_ace);
324 324 chan->chan_state =
325 325 IBD_RC_STATE_ACT_CLOSING;
326 326 ibd_rc_signal_act_close(state, pre_ace);
327 327 } else {
328 328 chan->is_used = B_FALSE;
329 329 }
330 330 }
331 331 }
332 332 }
333 333 mutex_exit(&state->id_ac_mutex);
334 334
335 335 /* Check all passive channels. If chan->is_used == B_FALSE, close it */
336 336 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
337 337 next_chan = state->rc_pass_chan_list.chan_list;
338 338 pre_chan = NULL;
339 339 while ((chan = next_chan) != NULL) {
340 340 next_chan = chan->next;
341 341 if (chan->is_used == B_FALSE) {
342 342 req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
343 343 if (req != NULL) {
344 344 /* remove it */
345 345 state->rc_timeout_pas++;
346 346 req->rq_ptr = chan;
347 347 ibd_queue_work_slot(state, req,
348 348 IBD_ASYNC_RC_CLOSE_PAS_CHAN);
349 349 } else {
350 350 ibd_print_warn(state, "ibd_rc_conn_timeout: "
351 351 "alloc ibd_req_t fail");
352 352 if (pre_chan == NULL) {
353 353 state->rc_pass_chan_list.chan_list =
354 354 chan;
355 355 } else {
356 356 pre_chan->next = chan;
357 357 }
358 358 pre_chan = chan;
359 359 }
360 360 } else {
361 361 if (pre_chan == NULL) {
362 362 state->rc_pass_chan_list.chan_list = chan;
363 363 } else {
364 364 pre_chan->next = chan;
365 365 }
366 366 pre_chan = chan;
367 367 chan->is_used = B_FALSE;
368 368 }
369 369 }
370 370 if (pre_chan != NULL) {
371 371 pre_chan->next = NULL;
372 372 } else {
373 373 state->rc_pass_chan_list.chan_list = NULL;
374 374 }
375 375 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
376 376
377 377 mutex_enter(&state->rc_timeout_lock);
378 378 if (state->rc_timeout_start == B_TRUE) {
379 379 state->rc_timeout = timeout(ibd_rc_conn_timeout_call, state,
380 380 SEC_TO_TICK(ibd_rc_conn_timeout));
381 381 }
382 382 mutex_exit(&state->rc_timeout_lock);
383 383 }
384 384
385 385 #ifdef DEBUG
386 386 /*
387 387 * ibd_rc_update_stats - update driver private kstat counters
388 388 *
389 389 * This routine will dump the internal statistics counters for ibd's
390 390 * Reliable Connected Mode. The current stats dump values will
391 391 * be sent to the kernel status area.
392 392 */
393 393 static int
394 394 ibd_rc_update_stats(kstat_t *ksp, int rw)
395 395 {
396 396 ibd_state_t *state;
397 397 ibd_rc_stat_t *ibd_rc_ksp;
398 398
399 399 if (rw == KSTAT_WRITE)
400 400 return (EACCES);
401 401
402 402 state = (ibd_state_t *)ksp->ks_private;
403 403 ASSERT(state != NULL);
404 404 ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data;
405 405
406 406 ibd_rc_ksp->rc_rcv_trans_byte.value.ul = state->rc_rcv_trans_byte;
407 407 ibd_rc_ksp->rc_rcv_trans_pkt.value.ul = state->rc_rcv_trans_pkt;
408 408 ibd_rc_ksp->rc_rcv_copy_byte.value.ul = state->rc_rcv_copy_byte;
409 409 ibd_rc_ksp->rc_rcv_copy_pkt.value.ul = state->rc_rcv_copy_pkt;
410 410 ibd_rc_ksp->rc_rcv_alloc_fail.value.ul = state->rc_rcv_alloc_fail;
411 411
412 412 ibd_rc_ksp->rc_rcq_err.value.ul = state->rc_rcq_err;
413 413
414 414 ibd_rc_ksp->rc_rwqe_short.value.ul = state->rc_rwqe_short;
415 415
416 416 ibd_rc_ksp->rc_xmt_bytes.value.ul = state->rc_xmt_bytes;
417 417 ibd_rc_ksp->rc_xmt_small_pkt.value.ul = state->rc_xmt_small_pkt;
418 418 ibd_rc_ksp->rc_xmt_fragmented_pkt.value.ul =
419 419 state->rc_xmt_fragmented_pkt;
420 420 ibd_rc_ksp->rc_xmt_map_fail_pkt.value.ul = state->rc_xmt_map_fail_pkt;
421 421 ibd_rc_ksp->rc_xmt_map_succ_pkt.value.ul = state->rc_xmt_map_succ_pkt;
422 422 ibd_rc_ksp->rc_ace_not_found.value.ul = state->rc_ace_not_found;
423 423
424 424 ibd_rc_ksp->rc_scq_no_swqe.value.ul = state->rc_scq_no_swqe;
425 425 ibd_rc_ksp->rc_scq_no_largebuf.value.ul = state->rc_scq_no_largebuf;
426 426 ibd_rc_ksp->rc_swqe_short.value.ul = state->rc_swqe_short;
427 427 ibd_rc_ksp->rc_swqe_mac_update.value.ul = state->rc_swqe_mac_update;
428 428 ibd_rc_ksp->rc_xmt_buf_short.value.ul = state->rc_xmt_buf_short;
429 429 ibd_rc_ksp->rc_xmt_buf_mac_update.value.ul =
430 430 state->rc_xmt_buf_mac_update;
431 431
432 432 ibd_rc_ksp->rc_conn_succ.value.ul = state->rc_conn_succ;
433 433 ibd_rc_ksp->rc_conn_fail.value.ul = state->rc_conn_fail;
434 434 ibd_rc_ksp->rc_null_conn.value.ul = state->rc_null_conn;
435 435 ibd_rc_ksp->rc_no_estab_conn.value.ul = state->rc_no_estab_conn;
436 436
437 437 ibd_rc_ksp->rc_act_close.value.ul = state->rc_act_close;
438 438 ibd_rc_ksp->rc_pas_close.value.ul = state->rc_pas_close;
439 439 ibd_rc_ksp->rc_delay_ace_recycle.value.ul = state->rc_delay_ace_recycle;
440 440 ibd_rc_ksp->rc_act_close_simultaneous.value.ul =
441 441 state->rc_act_close_simultaneous;
442 442 ibd_rc_ksp->rc_reset_cnt.value.ul = state->rc_reset_cnt;
443 443 ibd_rc_ksp->rc_timeout_act.value.ul = state->rc_timeout_act;
444 444 ibd_rc_ksp->rc_timeout_pas.value.ul = state->rc_timeout_pas;
445 445
446 446 return (0);
447 447 }
448 448
449 449
450 450 /*
451 451 * ibd_rc_init_stats - initialize kstat data structures
452 452 *
453 453 * This routine will create and initialize the driver private
454 454 * statistics counters.
455 455 */
456 456 int
457 457 ibd_rc_init_stats(ibd_state_t *state)
458 458 {
459 459 kstat_t *ksp;
460 460 ibd_rc_stat_t *ibd_rc_ksp;
461 461 char stat_name[KSTAT_STRLEN];
462 462 int inst;
463 463
464 464 /*
465 465 * Create and init kstat
466 466 */
467 467 inst = ddi_get_instance(state->id_dip);
468 468 (void) snprintf(stat_name, KSTAT_STRLEN, "statistics%d_%x_%u", inst,
469 469 state->id_pkey, state->id_plinkid);
470 470 ksp = kstat_create("ibd", 0, stat_name, "net", KSTAT_TYPE_NAMED,
471 471 sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0);
472 472
473 473 if (ksp == NULL) {
474 474 ibd_print_warn(state, "ibd_rc_init_stats: Could not create "
475 475 "kernel statistics");
476 476 return (DDI_FAILURE);
477 477 }
478 478
479 479 state->rc_ksp = ksp; /* Fill in the ksp of ibd over RC mode */
480 480
481 481 ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data;
482 482
483 483 /*
484 484 * Initialize all the statistics
485 485 */
486 486 kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_byte, "RC: Rx Bytes, "
487 487 "transfer mode", KSTAT_DATA_ULONG);
488 488 kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_pkt, "RC: Rx Pkts, "
489 489 "transfer mode", KSTAT_DATA_ULONG);
490 490 kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_byte, "RC: Rx Bytes, "
491 491 "copy mode", KSTAT_DATA_ULONG);
492 492 kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_pkt, "RC: Rx Pkts, "
493 493 "copy mode", KSTAT_DATA_ULONG);
494 494 kstat_named_init(&ibd_rc_ksp->rc_rcv_alloc_fail, "RC: Rx alloc fail",
495 495 KSTAT_DATA_ULONG);
496 496
497 497 kstat_named_init(&ibd_rc_ksp->rc_rcq_err, "RC: fail in Recv CQ handler",
498 498 KSTAT_DATA_ULONG);
499 499
500 500 kstat_named_init(&ibd_rc_ksp->rc_rwqe_short, "RC: Short rwqe",
501 501 KSTAT_DATA_ULONG);
502 502
503 503 kstat_named_init(&ibd_rc_ksp->rc_xmt_bytes, "RC: Sent Bytes",
504 504 KSTAT_DATA_ULONG);
505 505 kstat_named_init(&ibd_rc_ksp->rc_xmt_small_pkt,
506 506 "RC: Tx pkt small size", KSTAT_DATA_ULONG);
507 507 kstat_named_init(&ibd_rc_ksp->rc_xmt_fragmented_pkt,
508 508 "RC: Tx pkt fragmentary", KSTAT_DATA_ULONG);
509 509 kstat_named_init(&ibd_rc_ksp->rc_xmt_map_fail_pkt,
510 510 "RC: Tx pkt fail ibt_map_mem_iov()", KSTAT_DATA_ULONG);
511 511 kstat_named_init(&ibd_rc_ksp->rc_xmt_map_succ_pkt,
512 512 "RC: Tx pkt succ ibt_map_mem_iov()", KSTAT_DATA_ULONG);
513 513 kstat_named_init(&ibd_rc_ksp->rc_ace_not_found, "RC: ace not found",
514 514 KSTAT_DATA_ULONG);
515 515
516 516 kstat_named_init(&ibd_rc_ksp->rc_scq_no_swqe, "RC: No swqe after "
517 517 "recycle", KSTAT_DATA_ULONG);
518 518 kstat_named_init(&ibd_rc_ksp->rc_scq_no_largebuf, "RC: No large tx buf "
519 519 "after recycle", KSTAT_DATA_ULONG);
520 520 kstat_named_init(&ibd_rc_ksp->rc_swqe_short, "RC: No swqe in ibd_send",
521 521 KSTAT_DATA_ULONG);
522 522 kstat_named_init(&ibd_rc_ksp->rc_swqe_mac_update, "RC: mac_tx_update "
523 523 "#, swqe available", KSTAT_DATA_ULONG);
524 524 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_short, "RC: No buf in "
525 525 "ibd_send", KSTAT_DATA_ULONG);
526 526 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_mac_update, "RC: "
527 527 "mac_tx_update #, buf available", KSTAT_DATA_ULONG);
528 528
529 529 kstat_named_init(&ibd_rc_ksp->rc_conn_succ, "RC: succ connected",
530 530 KSTAT_DATA_ULONG);
531 531 kstat_named_init(&ibd_rc_ksp->rc_conn_fail, "RC: fail connect",
532 532 KSTAT_DATA_ULONG);
533 533 kstat_named_init(&ibd_rc_ksp->rc_null_conn, "RC: null conn for unicast "
534 534 "pkt", KSTAT_DATA_ULONG);
535 535 kstat_named_init(&ibd_rc_ksp->rc_no_estab_conn, "RC: not in act estab "
536 536 "state", KSTAT_DATA_ULONG);
537 537
538 538 kstat_named_init(&ibd_rc_ksp->rc_act_close, "RC: call ibd_rc_act_close",
539 539 KSTAT_DATA_ULONG);
540 540 kstat_named_init(&ibd_rc_ksp->rc_pas_close, "RC: call ibd_rc_pas_close",
541 541 KSTAT_DATA_ULONG);
542 542 kstat_named_init(&ibd_rc_ksp->rc_delay_ace_recycle, "RC: delay ace "
543 543 "recycle", KSTAT_DATA_ULONG);
544 544 kstat_named_init(&ibd_rc_ksp->rc_act_close_simultaneous, "RC: "
545 545 "simultaneous ibd_rc_act_close", KSTAT_DATA_ULONG);
546 546 kstat_named_init(&ibd_rc_ksp->rc_reset_cnt, "RC: Reset RC channel",
547 547 KSTAT_DATA_ULONG);
548 548 kstat_named_init(&ibd_rc_ksp->rc_timeout_act, "RC: timeout act side",
549 549 KSTAT_DATA_ULONG);
550 550 kstat_named_init(&ibd_rc_ksp->rc_timeout_pas, "RC: timeout pas side",
551 551 KSTAT_DATA_ULONG);
552 552
553 553 /*
554 554 * Function to provide kernel stat update on demand
555 555 */
556 556 ksp->ks_update = ibd_rc_update_stats;
557 557
558 558 /*
559 559 * Pointer into provider's raw statistics
560 560 */
561 561 ksp->ks_private = (void *)state;
562 562
563 563 /*
564 564 * Add kstat to the system's kstat chain
565 565 */
566 566 kstat_install(ksp);
567 567
568 568 return (DDI_SUCCESS);
569 569 }
570 570 #endif
571 571
572 572 static ibt_status_t
573 573 ibd_rc_alloc_chan(ibd_rc_chan_t **ret_chan, ibd_state_t *state,
574 574 boolean_t is_tx_chan)
575 575 {
576 576 ibt_status_t result;
577 577 ibd_rc_chan_t *chan;
578 578 ibt_rc_chan_alloc_args_t alloc_args;
579 579 ibt_chan_alloc_flags_t alloc_flags;
580 580 ibt_chan_sizes_t sizes;
581 581 ibt_cq_attr_t cq_atts;
582 582 int rv;
583 583
584 584 chan = kmem_zalloc(sizeof (ibd_rc_chan_t), KM_SLEEP);
585 585
586 586 chan->state = state;
587 587 mutex_init(&chan->rx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
588 588 mutex_init(&chan->rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
589 589 mutex_init(&chan->tx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
590 590 mutex_init(&chan->tx_rel_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
591 591 mutex_init(&chan->tx_post_lock, NULL, MUTEX_DRIVER, NULL);
592 592 mutex_init(&chan->tx_poll_lock, NULL, MUTEX_DRIVER, NULL);
593 593
594 594 /* Allocate IB structures for a new RC channel. */
595 595 if (is_tx_chan) {
596 596 chan->scq_size = state->id_rc_num_swqe;
597 597 chan->rcq_size = IBD_RC_MIN_CQ_SIZE;
598 598 } else {
599 599 chan->scq_size = IBD_RC_MIN_CQ_SIZE;
600 600 chan->rcq_size = state->id_rc_num_rwqe;
601 601 }
602 602 cq_atts.cq_size = chan->scq_size;
603 603 cq_atts.cq_sched = NULL;
604 604 cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
605 605 result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->scq_hdl,
606 606 &chan->scq_size);
607 607 if (result != IBT_SUCCESS) {
608 608 DPRINT(40, "ibd_rc_alloc_chan: error <%d> "
609 609 "creating send completion queue (size <%d>)",
610 610 result, chan->scq_size);
611 611 goto alloc_scq_err;
612 612 } /* if failure to alloc cq */
613 613
614 614 if (ibt_modify_cq(chan->scq_hdl, state->id_rc_tx_comp_count,
615 615 state->id_rc_tx_comp_usec, 0) != IBT_SUCCESS) {
616 616 DPRINT(30, "ibd_rc_alloc_chan: Send CQ "
617 617 "interrupt moderation failed");
618 618 }
619 619
620 620 ibt_set_cq_private(chan->scq_hdl, (void *) (uintptr_t)chan);
621 621 ibt_set_cq_handler(chan->scq_hdl, ibd_rc_scq_handler,
622 622 (void *) (uintptr_t)chan);
623 623
624 624 cq_atts.cq_size = chan->rcq_size;
625 625 cq_atts.cq_sched = NULL;
626 626 cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
627 627 result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->rcq_hdl,
628 628 &chan->rcq_size);
629 629 if (result != IBT_SUCCESS) {
630 630 ibd_print_warn(state, "ibd_rc_alloc_chan: error <%d> creating "
631 631 "rx completion queue (size <%d>)", result, chan->rcq_size);
632 632 goto alloc_rcq_err;
633 633 } /* if failure to alloc cq */
634 634
635 635 if (ibt_modify_cq(chan->rcq_hdl, state->id_rc_rx_comp_count,
636 636 state->id_rc_rx_comp_usec, 0) != IBT_SUCCESS) {
637 637 DPRINT(30, "ibd_rc_alloc_chan: Receive CQ "
638 638 "interrupt moderation failed");
639 639 }
640 640
641 641 ibt_set_cq_private(chan->rcq_hdl, (void *) (uintptr_t)chan);
642 642 ibt_set_cq_handler(chan->rcq_hdl, ibd_rc_rcq_handler,
643 643 (void *)(uintptr_t)chan);
644 644
645 645 if (is_tx_chan) {
646 646 chan->is_tx_chan = B_TRUE;
647 647 if (ibd_rc_init_txlist(chan) != DDI_SUCCESS) {
648 648 ibd_print_warn(state, "ibd_rc_alloc_chan: "
649 649 "ibd_rc_init_txlist failed");
650 650 goto init_txlist_err;
651 651 }
652 652 if (ibd_rc_tx_softintr == 1) {
653 653 if ((rv = ddi_add_softintr(state->id_dip,
654 654 DDI_SOFTINT_LOW, &chan->scq_softintr, NULL, NULL,
655 655 ibd_rc_tx_recycle, (caddr_t)chan)) !=
656 656 DDI_SUCCESS) {
657 657 DPRINT(10, "ibd_rc_alloc_chan: failed in "
658 658 "ddi_add_softintr(scq_softintr), ret=%d",
659 659 rv);
660 660 goto alloc_softintr_err;
661 661 }
662 662 }
663 663 } else {
664 664 chan->is_tx_chan = B_FALSE;
665 665 }
666 666
667 667 /*
668 668 * enable completions
669 669 */
670 670 result = ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION);
671 671 if (result != IBT_SUCCESS) {
672 672 ibd_print_warn(state, "ibd_rc_alloc_chan: ibt_enable_cq_notify"
673 673 "(scq) failed: status %d\n", result);
674 674 goto alloc_scq_enable_err;
675 675 }
676 676
677 677 /* We will enable chan->rcq_hdl later. */
678 678
679 679 /* alloc a RC channel */
680 680 bzero(&alloc_args, sizeof (ibt_rc_chan_alloc_args_t));
681 681 bzero(&sizes, sizeof (ibt_chan_sizes_t));
682 682
683 683 alloc_args.rc_flags = IBT_WR_SIGNALED;
684 684 alloc_args.rc_control = IBT_CEP_NO_FLAGS;
685 685
686 686 alloc_args.rc_scq = chan->scq_hdl;
687 687 alloc_args.rc_rcq = chan->rcq_hdl;
688 688 alloc_args.rc_pd = state->id_pd_hdl;
689 689
690 690 alloc_args.rc_hca_port_num = state->id_port;
691 691 alloc_args.rc_clone_chan = NULL;
692 692
693 693 /* scatter/gather */
694 694 alloc_args.rc_sizes.cs_sq_sgl = state->rc_tx_max_sqseg;
695 695
696 696 /*
697 697 * Use a single SGL element on the receive side, because the
698 698 * ibd driver allocates one contiguous block of memory for
699 699 * each ibt_post_recv().
700 700 */
701 701 alloc_args.rc_sizes.cs_rq_sgl = 1;
702 702
703 703 /* The send queue size and the receive queue size */
704 704 alloc_args.rc_sizes.cs_sq = chan->scq_size;
705 705 alloc_args.rc_sizes.cs_rq = chan->rcq_size;
706 706
707 707 if (state->id_hca_res_lkey_capab) {
708 708 alloc_args.rc_flags = IBT_FAST_REG_RES_LKEY;
709 709 } else {
710 710 DPRINT(40, "ibd_rc_alloc_chan: reserved lkey not supported");
711 711 }
712 712
713 713 if (state->rc_enable_srq) {
714 714 alloc_flags = IBT_ACHAN_USES_SRQ;
715 715 alloc_args.rc_srq = state->rc_srq_hdl;
716 716 } else {
717 717 alloc_flags = IBT_ACHAN_NO_FLAGS;
718 718 }
719 719
720 720 result = ibt_alloc_rc_channel(state->id_hca_hdl,
721 721 alloc_flags, &alloc_args, &chan->chan_hdl, &sizes);
722 722 if (result != IBT_SUCCESS) {
723 723 ibd_print_warn(state, "ibd_rc_alloc_chan: "
724 724 "ibt_alloc_rc_channel fail:<%d>", result);
725 725 goto alloc_scq_enable_err;
726 726 }
727 727
728 728 if (is_tx_chan)
729 729 atomic_inc_32(&state->rc_num_tx_chan);
730 730 else
731 731 atomic_inc_32(&state->rc_num_rx_chan);
732 732
733 733 /* For the connection reaper routine ibd_rc_conn_timeout_call() */
734 734 chan->is_used = B_TRUE;
735 735
736 736 *ret_chan = chan;
737 737 return (IBT_SUCCESS);
738 738
739 739 alloc_scq_enable_err:
740 740 if (is_tx_chan) {
741 741 if (ibd_rc_tx_softintr == 1) {
742 742 ddi_remove_softintr(chan->scq_softintr);
743 743 }
744 744 }
745 745 alloc_softintr_err:
746 746 if (is_tx_chan) {
747 747 ibd_rc_fini_txlist(chan);
748 748 }
749 749 init_txlist_err:
750 750 (void) ibt_free_cq(chan->rcq_hdl);
751 751 alloc_rcq_err:
752 752 (void) ibt_free_cq(chan->scq_hdl);
753 753 alloc_scq_err:
754 754 mutex_destroy(&chan->tx_poll_lock);
755 755 mutex_destroy(&chan->tx_post_lock);
756 756 mutex_destroy(&chan->tx_rel_list.dl_mutex);
757 757 mutex_destroy(&chan->tx_wqe_list.dl_mutex);
758 758 mutex_destroy(&chan->rx_free_list.dl_mutex);
759 759 mutex_destroy(&chan->rx_wqe_list.dl_mutex);
760 760 kmem_free(chan, sizeof (ibd_rc_chan_t));
761 761 return (result);
762 762 }
763 763
764 764 static void
765 765 ibd_rc_free_chan(ibd_rc_chan_t *chan)
766 766 {
767 767 ibt_status_t ret;
768 768
769 769 /* DPRINT(30, "ibd_rc_free_chan: chan=%p", chan); */
770 770
771 771 if (chan->chan_hdl != NULL) {
772 772 ret = ibt_free_channel(chan->chan_hdl);
773 773 if (ret != IBT_SUCCESS) {
774 774 DPRINT(40, "ib_rc_free_chan: ibt_free_channel failed, "
775 775 "chan=%p, returned: %d", chan, ret);
776 776 return;
777 777 }
778 778 chan->chan_hdl = NULL;
779 779 }
780 780
781 781 if (chan->rcq_hdl != NULL) {
782 782 ret = ibt_free_cq(chan->rcq_hdl);
783 783 if (ret != IBT_SUCCESS) {
784 784 DPRINT(40, "ib_rc_free_chan: ibt_free_cq(rcq) failed, "
785 785 "chan=%p, returned: %d", chan, ret);
786 786 return;
787 787 }
788 788 chan->rcq_hdl = NULL;
789 789 }
790 790
791 791 if (chan->scq_hdl != NULL) {
792 792 ret = ibt_free_cq(chan->scq_hdl);
793 793 if (ret != IBT_SUCCESS) {
794 794 DPRINT(40, "ib_rc_free_chan: ibt_free_cq(scq) failed, "
795 795 "chan=%p, returned: %d", chan, ret);
796 796 return;
797 797 }
798 798 chan->scq_hdl = NULL;
799 799 }
800 800
801 801 /* Free buffers */
802 802 if (chan->is_tx_chan) {
803 803 ibd_rc_fini_txlist(chan);
804 804 if (ibd_rc_tx_softintr == 1) {
805 805 ddi_remove_softintr(chan->scq_softintr);
806 806 }
807 807 atomic_dec_32(&chan->state->rc_num_tx_chan);
808 808 } else {
809 809 if (!chan->state->rc_enable_srq) {
810 810 ibd_rc_fini_rxlist(chan);
811 811 }
812 812 atomic_dec_32(&chan->state->rc_num_rx_chan);
813 813 }
814 814
815 815 mutex_destroy(&chan->tx_poll_lock);
816 816 mutex_destroy(&chan->tx_post_lock);
817 817 mutex_destroy(&chan->tx_rel_list.dl_mutex);
818 818 mutex_destroy(&chan->tx_wqe_list.dl_mutex);
819 819 mutex_destroy(&chan->rx_free_list.dl_mutex);
820 820 mutex_destroy(&chan->rx_wqe_list.dl_mutex);
821 821
822 822 /*
823 823 * If it is a passive channel, the caller must make sure it has
824 824 * been removed from chan->state->rc_pass_chan_list.
825 825 */
826 826 kmem_free(chan, sizeof (ibd_rc_chan_t));
827 827 }
828 828
829 829 /* Add a RC channel */
830 830 static inline void
831 831 ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan)
832 832 {
833 833 mutex_enter(&list->chan_list_mutex);
834 834 if (list->chan_list == NULL) {
835 835 list->chan_list = chan;
836 836 chan->next = NULL;
837 837 } else {
838 838 chan->next = list->chan_list;
839 839 list->chan_list = chan;
840 840 }
841 841 mutex_exit(&list->chan_list_mutex);
842 842 }
843 843
844 844 static boolean_t
845 845 ibd_rc_re_add_to_pas_chan_list(ibd_rc_chan_t *chan)
846 846 {
847 847 ibd_state_t *state = chan->state;
848 848
849 849 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
850 850 if ((state->id_mac_state & IBD_DRV_STARTED) == 0) {
851 851 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
852 852 return (B_FALSE);
853 853 } else {
854 854 if (state->rc_pass_chan_list.chan_list == NULL) {
855 855 state->rc_pass_chan_list.chan_list = chan;
856 856 chan->next = NULL;
857 857 } else {
858 858 chan->next = state->rc_pass_chan_list.chan_list;
859 859 state->rc_pass_chan_list.chan_list = chan;
860 860 }
861 861 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
862 862 return (B_TRUE);
863 863 }
864 864 }
865 865
866 866 /* Remove a RC channel */
867 867 static inline ibd_rc_chan_t *
868 868 ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan)
869 869 {
870 870 ibd_rc_chan_t *pre_chan;
871 871
872 872 mutex_enter(&list->chan_list_mutex);
873 873 if (list->chan_list == chan) {
874 874 DPRINT(30, "ibd_rc_rm_from_chan_list(first): found chan(%p)"
875 875 " in chan_list", chan);
876 876 list->chan_list = chan->next;
877 877 } else {
878 878 pre_chan = list->chan_list;
879 879 while (pre_chan != NULL) {
880 880 if (pre_chan->next == chan) {
881 881 DPRINT(30, "ibd_rc_rm_from_chan_list"
882 882 "(middle): found chan(%p)", chan);
883 883 pre_chan->next = chan->next;
884 884 break;
885 885 }
886 886 pre_chan = pre_chan->next;
887 887 }
888 888 if (pre_chan == NULL)
889 889 chan = NULL;
890 890 }
891 891 mutex_exit(&list->chan_list_mutex);
892 892 return (chan);
893 893 }
894 894
895 895 static inline ibd_rc_chan_t *
896 896 ibd_rc_rm_header_chan_list(ibd_rc_chan_list_t *list)
897 897 {
898 898 ibd_rc_chan_t *rc_chan;
899 899
900 900 mutex_enter(&list->chan_list_mutex);
901 901 rc_chan = list->chan_list;
902 902 if (rc_chan != NULL) {
903 903 list->chan_list = rc_chan->next;
904 904 }
905 905 mutex_exit(&list->chan_list_mutex);
906 906 return (rc_chan);
907 907 }
908 908
909 909 static int
910 910 ibd_rc_alloc_srq_copybufs(ibd_state_t *state)
911 911 {
912 912 ibt_mr_attr_t mem_attr;
913 913 uint_t rc_rx_bufs_sz;
914 914
915 915 /*
916 916 * Allocate one big chunk for all regular rx copy bufs
917 917 */
918 918 rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * state->rc_srq_size;
919 919
920 920 state->rc_srq_rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP);
921 921
922 922 state->rc_srq_rwqes = kmem_zalloc(state->rc_srq_size *
923 923 sizeof (ibd_rwqe_t), KM_SLEEP);
924 924
925 925 /*
926 926 * Do one memory registration on the entire rxbuf area
927 927 */
928 928 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_srq_rx_bufs;
929 929 mem_attr.mr_len = rc_rx_bufs_sz;
930 930 mem_attr.mr_as = NULL;
931 931 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
932 932 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
933 933 &state->rc_srq_rx_mr_hdl, &state->rc_srq_rx_mr_desc)
934 934 != IBT_SUCCESS) {
935 935 DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr() "
936 936 "failed");
937 937 kmem_free(state->rc_srq_rwqes,
938 938 state->rc_srq_size * sizeof (ibd_rwqe_t));
939 939 kmem_free(state->rc_srq_rx_bufs, rc_rx_bufs_sz);
940 940 state->rc_srq_rx_bufs = NULL;
941 941 state->rc_srq_rwqes = NULL;
942 942 return (DDI_FAILURE);
943 943 }
944 944
945 945 return (DDI_SUCCESS);
946 946 }
947 947
948 948 static void
949 949 ibd_rc_free_srq_copybufs(ibd_state_t *state)
950 950 {
951 951 uint_t rc_rx_buf_sz;
952 952
953 953 /*
954 954 * Don't change the value of state->rc_mtu between the call to
955 955 * ibd_rc_alloc_srq_copybufs() and the call to ibd_rc_free_srq_copybufs().
956 956 */
957 957 rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE;
958 958
959 959 /*
960 960 * Unregister rxbuf mr
961 961 */
962 962 if (ibt_deregister_mr(state->id_hca_hdl,
963 963 state->rc_srq_rx_mr_hdl) != IBT_SUCCESS) {
964 964 DPRINT(40, "ibd_rc_free_srq_copybufs: ibt_deregister_mr()"
965 965 " failed");
966 966 }
967 967 state->rc_srq_rx_mr_hdl = NULL;
968 968
969 969 /*
970 970 * Free rxbuf memory
971 971 */
972 972 kmem_free(state->rc_srq_rwqes,
973 973 state->rc_srq_size * sizeof (ibd_rwqe_t));
974 974 kmem_free(state->rc_srq_rx_bufs, state->rc_srq_size * rc_rx_buf_sz);
975 975 state->rc_srq_rwqes = NULL;
976 976 state->rc_srq_rx_bufs = NULL;
977 977 }
978 978
979 979 /*
980 980 * Allocate and post a certain number of SRQ receive buffers and WRs.
981 981 */
982 982 int
983 983 ibd_rc_init_srq_list(ibd_state_t *state)
984 984 {
985 985 ibd_rwqe_t *rwqe;
986 986 ibt_lkey_t lkey;
987 987 int i;
988 988 uint_t len;
989 989 uint8_t *bufaddr;
990 990 ibt_srq_sizes_t srq_sizes;
991 991 ibt_srq_sizes_t srq_real_sizes;
992 992 ibt_status_t ret;
993 993
994 994 srq_sizes.srq_sgl_sz = 1;
995 995 srq_sizes.srq_wr_sz = state->id_rc_num_srq;
996 996 ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
997 997 state->id_pd_hdl, &srq_sizes, &state->rc_srq_hdl, &srq_real_sizes);
998 998 if (ret != IBT_SUCCESS) {
999 999 /*
1000 1000 * The following code is for CR 6932460 (can't configure ibd
1001 1001 * interfaces on 32-bit x86 systems). 32-bit x86 systems have
1002 1002 * less memory than 64-bit x86 systems. If the current
1003 1003 * resource request can't be satisfied, retry with a smaller
1004 1004 * request here.
1005 1005 */
1006 1006 len = state->id_rc_num_srq;
1007 1007 while ((ret == IBT_HCA_WR_EXCEEDED) &&
1008 1008 (len >= 2 * IBD_RC_MIN_CQ_SIZE)) {
1009 1009 len = len/2;
1010 1010 srq_sizes.srq_sgl_sz = 1;
1011 1011 srq_sizes.srq_wr_sz = len;
1012 1012 ret = ibt_alloc_srq(state->id_hca_hdl,
1013 1013 IBT_SRQ_NO_FLAGS, state->id_pd_hdl, &srq_sizes,
1014 1014 &state->rc_srq_hdl, &srq_real_sizes);
1015 1015 }
1016 1016 if (ret != IBT_SUCCESS) {
1017 1017 DPRINT(10, "ibd_rc_init_srq_list: ibt_alloc_srq failed."
1018 1018 "req_sgl_sz=%d, req_wr_sz=0x%x, final_req_wr_sz="
1019 1019 "0x%x, ret=%d", srq_sizes.srq_sgl_sz,
1020 1020 srq_sizes.srq_wr_sz, len, ret);
1021 1021 return (DDI_FAILURE);
1022 1022 }
1023 1023 state->id_rc_num_srq = len;
1024 1024 state->id_rc_num_rwqe = state->id_rc_num_srq + 1;
1025 1025 }
1026 1026
1027 1027 state->rc_srq_size = srq_real_sizes.srq_wr_sz;
1028 1028 if (ibd_rc_alloc_srq_copybufs(state) != DDI_SUCCESS) {
1029 1029 ret = ibt_free_srq(state->rc_srq_hdl);
1030 1030 if (ret != IBT_SUCCESS) {
1031 1031 ibd_print_warn(state, "ibd_rc_init_srq_list: "
1032 1032 "ibt_free_srq fail, ret=%d", ret);
1033 1033 }
1034 1034 return (DDI_FAILURE);
1035 1035 }
1036 1036
1037 1037 /*
1038 1038 * Allocate and setup the rwqe list
1039 1039 */
1040 1040 lkey = state->rc_srq_rx_mr_desc.md_lkey;
1041 1041 rwqe = state->rc_srq_rwqes;
1042 1042 bufaddr = state->rc_srq_rx_bufs;
1043 1043 len = state->rc_mtu + IPOIB_GRH_SIZE;
1044 1044 state->rc_srq_rwqe_list.dl_cnt = 0;
1045 1045 state->rc_srq_rwqe_list.dl_bufs_outstanding = 0;
1046 1046 for (i = 0; i < state->rc_srq_size; i++, rwqe++, bufaddr += len) {
1047 1047 rwqe->w_state = state;
1048 1048 rwqe->w_freeing_wqe = B_FALSE;
1049 1049 rwqe->w_freemsg_cb.free_func = ibd_rc_srq_freemsg_cb;
1050 1050 rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
1051 1051 rwqe->rwqe_copybuf.ic_bufaddr = bufaddr;
1052 1052
1053 1053 if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0,
1054 1054 &rwqe->w_freemsg_cb)) == NULL) {
1055 1055 DPRINT(40, "ibd_rc_init_srq_list: desballoc() failed");
1056 1056 rwqe->rwqe_copybuf.ic_bufaddr = NULL;
1057 1057 if (atomic_dec_32_nv(&state->id_running) != 0) {
1058 1058 cmn_err(CE_WARN, "ibd_rc_init_srq_list: "
1059 1059 "id_running was not 1\n");
1060 1060 }
1061 1061 ibd_rc_fini_srq_list(state);
1062 1062 atomic_inc_32(&state->id_running);
1063 1063 return (DDI_FAILURE);
1064 1064 }
1065 1065
1066 1066 rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey;
1067 1067 /* Leave IPOIB_GRH_SIZE space */
1068 1068 rwqe->rwqe_copybuf.ic_sgl.ds_va =
1069 1069 (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE);
1070 1070 rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu;
1071 1071 rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
1072 1072 rwqe->w_rwr.wr_nds = 1;
1073 1073 rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;
1074 1074 (void) ibd_rc_post_srq(state, rwqe);
1075 1075 }
1076 1076
1077 1077 mutex_enter(&state->rc_srq_free_list.dl_mutex);
1078 1078 state->rc_srq_free_list.dl_head = NULL;
1079 1079 state->rc_srq_free_list.dl_cnt = 0;
1080 1080 mutex_exit(&state->rc_srq_free_list.dl_mutex);
1081 1081
1082 1082 return (DDI_SUCCESS);
1083 1083 }
1084 1084
1085 1085 /*
1086 1086 * Free the statically allocated Rx buffer list for SRQ.
1087 1087 */
1088 1088 void
1089 1089 ibd_rc_fini_srq_list(ibd_state_t *state)
1090 1090 {
1091 1091 ibd_rwqe_t *rwqe;
1092 1092 int i;
1093 1093 ibt_status_t ret;
1094 1094
1095 1095 ASSERT(state->id_running == 0);
1096 1096 ret = ibt_free_srq(state->rc_srq_hdl);
1097 1097 if (ret != IBT_SUCCESS) {
1098 1098 ibd_print_warn(state, "ibd_rc_fini_srq_list: "
1099 1099 "ibt_free_srq fail, ret=%d", ret);
1100 1100 }
1101 1101
1102 1102 mutex_enter(&state->rc_srq_rwqe_list.dl_mutex);
1103 1103 rwqe = state->rc_srq_rwqes;
1104 1104 for (i = 0; i < state->rc_srq_size; i++, rwqe++) {
1105 1105 if (rwqe->rwqe_im_mblk != NULL) {
1106 1106 rwqe->w_freeing_wqe = B_TRUE;
1107 1107 freemsg(rwqe->rwqe_im_mblk);
1108 1108 }
1109 1109 }
1110 1110 mutex_exit(&state->rc_srq_rwqe_list.dl_mutex);
1111 1111
1112 1112 ibd_rc_free_srq_copybufs(state);
1113 1113 }
1114 1114
1115 1115 /* Repost the elements in state->rc_srq_free_list */
1116 1116 int
1117 1117 ibd_rc_repost_srq_free_list(ibd_state_t *state)
1118 1118 {
1119 1119 ibd_rwqe_t *rwqe;
1120 1120 ibd_wqe_t *list;
1121 1121 uint_t len;
1122 1122
1123 1123 mutex_enter(&state->rc_srq_free_list.dl_mutex);
1124 1124 if (state->rc_srq_free_list.dl_head != NULL) {
1125 1125 /* repost them */
1126 1126 len = state->rc_mtu + IPOIB_GRH_SIZE;
1127 1127 list = state->rc_srq_free_list.dl_head;
1128 1128 state->rc_srq_free_list.dl_head = NULL;
1129 1129 state->rc_srq_free_list.dl_cnt = 0;
1130 1130 mutex_exit(&state->rc_srq_free_list.dl_mutex);
1131 1131 while (list != NULL) {
1132 1132 rwqe = WQE_TO_RWQE(list);
1133 1133 if ((rwqe->rwqe_im_mblk == NULL) &&
1134 1134 ((rwqe->rwqe_im_mblk = desballoc(
1135 1135 rwqe->rwqe_copybuf.ic_bufaddr, len, 0,
1136 1136 &rwqe->w_freemsg_cb)) == NULL)) {
1137 1137 DPRINT(40, "ibd_rc_repost_srq_free_list: "
1138 1138 "failed in desballoc()");
1139 1139 do {
1140 1140 ibd_rc_srq_free_rwqe(state, rwqe);
1141 1141 list = list->w_next;
1142 1142 rwqe = WQE_TO_RWQE(list);
1143 1143 } while (list != NULL);
1144 1144 return (DDI_FAILURE);
1145 1145 }
1146 1146 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
1147 1147 ibd_rc_srq_free_rwqe(state, rwqe);
1148 1148 }
1149 1149 list = list->w_next;
1150 1150 }
1151 1151 return (DDI_SUCCESS);
1152 1152 }
1153 1153 mutex_exit(&state->rc_srq_free_list.dl_mutex);
1154 1154 return (DDI_SUCCESS);
1155 1155 }
1156 1156
1157 1157 /*
1158 1158 * Free an allocated recv wqe.
1159 1159 */
1160 1160 static void
1161 1161 ibd_rc_srq_free_rwqe(ibd_state_t *state, ibd_rwqe_t *rwqe)
1162 1162 {
1163 1163 /*
1164 1164 * desballoc() failed (no memory) or the posting of rwqe failed.
1165 1165 *
1166 1166 * This rwqe is placed on a free list so that it
1167 1167 * can be reinstated in future.
1168 1168 *
1169 1169 * NOTE: no code currently exists to reinstate
1170 1170 * these "lost" rwqes.
1171 1171 */
1172 1172 mutex_enter(&state->rc_srq_free_list.dl_mutex);
1173 1173 state->rc_srq_free_list.dl_cnt++;
1174 1174 rwqe->rwqe_next = state->rc_srq_free_list.dl_head;
1175 1175 state->rc_srq_free_list.dl_head = RWQE_TO_WQE(rwqe);
1176 1176 mutex_exit(&state->rc_srq_free_list.dl_mutex);
1177 1177 }
1178 1178
1179 1179 static void
1180 1180 ibd_rc_srq_freemsg_cb(char *arg)
1181 1181 {
1182 1182 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg;
1183 1183 ibd_state_t *state = rwqe->w_state;
1184 1184
1185 1185 ASSERT(state->rc_enable_srq);
1186 1186
1187 1187 /*
1188 1188 * If the driver is stopped, just free the rwqe.
1189 1189 */
1190 1190 if (atomic_add_32_nv(&state->id_running, 0) == 0) {
1191 1191 if (!rwqe->w_freeing_wqe) {
1192 1192 atomic_dec_32(
1193 1193 &state->rc_srq_rwqe_list.dl_bufs_outstanding);
1194 1194 DPRINT(6, "ibd_rc_srq_freemsg_cb: wqe being freed");
1195 1195 rwqe->rwqe_im_mblk = NULL;
1196 1196 ibd_rc_srq_free_rwqe(state, rwqe);
1197 1197 }
1198 1198 return;
1199 1199 }
1200 1200
1201 1201 atomic_dec_32(&state->rc_srq_rwqe_list.dl_bufs_outstanding);
1202 1202
1203 1203 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size);
1204 1204 ASSERT(!rwqe->w_freeing_wqe);
1205 1205
1206 1206 /*
1207 1207 * The upper layer has released the held mblk, so we
1208 1208 * no longer need to keep the old pointer in
1209 1209 * our rwqe.
1210 1210 */
1211 1211 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr,
1212 1212 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb);
1213 1213 if (rwqe->rwqe_im_mblk == NULL) {
1214 1214 DPRINT(40, "ibd_rc_srq_freemsg_cb: desballoc failed");
1215 1215 ibd_rc_srq_free_rwqe(state, rwqe);
1216 1216 return;
1217 1217 }
1218 1218
1219 1219 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
1220 1220 ibd_print_warn(state, "ibd_rc_srq_freemsg_cb: ibd_rc_post_srq"
1221 1221 " failed");
1222 1222 ibd_rc_srq_free_rwqe(state, rwqe);
1223 1223 return;
1224 1224 }
1225 1225 }
1226 1226
1227 1227 /*
1228 1228 * Post a rwqe to the hardware and add it to the Rx list.
1229 1229 */
1230 1230 static int
1231 1231 ibd_rc_post_srq(ibd_state_t *state, ibd_rwqe_t *rwqe)
1232 1232 {
1233 1233 /*
1234 1234 * Increment dl_cnt before posting the recv, because dl_cnt
1235 1235 * must be updated before the corresponding
1236 1236 * ibd_rc_process_rx() is called.
1237 1237 */
1238 1238 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size);
1239 - atomic_add_32(&state->rc_srq_rwqe_list.dl_cnt, 1);
1239 + atomic_inc_32(&state->rc_srq_rwqe_list.dl_cnt);
1240 1240 if (ibt_post_srq(state->rc_srq_hdl, &rwqe->w_rwr, 1, NULL) !=
1241 1241 IBT_SUCCESS) {
1242 1242 atomic_dec_32(&state->rc_srq_rwqe_list.dl_cnt);
1243 1243 DPRINT(40, "ibd_rc_post_srq: ibt_post_srq() failed");
1244 1244 return (DDI_FAILURE);
1245 1245 }
1246 1246
1247 1247 return (DDI_SUCCESS);
1248 1248 }
1249 1249
1250 1250 /*
1251 1251 * Post a rwqe to the hardware and add it to the Rx list.
1252 1252 */
1253 1253 static int
1254 1254 ibd_rc_post_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe)
1255 1255 {
1256 1256 /*
1257 1257 * Increment dl_cnt before posting the recv, because dl_cnt
1258 1258 * must already be updated before the corresponding
1259 1259 * ibd_rc_process_rx() is called.
1260 1260 */
1261 - atomic_add_32(&chan->rx_wqe_list.dl_cnt, 1);
1261 + atomic_inc_32(&chan->rx_wqe_list.dl_cnt);
1262 1262 if (ibt_post_recv(chan->chan_hdl, &rwqe->w_rwr, 1, NULL) !=
1263 1263 IBT_SUCCESS) {
1264 1264 atomic_dec_32(&chan->rx_wqe_list.dl_cnt);
1265 1265 DPRINT(40, "ibd_rc_post_rwqe: failed in ibt_post_recv()");
1266 1266 return (DDI_FAILURE);
1267 1267 }
1268 1268 return (DDI_SUCCESS);
1269 1269 }
1270 1270
1271 1271 static int
1272 1272 ibd_rc_alloc_rx_copybufs(ibd_rc_chan_t *chan)
1273 1273 {
1274 1274 ibd_state_t *state = chan->state;
1275 1275 ibt_mr_attr_t mem_attr;
1276 1276 uint_t rc_rx_bufs_sz;
1277 1277
1278 1278 /*
1279 1279 * Allocate one big chunk for all regular rx copy bufs
1280 1280 */
1281 1281 rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * chan->rcq_size;
1282 1282
1283 1283 chan->rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP);
1284 1284
1285 1285 chan->rx_rwqes = kmem_zalloc(chan->rcq_size *
1286 1286 sizeof (ibd_rwqe_t), KM_SLEEP);
1287 1287
1288 1288 /*
1289 1289 * Do one memory registration on the entire rxbuf area
1290 1290 */
1291 1291 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->rx_bufs;
1292 1292 mem_attr.mr_len = rc_rx_bufs_sz;
1293 1293 mem_attr.mr_as = NULL;
1294 1294 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
1295 1295 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
1296 1296 &chan->rx_mr_hdl, &chan->rx_mr_desc) != IBT_SUCCESS) {
1297 1297 DPRINT(40, "ibd_rc_alloc_rx_copybufs: ibt_register_mr failed");
1298 1298 kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t));
1299 1299 kmem_free(chan->rx_bufs, rc_rx_bufs_sz);
1300 1300 chan->rx_bufs = NULL;
1301 1301 chan->rx_rwqes = NULL;
1302 1302 return (DDI_FAILURE);
1303 1303 }
1304 1304
1305 1305 return (DDI_SUCCESS);
1306 1306 }
1307 1307
1308 1308 static void
1309 1309 ibd_rc_free_rx_copybufs(ibd_rc_chan_t *chan)
1310 1310 {
1311 1311 ibd_state_t *state = chan->state;
1312 1312 uint_t rc_rx_buf_sz;
1313 1313
1314 1314 ASSERT(!state->rc_enable_srq);
1315 1315 ASSERT(chan->rx_rwqes != NULL);
1316 1316 ASSERT(chan->rx_bufs != NULL);
1317 1317
1318 1318 /*
1319 1319 * Don't change the value of state->rc_mtu between the call to
1320 1320 * ibd_rc_alloc_rx_copybufs() and the call to ibd_rc_free_rx_copybufs().
1321 1321 */
1322 1322 rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE;
1323 1323
1324 1324 /*
1325 1325 * Unregister rxbuf mr
1326 1326 */
1327 1327 if (ibt_deregister_mr(state->id_hca_hdl,
1328 1328 chan->rx_mr_hdl) != IBT_SUCCESS) {
1329 1329 DPRINT(40, "ibd_rc_free_rx_copybufs: ibt_deregister_mr failed");
1330 1330 }
1331 1331 chan->rx_mr_hdl = NULL;
1332 1332
1333 1333 /*
1334 1334 * Free rxbuf memory
1335 1335 */
1336 1336 kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t));
1337 1337 chan->rx_rwqes = NULL;
1338 1338
1339 1339 kmem_free(chan->rx_bufs, chan->rcq_size * rc_rx_buf_sz);
1340 1340 chan->rx_bufs = NULL;
1341 1341 }
1342 1342
1343 1343 /*
1344 1344 * Post a certain number of receive buffers and WRs on a RC channel.
1345 1345 */
1346 1346 static int
1347 1347 ibd_rc_init_rxlist(ibd_rc_chan_t *chan)
1348 1348 {
1349 1349 ibd_state_t *state = chan->state;
1350 1350 ibd_rwqe_t *rwqe;
1351 1351 ibt_lkey_t lkey;
1352 1352 int i;
1353 1353 uint_t len;
1354 1354 uint8_t *bufaddr;
1355 1355
1356 1356 ASSERT(!state->rc_enable_srq);
1357 1357 if (ibd_rc_alloc_rx_copybufs(chan) != DDI_SUCCESS)
1358 1358 return (DDI_FAILURE);
1359 1359
1360 1360 /*
1361 1361 * Allocate and setup the rwqe list
1362 1362 */
1363 1363 lkey = chan->rx_mr_desc.md_lkey;
1364 1364 rwqe = chan->rx_rwqes;
1365 1365 bufaddr = chan->rx_bufs;
1366 1366 len = state->rc_mtu + IPOIB_GRH_SIZE;
1367 1367 for (i = 0; i < chan->rcq_size; i++, rwqe++, bufaddr += len) {
1368 1368 rwqe->w_state = state;
1369 1369 rwqe->w_chan = chan;
1370 1370 rwqe->w_freeing_wqe = B_FALSE;
1371 1371 rwqe->w_freemsg_cb.free_func = ibd_rc_freemsg_cb;
1372 1372 rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
1373 1373 rwqe->rwqe_copybuf.ic_bufaddr = bufaddr;
1374 1374
1375 1375 if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0,
1376 1376 &rwqe->w_freemsg_cb)) == NULL) {
1377 1377 DPRINT(40, "ibd_rc_init_rxlist: desballoc() failed");
1378 1378 rwqe->rwqe_copybuf.ic_bufaddr = NULL;
1379 1379 ibd_rc_fini_rxlist(chan);
1380 1380 return (DDI_FAILURE);
1381 1381 }
1382 1382
1383 1383 rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey;
1384 1384 rwqe->rwqe_copybuf.ic_sgl.ds_va =
1385 1385 (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE);
1386 1386 rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu;
1387 1387 rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
1388 1388 rwqe->w_rwr.wr_nds = 1;
1389 1389 rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;
1390 1390 (void) ibd_rc_post_rwqe(chan, rwqe);
1391 1391 }
1392 1392
1393 1393 return (DDI_SUCCESS);
1394 1394 }
1395 1395
1396 1396 /*
1397 1397 * Free the statically allocated Rx buffer list of a RC channel.
1398 1398 */
1399 1399 static void
1400 1400 ibd_rc_fini_rxlist(ibd_rc_chan_t *chan)
1401 1401 {
1402 1402 ibd_rwqe_t *rwqe;
1403 1403 int i;
1404 1404
1405 1405 if (chan->rx_bufs == NULL) {
1406 1406 DPRINT(40, "ibd_rc_fini_rxlist: empty chan->rx_bufs, quit");
1407 1407 return;
1408 1408 }
1409 1409
1410 1410 /* bufs_outstanding must be 0 */
1411 1411 ASSERT((chan->rx_wqe_list.dl_head == NULL) ||
1412 1412 (chan->rx_wqe_list.dl_bufs_outstanding == 0));
1413 1413
1414 1414 mutex_enter(&chan->rx_wqe_list.dl_mutex);
1415 1415 rwqe = chan->rx_rwqes;
1416 1416 for (i = 0; i < chan->rcq_size; i++, rwqe++) {
1417 1417 if (rwqe->rwqe_im_mblk != NULL) {
1418 1418 rwqe->w_freeing_wqe = B_TRUE;
1419 1419 freemsg(rwqe->rwqe_im_mblk);
1420 1420 }
1421 1421 }
1422 1422 mutex_exit(&chan->rx_wqe_list.dl_mutex);
1423 1423
1424 1424 ibd_rc_free_rx_copybufs(chan);
1425 1425 }
1426 1426
1427 1427 /*
1428 1428 * Free an allocated recv wqe.
1429 1429 */
1430 1430 static void
1431 1431 ibd_rc_free_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe)
1432 1432 {
1433 1433 /*
1434 1434 * desballoc() failed (no memory) or the posting of rwqe failed.
1435 1435 *
1436 1436 * This rwqe is placed on a free list so that it
1437 1437 * can be reinstated in future.
1438 1438 *
1439 1439 * NOTE: no code currently exists to reinstate
1440 1440 * these "lost" rwqes.
1441 1441 */
1442 1442 mutex_enter(&chan->rx_free_list.dl_mutex);
1443 1443 chan->rx_free_list.dl_cnt++;
1444 1444 rwqe->rwqe_next = chan->rx_free_list.dl_head;
1445 1445 chan->rx_free_list.dl_head = RWQE_TO_WQE(rwqe);
1446 1446 mutex_exit(&chan->rx_free_list.dl_mutex);
1447 1447 }
1448 1448
1449 1449 /*
1450 1450 * Processing to be done after receipt of a packet; hand off to GLD
1451 1451 * in the format expected by GLD.
1452 1452 */
1453 1453 static void
1454 1454 ibd_rc_process_rx(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe, ibt_wc_t *wc)
1455 1455 {
1456 1456 ibd_state_t *state = chan->state;
1457 1457 ib_header_info_t *phdr;
1458 1458 ipoib_hdr_t *ipibp;
1459 1459 mblk_t *mp;
1460 1460 mblk_t *mpc;
1461 1461 int rxcnt;
1462 1462 ip6_t *ip6h;
1463 1463 int len;
1464 1464
1465 1465 /*
1466 1466 * Track number handed to upper layer, and number still
1467 1467 * available to receive packets.
1468 1468 */
1469 1469 if (state->rc_enable_srq) {
1470 1470 rxcnt = atomic_dec_32_nv(&state->rc_srq_rwqe_list.dl_cnt);
1471 1471 } else {
1472 1472 rxcnt = atomic_dec_32_nv(&chan->rx_wqe_list.dl_cnt);
1473 1473 }
1474 1474
1475 1475 /*
1476 1476 * It cannot be an IBA multicast packet.
1477 1477 */
1478 1478 ASSERT(!(wc->wc_flags & IBT_WC_GRH_PRESENT));
1479 1479
1480 1480 /* For the connection reaper routine ibd_rc_conn_timeout_call() */
1481 1481 chan->is_used = B_TRUE;
1482 1482
1483 1483 #ifdef DEBUG
1484 1484 if (rxcnt < state->id_rc_rx_rwqe_thresh) {
1485 1485 state->rc_rwqe_short++;
1486 1486 }
1487 1487 #endif
1488 1488
1489 1489 /*
1490 1490 * Possibly replenish the Rx pool if needed.
1491 1491 */
1492 1492 if ((rxcnt >= state->id_rc_rx_rwqe_thresh) &&
1493 1493 (wc->wc_bytes_xfer > state->id_rc_rx_copy_thresh)) {
1494 1494 atomic_add_64(&state->rc_rcv_trans_byte, wc->wc_bytes_xfer);
1495 1495 atomic_inc_64(&state->rc_rcv_trans_pkt);
1496 1496
1497 1497 /*
1498 1498 * Record how many rwqes are held by the upper
1499 1499 * network layer
1500 1500 */
1501 1501 if (state->rc_enable_srq) {
1502 - atomic_add_32(&state->rc_srq_rwqe_list.
1503 - dl_bufs_outstanding, 1);
1502 + atomic_inc_32(
1503 + &state->rc_srq_rwqe_list.dl_bufs_outstanding);
1504 1504 } else {
1505 - atomic_add_32(&chan->rx_wqe_list.
1506 - dl_bufs_outstanding, 1);
1505 + atomic_inc_32(&chan->rx_wqe_list.dl_bufs_outstanding);
1507 1506 }
1508 1507 mp = rwqe->rwqe_im_mblk;
1509 1508 } else {
1510 1509 atomic_add_64(&state->rc_rcv_copy_byte, wc->wc_bytes_xfer);
1511 1510 atomic_inc_64(&state->rc_rcv_copy_pkt);
1512 1511
1513 1512 if ((mp = allocb(wc->wc_bytes_xfer + IPOIB_GRH_SIZE,
1514 1513 BPRI_HI)) == NULL) { /* no memory */
1515 1514 DPRINT(40, "ibd_rc_process_rx: allocb() failed");
1516 1515 state->rc_rcv_alloc_fail++;
1517 1516 if (state->rc_enable_srq) {
1518 1517 if (ibd_rc_post_srq(state, rwqe) ==
1519 1518 DDI_FAILURE) {
1520 1519 ibd_rc_srq_free_rwqe(state, rwqe);
1521 1520 }
1522 1521 } else {
1523 1522 if (ibd_rc_post_rwqe(chan, rwqe) ==
1524 1523 DDI_FAILURE) {
1525 1524 ibd_rc_free_rwqe(chan, rwqe);
1526 1525 }
1527 1526 }
1528 1527 return;
1529 1528 }
1530 1529
1531 1530 bcopy(rwqe->rwqe_im_mblk->b_rptr + IPOIB_GRH_SIZE,
1532 1531 mp->b_wptr + IPOIB_GRH_SIZE, wc->wc_bytes_xfer);
1533 1532
1534 1533 if (state->rc_enable_srq) {
1535 1534 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
1536 1535 ibd_rc_srq_free_rwqe(state, rwqe);
1537 1536 }
1538 1537 } else {
1539 1538 if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) {
1540 1539 ibd_rc_free_rwqe(chan, rwqe);
1541 1540 }
1542 1541 }
1543 1542 }
1544 1543
1545 1544 ipibp = (ipoib_hdr_t *)((uchar_t *)mp->b_rptr + IPOIB_GRH_SIZE);
1546 1545 if (ntohs(ipibp->ipoib_type) == ETHERTYPE_IPV6) {
1547 1546 ip6h = (ip6_t *)((uchar_t *)ipibp + sizeof (ipoib_hdr_t));
1548 1547 len = ntohs(ip6h->ip6_plen);
1549 1548 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1550 1549 /* LINTED: E_CONSTANT_CONDITION */
1551 1550 IBD_PAD_NSNA(ip6h, len, IBD_RECV);
1552 1551 }
1553 1552 }
1554 1553
1555 1554 phdr = (ib_header_info_t *)mp->b_rptr;
1556 1555 phdr->ib_grh.ipoib_vertcflow = 0;
1557 1556 ovbcopy(&state->id_macaddr, &phdr->ib_dst,
1558 1557 sizeof (ipoib_mac_t));
1559 1558 mp->b_wptr = mp->b_rptr + wc->wc_bytes_xfer + IPOIB_GRH_SIZE;
1560 1559
1561 1560 /*
1562 1561 * Can RC mode in IB guarantee its checksum correctness?
1563 1562 *
1564 1563 * (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
1565 1564 * HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);
1566 1565 */
1567 1566
1568 1567 /*
1569 1568 * Make sure this is NULL or we're in trouble.
1570 1569 */
1571 1570 if (mp->b_next != NULL) {
1572 1571 ibd_print_warn(state,
1573 1572 "ibd_rc_process_rx: got duplicate mp from rcq?");
1574 1573 mp->b_next = NULL;
1575 1574 }
1576 1575
1577 1576 /*
1578 1577 * Add this mp to the list of processed mps to send to
1579 1578 * the network layer
1580 1579 */
1581 1580 if (state->rc_enable_srq) {
1582 1581 mutex_enter(&state->rc_rx_lock);
1583 1582 if (state->rc_rx_mp) {
1584 1583 ASSERT(state->rc_rx_mp_tail != NULL);
1585 1584 state->rc_rx_mp_tail->b_next = mp;
1586 1585 } else {
1587 1586 ASSERT(state->rc_rx_mp_tail == NULL);
1588 1587 state->rc_rx_mp = mp;
1589 1588 }
1590 1589
1591 1590 state->rc_rx_mp_tail = mp;
1592 1591 state->rc_rx_mp_len++;
1593 1592
1594 1593 if (state->rc_rx_mp_len >= IBD_MAX_RX_MP_LEN) {
1595 1594 mpc = state->rc_rx_mp;
1596 1595
1597 1596 state->rc_rx_mp = NULL;
1598 1597 state->rc_rx_mp_tail = NULL;
1599 1598 state->rc_rx_mp_len = 0;
1600 1599 mutex_exit(&state->rc_rx_lock);
1601 1600 mac_rx(state->id_mh, NULL, mpc);
1602 1601 } else {
1603 1602 mutex_exit(&state->rc_rx_lock);
1604 1603 }
1605 1604 } else {
1606 1605 mutex_enter(&chan->rx_lock);
1607 1606 if (chan->rx_mp) {
1608 1607 ASSERT(chan->rx_mp_tail != NULL);
1609 1608 chan->rx_mp_tail->b_next = mp;
1610 1609 } else {
1611 1610 ASSERT(chan->rx_mp_tail == NULL);
1612 1611 chan->rx_mp = mp;
1613 1612 }
1614 1613
1615 1614 chan->rx_mp_tail = mp;
1616 1615 chan->rx_mp_len++;
1617 1616
1618 1617 if (chan->rx_mp_len >= IBD_MAX_RX_MP_LEN) {
1619 1618 mpc = chan->rx_mp;
1620 1619
1621 1620 chan->rx_mp = NULL;
1622 1621 chan->rx_mp_tail = NULL;
1623 1622 chan->rx_mp_len = 0;
1624 1623 mutex_exit(&chan->rx_lock);
1625 1624 mac_rx(state->id_mh, NULL, mpc);
1626 1625 } else {
1627 1626 mutex_exit(&chan->rx_lock);
1628 1627 }
1629 1628 }
1630 1629 }
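The atomic_{inc,dec}_* substitutions in this function are behavior-preserving:
atomic_inc_32(p) is exactly atomic_add_32(p, 1), atomic_dec_32(p) is
atomic_add_32(p, -1), and the *_nv variants additionally return the new value.
Since illumos exports the same atomic_ops(3C) API to user code, the equivalence
can be sanity-checked outside the kernel; a minimal sketch:

    #include <sys/types.h>
    #include <atomic.h>		/* atomic_ops(3C): same API as <sys/atomic.h> */
    #include <assert.h>

    int
    main(void)
    {
    	uint32_t a = 5, b = 5;
    	uint32_t nv;

    	atomic_add_32(&a, 1);		/* old idiom */
    	atomic_inc_32(&b);		/* new idiom, same effect */
    	assert(a == b);

    	atomic_add_32(&a, -1);		/* old idiom */
    	atomic_dec_32(&b);		/* new idiom, same effect */
    	assert(a == b);

    	nv = atomic_dec_32_nv(&a);	/* decrement and return new value */
    	assert(nv == a);
    	return (0);
    }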
1631 1630
1632 1631 /*
1633 1632 * Callback code invoked from STREAMS when the recv data buffer is free
1634 1633 * for recycling.
1635 1634 */
1636 1635 static void
1637 1636 ibd_rc_freemsg_cb(char *arg)
1638 1637 {
1639 1638 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg;
1640 1639 ibd_rc_chan_t *chan = rwqe->w_chan;
1641 1640 ibd_state_t *state = rwqe->w_state;
1642 1641
1643 1642 /*
1644 1643 * If the wqe is being destructed, do not attempt recycling.
1645 1644 */
1646 1645 if (rwqe->w_freeing_wqe == B_TRUE) {
1647 1646 return;
1648 1647 }
1649 1648
1650 1649 ASSERT(!state->rc_enable_srq);
1651 1650 ASSERT(chan->rx_wqe_list.dl_cnt < chan->rcq_size);
1652 1651
1653 1652 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr,
1654 1653 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb);
1655 1654 if (rwqe->rwqe_im_mblk == NULL) {
1656 1655 DPRINT(40, "ibd_rc_freemsg_cb: desballoc() failed");
1657 1656 ibd_rc_free_rwqe(chan, rwqe);
1658 1657 return;
1659 1658 }
1660 1659
1661 1660 /*
1662 1661 * Post back to h/w. We could actually have more than
1663 1662 * rcq_size WQEs on the list if there were multiple
1664 1663 * ibd_rc_freemsg_cb() calls outstanding (since the lock is
1665 1664 * not held the entire time). This will start getting
1666 1665 * corrected over subsequent ibd_rc_freemsg_cb() calls.
1667 1666 */
1668 1667 if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) {
1669 1668 ibd_rc_free_rwqe(chan, rwqe);
1670 1669 return;
1671 1670 }
1672 - atomic_add_32(&chan->rx_wqe_list.dl_bufs_outstanding, -1);
1671 + atomic_dec_32(&chan->rx_wqe_list.dl_bufs_outstanding);
1673 1672 }
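The recycling here relies on the desballoc(9F) contract: the mblk is created
with a registered free routine, and a later freemsg() invokes that routine
instead of discarding the data buffer, which is what gives the driver the
chance to repost the rwqe. A sketch of the wiring assumed at rwqe setup time
(the actual init path lives elsewhere in ibd; this fragment is illustrative):

    /*
     * Register the free routine (frtn_t, from <sys/stream.h>) so that
     * freemsg() on this mblk calls back into the driver instead of
     * freeing the receive buffer.
     */
    rwqe->w_freemsg_cb.free_func = ibd_rc_freemsg_cb;
    rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
    rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr,
        state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb);
    if (rwqe->rwqe_im_mblk == NULL) {
    	/* treat like any other allocation failure */
    	return (DDI_FAILURE);
    }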
1674 1673
1675 1674 /*
1676 1675 * Common code for interrupt handling as well as for polling
1677 1676 * for all completed wqe's while detaching.
1678 1677 */
1679 1678 static void
1680 1679 ibd_rc_poll_rcq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl)
1681 1680 {
1682 1681 ibd_wqe_t *wqe;
1683 1682 ibt_wc_t *wc, *wcs;
1684 1683 uint_t numwcs, real_numwcs;
1685 1684 int i;
1686 1685
1687 1686 wcs = chan->rx_wc;
1688 1687 numwcs = IBD_RC_MAX_CQ_WC;
1689 1688
1690 1689 while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) {
1691 1690 for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) {
1692 1691 wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id;
1693 1692 if (wc->wc_status != IBT_WC_SUCCESS) {
1694 1693 chan->state->rc_rcq_err++;
1695 1694 /*
1696 1695 * Channel being torn down.
1697 1696 */
1698 1697 DPRINT(40, "ibd_rc_poll_rcq: wc_status(%d) != "
1699 1698 "SUCC, chan=%p", wc->wc_status, chan);
1700 1699 if (wc->wc_status == IBT_WC_WR_FLUSHED_ERR) {
1701 1700 /*
1702 1701 * Do not invoke Rx handler because
1703 1702 * it might add buffers to the Rx pool
1704 1703 * when we are trying to deinitialize.
1705 1704 */
1706 1705 continue;
1707 1706 }
1708 1707 }
1709 1708 ibd_rc_process_rx(chan, WQE_TO_RWQE(wqe), wc);
1710 1709 }
1711 1710 }
1712 1711 }
1713 1712
1714 1713 /* Receive CQ handler */
1715 1714 /* ARGSUSED */
1716 1715 static void
1717 1716 ibd_rc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
1718 1717 {
1719 1718 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
1720 1719 ibd_state_t *state = chan->state;
1721 1720
1722 1721 atomic_inc_32(&chan->rcq_invoking);
1723 1722 ASSERT(chan->chan_state == IBD_RC_STATE_PAS_ESTAB);
1724 1723
1725 1724 /*
1726 1725 * Poll for completed entries; the CQ will not interrupt any
1727 1726 * more for incoming (or transmitted) packets.
1728 1727 */
1729 1728 ibd_rc_poll_rcq(chan, chan->rcq_hdl);
1730 1729
1731 1730 /*
1732 1731 * Now enable CQ notifications; all packets that arrive now
1733 1732 * (or complete transmission) will cause new interrupts.
1734 1733 */
1735 1734 if (ibt_enable_cq_notify(chan->rcq_hdl, IBT_NEXT_COMPLETION) !=
1736 1735 IBT_SUCCESS) {
1737 1736 /*
1738 1737 * We do not expect a failure here.
1739 1738 */
1740 1739 DPRINT(40, "ibd_rc_rcq_handler: ibt_enable_cq_notify() failed");
1741 1740 }
1742 1741
1743 1742 /*
1744 1743 * Repoll to catch all packets that might have arrived after
1745 1744 * we finished the first poll loop and before interrupts got
1746 1745 * armed.
1747 1746 */
1748 1747 ibd_rc_poll_rcq(chan, chan->rcq_hdl);
1749 1748
1750 1749 if (state->rc_enable_srq) {
1751 1750 mutex_enter(&state->rc_rx_lock);
1752 1751
1753 1752 if (state->rc_rx_mp != NULL) {
1754 1753 mblk_t *mpc;
1755 1754 mpc = state->rc_rx_mp;
1756 1755
1757 1756 state->rc_rx_mp = NULL;
1758 1757 state->rc_rx_mp_tail = NULL;
1759 1758 state->rc_rx_mp_len = 0;
1760 1759
1761 1760 mutex_exit(&state->rc_rx_lock);
1762 1761 mac_rx(state->id_mh, NULL, mpc);
1763 1762 } else {
1764 1763 mutex_exit(&state->rc_rx_lock);
1765 1764 }
1766 1765 } else {
1767 1766 mutex_enter(&chan->rx_lock);
1768 1767
1769 1768 if (chan->rx_mp != NULL) {
1770 1769 mblk_t *mpc;
1771 1770 mpc = chan->rx_mp;
1772 1771
1773 1772 chan->rx_mp = NULL;
1774 1773 chan->rx_mp_tail = NULL;
1775 1774 chan->rx_mp_len = 0;
1776 1775
1777 1776 mutex_exit(&chan->rx_lock);
1778 1777 mac_rx(state->id_mh, NULL, mpc);
1779 1778 } else {
1780 1779 mutex_exit(&chan->rx_lock);
1781 1780 }
1782 1781 }
1783 1782 atomic_dec_32(&chan->rcq_invoking);
1784 1783 }
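The poll/arm/repoll sequence above is the standard cure for the CQ arming
race: a completion that lands after the first drain but before
ibt_enable_cq_notify() takes effect would otherwise never raise an interrupt.
Distilled to its skeleton:

    ibd_rc_poll_rcq(chan, chan->rcq_hdl);	/* 1: reap the backlog */
    (void) ibt_enable_cq_notify(chan->rcq_hdl,
        IBT_NEXT_COMPLETION);			/* 2: re-arm interrupts */
    ibd_rc_poll_rcq(chan, chan->rcq_hdl);	/* 3: reap completions that
    						 * slipped in between 1 and 2 */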
1785 1784
1786 1785 /*
1787 1786 * Allocate the static list of large Tx copy buffers.
1788 1787 */
1789 1788 int
1790 1789 ibd_rc_init_tx_largebuf_list(ibd_state_t *state)
1791 1790 {
1792 1791 ibd_rc_tx_largebuf_t *lbufp;
1793 1792 ibd_rc_tx_largebuf_t *tail;
1794 1793 uint8_t *memp;
1795 1794 ibt_mr_attr_t mem_attr;
1796 1795 uint32_t num_swqe;
1797 1796 size_t mem_size;
1798 1797 int i;
1799 1798
1800 1799 num_swqe = state->id_rc_num_swqe - 1;
1801 1800
1802 1801 /*
1803 1802 * Allocate one big chunk for all Tx large copy bufs
1804 1803 */
1805 1804 /* Don't transfer IPOIB_GRH_SIZE bytes (40 bytes) */
1806 1805 mem_size = num_swqe * state->rc_mtu;
1807 1806 state->rc_tx_mr_bufs = kmem_zalloc(mem_size, KM_SLEEP);
1808 1807
1809 1808 mem_attr.mr_len = mem_size;
1810 1809 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_tx_mr_bufs;
1811 1810 mem_attr.mr_as = NULL;
1812 1811 mem_attr.mr_flags = IBT_MR_SLEEP;
1813 1812 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
1814 1813 &state->rc_tx_mr_hdl, &state->rc_tx_mr_desc) != IBT_SUCCESS) {
1815 1814 DPRINT(40, "ibd_rc_init_tx_largebuf_list: ibt_register_mr "
1816 1815 "failed");
1817 1816 kmem_free(state->rc_tx_mr_bufs, mem_size);
1818 1817 state->rc_tx_mr_bufs = NULL;
1819 1818 return (DDI_FAILURE);
1820 1819 }
1821 1820
1822 1821 state->rc_tx_largebuf_desc_base = kmem_zalloc(num_swqe *
1823 1822 sizeof (ibd_rc_tx_largebuf_t), KM_SLEEP);
1824 1823
1825 1824 /*
1826 1825 * Set up the buf chain
1827 1826 */
1828 1827 memp = state->rc_tx_mr_bufs;
1829 1828 mutex_enter(&state->rc_tx_large_bufs_lock);
1830 1829 lbufp = state->rc_tx_largebuf_desc_base;
1831 1830 for (i = 0; i < num_swqe; i++) {
1832 1831 lbufp->lb_buf = memp;
1833 1832 lbufp->lb_next = lbufp + 1;
1834 1833
1835 1834 tail = lbufp;
1836 1835
1837 1836 memp += state->rc_mtu;
1838 1837 lbufp++;
1839 1838 }
1840 1839 tail->lb_next = NULL;
1841 1840
1842 1841 /*
1843 1842 * Set up the buffer information in ibd state
1844 1843 */
1845 1844 state->rc_tx_largebuf_free_head = state->rc_tx_largebuf_desc_base;
1846 1845 state->rc_tx_largebuf_nfree = num_swqe;
1847 1846 mutex_exit(&state->rc_tx_large_bufs_lock);
1848 1847 return (DDI_SUCCESS);
1849 1848 }
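The function above uses the usual carve-a-slab idiom: one kmem allocation,
one MR registration, and a singly linked free list threaded through an array
of descriptors. A generic sketch of the same shape (names hypothetical):

    typedef struct buf_desc {
    	uint8_t			*bd_buf;
    	struct buf_desc		*bd_next;
    } buf_desc_t;

    /*
     * Carve 'slab' into 'n' chunks of 'sz' bytes each, thread a free
     * list through 'descs', and return the list head.
     */
    static buf_desc_t *
    carve_slab(uint8_t *slab, buf_desc_t *descs, uint_t n, size_t sz)
    {
    	uint_t i;

    	for (i = 0; i < n; i++) {
    		descs[i].bd_buf = slab + (size_t)i * sz;
    		descs[i].bd_next = (i + 1 < n) ? &descs[i + 1] : NULL;
    	}
    	return (descs);
    }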
1850 1849
1851 1850 void
1852 1851 ibd_rc_fini_tx_largebuf_list(ibd_state_t *state)
1853 1852 {
1854 1853 uint32_t num_swqe;
1855 1854
1856 1855 num_swqe = state->id_rc_num_swqe - 1;
1857 1856
1858 1857 if (ibt_deregister_mr(state->id_hca_hdl,
1859 1858 state->rc_tx_mr_hdl) != IBT_SUCCESS) {
1860 1859 DPRINT(40, "ibd_rc_fini_tx_largebuf_list: ibt_deregister_mr() "
1861 1860 "failed");
1862 1861 }
1863 1862 state->rc_tx_mr_hdl = NULL;
1864 1863
1865 1864 kmem_free(state->rc_tx_mr_bufs, num_swqe * state->rc_mtu);
1866 1865 state->rc_tx_mr_bufs = NULL;
1867 1866
1868 1867 kmem_free(state->rc_tx_largebuf_desc_base,
1869 1868 num_swqe * sizeof (ibd_rc_tx_largebuf_t));
1870 1869 state->rc_tx_largebuf_desc_base = NULL;
1871 1870 }
1872 1871
1873 1872 static int
1874 1873 ibd_rc_alloc_tx_copybufs(ibd_rc_chan_t *chan)
1875 1874 {
1876 1875 ibt_mr_attr_t mem_attr;
1877 1876 ibd_state_t *state;
1878 1877
1879 1878 state = chan->state;
1880 1879 ASSERT(state != NULL);
1881 1880
1882 1881 /*
1883 1882 * Allocate one big chunk for all regular tx copy bufs
1884 1883 */
1885 1884 mem_attr.mr_len = chan->scq_size * state->id_rc_tx_copy_thresh;
1886 1885
1887 1886 chan->tx_mr_bufs = kmem_zalloc(mem_attr.mr_len, KM_SLEEP);
1888 1887
1889 1888 /*
1890 1889 * Do one memory registration on the entire txbuf area
1891 1890 */
1892 1891 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->tx_mr_bufs;
1893 1892 mem_attr.mr_as = NULL;
1894 1893 mem_attr.mr_flags = IBT_MR_SLEEP;
1895 1894 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
1896 1895 &chan->tx_mr_hdl, &chan->tx_mr_desc) != IBT_SUCCESS) {
1897 1896 DPRINT(40, "ibd_rc_alloc_tx_copybufs: ibt_register_mr failed");
1898 1897 ASSERT(mem_attr.mr_len ==
1899 1898 chan->scq_size * state->id_rc_tx_copy_thresh);
1900 1899 kmem_free(chan->tx_mr_bufs, mem_attr.mr_len);
1901 1900 chan->tx_mr_bufs = NULL;
1902 1901 return (DDI_FAILURE);
1903 1902 }
1904 1903
1905 1904 return (DDI_SUCCESS);
1906 1905 }
1907 1906
1908 1907 /*
1909 1908 * Allocate the statically allocated Tx buffer list.
1910 1909 */
1911 1910 static int
1912 1911 ibd_rc_init_txlist(ibd_rc_chan_t *chan)
1913 1912 {
1914 1913 ibd_swqe_t *swqe;
1915 1914 int i;
1916 1915 ibt_lkey_t lkey;
1917 1916 ibd_state_t *state = chan->state;
1918 1917
1919 1918 if (ibd_rc_alloc_tx_copybufs(chan) != DDI_SUCCESS)
1920 1919 return (DDI_FAILURE);
1921 1920
1922 1921 /*
1923 1922 * Allocate and setup the swqe list
1924 1923 */
1925 1924 lkey = chan->tx_mr_desc.md_lkey;
1926 1925 chan->tx_wqes = kmem_zalloc(chan->scq_size *
1927 1926 sizeof (ibd_swqe_t), KM_SLEEP);
1928 1927 swqe = chan->tx_wqes;
1929 1928 for (i = 0; i < chan->scq_size; i++, swqe++) {
1930 1929 swqe->swqe_next = NULL;
1931 1930 swqe->swqe_im_mblk = NULL;
1932 1931
1933 1932 swqe->swqe_copybuf.ic_sgl.ds_key = lkey;
1934 1933 swqe->swqe_copybuf.ic_sgl.ds_len = 0; /* set in send */
1935 1934
1936 1935 swqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)swqe;
1937 1936 swqe->w_swr.wr_flags = IBT_WR_SEND_SIGNAL;
1938 1937 swqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t)
1939 1938 (chan->tx_mr_bufs + i * state->id_rc_tx_copy_thresh);
1940 1939 swqe->w_swr.wr_trans = IBT_RC_SRV;
1941 1940
1942 1941 /* Add to list */
1943 1942 mutex_enter(&chan->tx_wqe_list.dl_mutex);
1944 1943 chan->tx_wqe_list.dl_cnt++;
1945 1944 swqe->swqe_next = chan->tx_wqe_list.dl_head;
1946 1945 chan->tx_wqe_list.dl_head = SWQE_TO_WQE(swqe);
1947 1946 mutex_exit(&chan->tx_wqe_list.dl_mutex);
1948 1947 }
1949 1948
1950 1949 return (DDI_SUCCESS);
1951 1950 }
1952 1951
1953 1952 /*
1954 1953 * Free the statically allocated Tx buffer list.
1955 1954 */
1956 1955 static void
1957 1956 ibd_rc_fini_txlist(ibd_rc_chan_t *chan)
1958 1957 {
1959 1958 ibd_state_t *state = chan->state;
1960 1959 if (chan->tx_mr_hdl != NULL) {
1961 1960 if (ibt_deregister_mr(chan->state->id_hca_hdl,
1962 1961 chan->tx_mr_hdl) != IBT_SUCCESS) {
1963 1962 DPRINT(40, "ibd_rc_fini_txlist: ibt_deregister_mr "
1964 1963 "failed");
1965 1964 }
1966 1965 chan->tx_mr_hdl = NULL;
1967 1966 }
1968 1967
1969 1968 if (chan->tx_mr_bufs != NULL) {
1970 1969 kmem_free(chan->tx_mr_bufs, chan->scq_size *
1971 1970 state->id_rc_tx_copy_thresh);
1972 1971 chan->tx_mr_bufs = NULL;
1973 1972 }
1974 1973
1975 1974 if (chan->tx_wqes != NULL) {
1976 1975 kmem_free(chan->tx_wqes, chan->scq_size *
1977 1976 sizeof (ibd_swqe_t));
1978 1977 chan->tx_wqes = NULL;
1979 1978 }
1980 1979 }
1981 1980
1982 1981 /*
1983 1982 * Acquire send wqe from free list.
1985 1984 * Returns a pointer to the acquired swqe, or NULL if none is free.
1985 1984 */
1986 1985 ibd_swqe_t *
1987 1986 ibd_rc_acquire_swqes(ibd_rc_chan_t *chan)
1988 1987 {
1989 1988 ibd_swqe_t *wqe;
1990 1989
1991 1990 mutex_enter(&chan->tx_rel_list.dl_mutex);
1992 1991 if (chan->tx_rel_list.dl_head != NULL) {
1993 1992 /* transfer tx_rel_list to tx_wqe_list */
1994 1993 chan->tx_wqe_list.dl_head =
1995 1994 chan->tx_rel_list.dl_head;
1996 1995 chan->tx_wqe_list.dl_cnt =
1997 1996 chan->tx_rel_list.dl_cnt;
1998 1997 chan->tx_wqe_list.dl_pending_sends = B_FALSE;
1999 1998
2000 1999 /* clear tx_rel_list */
2001 2000 chan->tx_rel_list.dl_head = NULL;
2002 2001 chan->tx_rel_list.dl_cnt = 0;
2003 2002 mutex_exit(&chan->tx_rel_list.dl_mutex);
2004 2003
2005 2004 wqe = WQE_TO_SWQE(chan->tx_wqe_list.dl_head);
2006 2005 chan->tx_wqe_list.dl_cnt -= 1;
2007 2006 chan->tx_wqe_list.dl_head = wqe->swqe_next;
2008 2007 } else { /* no free swqe */
2009 2008 mutex_exit(&chan->tx_rel_list.dl_mutex);
2010 2009 chan->tx_wqe_list.dl_pending_sends = B_TRUE;
2011 2010 wqe = NULL;
2012 2011 }
2013 2012 return (wqe);
2014 2013 }
2015 2014
2016 2015 /*
2017 2016 * Release send wqe back into free list.
2018 2017 */
2019 2018 static void
2020 2019 ibd_rc_release_swqe(ibd_rc_chan_t *chan, ibd_swqe_t *swqe)
2021 2020 {
2022 2021 /*
2023 2022 * Add back on Tx list for reuse.
2024 2023 */
2025 2024 swqe->swqe_next = NULL;
2026 2025 mutex_enter(&chan->tx_rel_list.dl_mutex);
2027 2026 chan->tx_rel_list.dl_pending_sends = B_FALSE;
2028 2027 swqe->swqe_next = chan->tx_rel_list.dl_head;
2029 2028 chan->tx_rel_list.dl_head = SWQE_TO_WQE(swqe);
2030 2029 chan->tx_rel_list.dl_cnt++;
2031 2030 mutex_exit(&chan->tx_rel_list.dl_mutex);
2032 2031 }
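Acquire and release together form a two-list freelist: the completion path
pushes swqes onto tx_rel_list under its own mutex, and the send path refills
tx_wqe_list from it in a single bulk transfer, so the two sides contend only
for the duration of the swap. A distilled sketch (simplified, not the
driver's exact locking):

    /*
     * Consumer side: refill the private work list from the shared
     * release list in one swap, then pop with no further contention.
     */
    mutex_enter(&rel_list->dl_mutex);
    if (rel_list->dl_head != NULL) {
    	work_list->dl_head = rel_list->dl_head;
    	work_list->dl_cnt = rel_list->dl_cnt;
    	rel_list->dl_head = NULL;
    	rel_list->dl_cnt = 0;
    }
    mutex_exit(&rel_list->dl_mutex);
    /* ... pop entries from work_list without taking the shared mutex ... */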
2033 2032
2034 2033 void
2035 2034 ibd_rc_post_send(ibd_rc_chan_t *chan, ibd_swqe_t *node)
2036 2035 {
2037 2036 uint_t i;
2038 2037 uint_t num_posted;
2039 2038 uint_t n_wrs;
2040 2039 ibt_status_t ibt_status;
2041 2040 ibt_send_wr_t wrs[IBD_MAX_TX_POST_MULTIPLE];
2042 2041 ibd_swqe_t *tx_head, *elem;
2043 2042 ibd_swqe_t *nodes[IBD_MAX_TX_POST_MULTIPLE];
2044 2043
2045 2044 /* post the one request, then check for more */
2046 2045 ibt_status = ibt_post_send(chan->chan_hdl,
2047 2046 &node->w_swr, 1, NULL);
2048 2047 if (ibt_status != IBT_SUCCESS) {
2049 2048 ibd_print_warn(chan->state, "ibd_post_send: "
2050 2049 "posting one wr failed: ret=%d", ibt_status);
2051 2050 ibd_rc_tx_cleanup(node);
2052 2051 }
2053 2052
2054 2053 tx_head = NULL;
2055 2054 for (;;) {
2056 2055 if (tx_head == NULL) {
2057 2056 mutex_enter(&chan->tx_post_lock);
2058 2057 tx_head = chan->tx_head;
2059 2058 if (tx_head == NULL) {
2060 2059 chan->tx_busy = 0;
2061 2060 mutex_exit(&chan->tx_post_lock);
2062 2061 return;
2063 2062 }
2064 2063 chan->tx_head = NULL;
2065 2064 mutex_exit(&chan->tx_post_lock);
2066 2065 }
2067 2066
2068 2067 /*
2069 2068 * Collect pending requests, IBD_MAX_TX_POST_MULTIPLE wrs
2070 2069 * at a time if possible, and keep posting them.
2071 2070 */
2072 2071 for (n_wrs = 0, elem = tx_head;
2073 2072 (elem) && (n_wrs < IBD_MAX_TX_POST_MULTIPLE);
2074 2073 elem = WQE_TO_SWQE(elem->swqe_next), n_wrs++) {
2075 2074 nodes[n_wrs] = elem;
2076 2075 wrs[n_wrs] = elem->w_swr;
2077 2076 }
2078 2077 tx_head = elem;
2079 2078
2080 2079 ASSERT(n_wrs != 0);
2081 2080
2082 2081 /*
2083 2082 * If posting fails for some reason, we'll never receive
2084 2083 * completion intimation, so we'll need to cleanup. But
2085 2084 * we need to make sure we don't clean up nodes whose
2086 2085 * wrs have been successfully posted. We assume that the
2087 2086 * hca driver returns on the first failure to post and
2088 2087 * therefore the first 'num_posted' entries don't need
2089 2088 * cleanup here.
2090 2089 */
2091 2090 num_posted = 0;
2092 2091 ibt_status = ibt_post_send(chan->chan_hdl,
2093 2092 wrs, n_wrs, &num_posted);
2094 2093 if (ibt_status != IBT_SUCCESS) {
2095 2094 ibd_print_warn(chan->state, "ibd_post_send: "
2096 2095 "posting multiple wrs failed: "
2097 2096 "requested=%d, done=%d, ret=%d",
2098 2097 n_wrs, num_posted, ibt_status);
2099 2098
2100 2099 for (i = num_posted; i < n_wrs; i++)
2101 2100 ibd_rc_tx_cleanup(nodes[i]);
2102 2101 }
2103 2102 }
2104 2103 }
2105 2104
2106 2105 /*
2107 2106 * Common code that deals with cleanup after a successful or
2108 2107 * erroneous transmission attempt.
2109 2108 */
2110 2109 void
2111 2110 ibd_rc_tx_cleanup(ibd_swqe_t *swqe)
2112 2111 {
2113 2112 ibd_ace_t *ace = swqe->w_ahandle;
2114 2113 ibd_state_t *state;
2115 2114
2116 2115 ASSERT(ace != NULL);
2117 2116 ASSERT(ace->ac_chan != NULL);
2118 2117
2119 2118 state = ace->ac_chan->state;
2120 2119
2121 2120 /*
2122 2121 * If this was a dynamic registration in ibd_send(),
2123 2122 * deregister now.
2124 2123 */
2125 2124 if (swqe->swqe_im_mblk != NULL) {
2126 2125 ASSERT(swqe->w_buftype == IBD_WQE_MAPPED);
2127 2126 if (swqe->w_buftype == IBD_WQE_MAPPED) {
2128 2127 ibd_unmap_mem(state, swqe);
2129 2128 }
2130 2129 freemsg(swqe->swqe_im_mblk);
2131 2130 swqe->swqe_im_mblk = NULL;
2132 2131 } else {
2133 2132 ASSERT(swqe->w_buftype != IBD_WQE_MAPPED);
2134 2133 }
2135 2134
2136 2135 if (swqe->w_buftype == IBD_WQE_RC_COPYBUF) {
2137 2136 ibd_rc_tx_largebuf_t *lbufp;
2138 2137
2139 2138 lbufp = swqe->w_rc_tx_largebuf;
2140 2139 ASSERT(lbufp != NULL);
2141 2140
2142 2141 mutex_enter(&state->rc_tx_large_bufs_lock);
2143 2142 lbufp->lb_next = state->rc_tx_largebuf_free_head;
2144 2143 state->rc_tx_largebuf_free_head = lbufp;
2145 2144 state->rc_tx_largebuf_nfree++;
2146 2145 mutex_exit(&state->rc_tx_large_bufs_lock);
2147 2146 swqe->w_rc_tx_largebuf = NULL;
2148 2147 }
2149 2148
2150 2149
2151 2150 /*
2152 2151 * Release the send wqe for reuse.
2153 2152 */
2154 2153 ibd_rc_release_swqe(ace->ac_chan, swqe);
2155 2154
2156 2155 /*
2157 2156 * Drop the reference count on the AH; it can be reused
2158 2157 * now for a different destination if there are no more
2159 2158 * posted sends that will use it. This can be eliminated
2160 2159 * if we can always associate each Tx buffer with an AH.
2161 2160 * The ace can be null if we are cleaning up from the
2162 2161 * ibd_send() error path.
2163 2162 */
2164 2163 ibd_dec_ref_ace(state, ace);
2165 2164 }
2166 2165
2167 2166 void
2168 2167 ibd_rc_drain_scq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl)
2169 2168 {
2170 2169 ibd_state_t *state = chan->state;
2171 2170 ibd_wqe_t *wqe;
2172 2171 ibt_wc_t *wc, *wcs;
2173 2172 ibd_ace_t *ace;
2174 2173 uint_t numwcs, real_numwcs;
2175 2174 int i;
2176 2175 boolean_t encount_error;
2177 2176
2178 2177 wcs = chan->tx_wc;
2179 2178 numwcs = IBD_RC_MAX_CQ_WC;
2180 2179 encount_error = B_FALSE;
2181 2180
2182 2181 while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) {
2183 2182 for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) {
2184 2183 wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id;
2185 2184 if (wc->wc_status != IBT_WC_SUCCESS) {
2186 2185 if (encount_error == B_FALSE) {
2187 2186 /*
2188 2187 * This RC channel is in an error state;
2189 2188 * remove it.
2190 2189 */
2191 2190 encount_error = B_TRUE;
2192 2191 mutex_enter(&state->id_ac_mutex);
2193 2192 if ((chan->chan_state ==
2194 2193 IBD_RC_STATE_ACT_ESTAB) &&
2195 2194 (chan->state->id_link_state ==
2196 2195 LINK_STATE_UP) &&
2197 2196 ((ace = ibd_acache_find(state,
2198 2197 &chan->ace->ac_mac, B_FALSE, 0))
2199 2198 != NULL) && (ace == chan->ace)) {
2200 2199 ASSERT(ace->ac_mce == NULL);
2201 2200 INC_REF(ace, 1);
2202 2201 IBD_ACACHE_PULLOUT_ACTIVE(
2203 2202 state, ace);
2204 2203 chan->chan_state =
2205 2204 IBD_RC_STATE_ACT_CLOSING;
2206 2205 mutex_exit(&state->id_ac_mutex);
2207 2206 state->rc_reset_cnt++;
2208 2207 DPRINT(30, "ibd_rc_drain_scq: "
2209 2208 "wc_status(%d) != SUCC, "
2210 2209 "chan=%p, ace=%p, "
2211 2210 "link_state=%d"
2212 2211 "reset RC channel",
2213 2212 wc->wc_status, chan,
2214 2213 chan->ace, chan->state->
2215 2214 id_link_state);
2216 2215 ibd_rc_signal_act_close(
2217 2216 state, ace);
2218 2217 } else {
2219 2218 mutex_exit(&state->id_ac_mutex);
2220 2219 state->
2221 2220 rc_act_close_simultaneous++;
2222 2221 DPRINT(40, "ibd_rc_drain_scq: "
2223 2222 "wc_status(%d) != SUCC, "
2224 2223 "chan=%p, chan_state=%d,"
2225 2224 "ace=%p, link_state=%d."
2226 2225 "other thread is closing "
2227 2226 "it", wc->wc_status, chan,
2228 2227 chan->chan_state, chan->ace,
2229 2228 chan->state->id_link_state);
2230 2229 }
2231 2230 }
2232 2231 }
2233 2232 ibd_rc_tx_cleanup(WQE_TO_SWQE(wqe));
2234 2233 }
2235 2234
2236 2235 mutex_enter(&state->id_sched_lock);
2237 2236 if (state->id_sched_needed == 0) {
2238 2237 mutex_exit(&state->id_sched_lock);
2239 2238 } else if (state->id_sched_needed & IBD_RSRC_RC_SWQE) {
2240 2239 mutex_enter(&chan->tx_wqe_list.dl_mutex);
2241 2240 mutex_enter(&chan->tx_rel_list.dl_mutex);
2242 2241 if ((chan->tx_rel_list.dl_cnt +
2243 2242 chan->tx_wqe_list.dl_cnt) > IBD_RC_TX_FREE_THRESH) {
2244 2243 state->id_sched_needed &= ~IBD_RSRC_RC_SWQE;
2245 2244 mutex_exit(&chan->tx_rel_list.dl_mutex);
2246 2245 mutex_exit(&chan->tx_wqe_list.dl_mutex);
2247 2246 mutex_exit(&state->id_sched_lock);
2248 2247 state->rc_swqe_mac_update++;
2249 2248 mac_tx_update(state->id_mh);
2250 2249 } else {
2251 2250 state->rc_scq_no_swqe++;
2252 2251 mutex_exit(&chan->tx_rel_list.dl_mutex);
2253 2252 mutex_exit(&chan->tx_wqe_list.dl_mutex);
2254 2253 mutex_exit(&state->id_sched_lock);
2255 2254 }
2256 2255 } else if (state->id_sched_needed & IBD_RSRC_RC_TX_LARGEBUF) {
2257 2256 mutex_enter(&state->rc_tx_large_bufs_lock);
2258 2257 if (state->rc_tx_largebuf_nfree >
2259 2258 IBD_RC_TX_FREE_THRESH) {
2260 2259 ASSERT(state->rc_tx_largebuf_free_head != NULL);
2261 2260 state->id_sched_needed &=
2262 2261 ~IBD_RSRC_RC_TX_LARGEBUF;
2263 2262 mutex_exit(&state->rc_tx_large_bufs_lock);
2264 2263 mutex_exit(&state->id_sched_lock);
2265 2264 state->rc_xmt_buf_mac_update++;
2266 2265 mac_tx_update(state->id_mh);
2267 2266 } else {
2268 2267 state->rc_scq_no_largebuf++;
2269 2268 mutex_exit(&state->rc_tx_large_bufs_lock);
2270 2269 mutex_exit(&state->id_sched_lock);
2271 2270 }
2272 2271 } else if (state->id_sched_needed & IBD_RSRC_SWQE) {
2273 2272 mutex_enter(&state->id_tx_list.dl_mutex);
2274 2273 mutex_enter(&state->id_tx_rel_list.dl_mutex);
2275 2274 if ((state->id_tx_list.dl_cnt +
2276 2275 state->id_tx_rel_list.dl_cnt)
2277 2276 > IBD_FREE_SWQES_THRESH) {
2278 2277 state->id_sched_needed &= ~IBD_RSRC_SWQE;
2279 2278 state->id_sched_cnt++;
2280 2279 mutex_exit(&state->id_tx_rel_list.dl_mutex);
2281 2280 mutex_exit(&state->id_tx_list.dl_mutex);
2282 2281 mutex_exit(&state->id_sched_lock);
2283 2282 mac_tx_update(state->id_mh);
2284 2283 } else {
2285 2284 mutex_exit(&state->id_tx_rel_list.dl_mutex);
2286 2285 mutex_exit(&state->id_tx_list.dl_mutex);
2287 2286 mutex_exit(&state->id_sched_lock);
2288 2287 }
2289 2288 } else {
2290 2289 mutex_exit(&state->id_sched_lock);
2291 2290 }
2292 2291 }
2293 2292 }
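The mac_tx_update() calls above are the resume half of the GLDv3 flow-control
handshake: when the send path runs out of swqes or large Tx bufs it sets the
matching bit in id_sched_needed and signals back-pressure to MAC, and the
completion path clears the bit once resources climb back over
IBD_RC_TX_FREE_THRESH and tells MAC to retry. In outline (a sketch, not the
verbatim send path):

    /* send path, out of resources: */
    state->id_sched_needed |= IBD_RSRC_RC_SWQE;
    return (mp);			/* unsent mblk back to MAC: hold off */

    /* completion path, resources recovered: */
    state->id_sched_needed &= ~IBD_RSRC_RC_SWQE;
    mac_tx_update(state->id_mh);	/* MAC resumes calling the tx entry */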
2294 2293
2295 2294 /* Send CQ handler; calls ibd_rc_tx_cleanup to recycle Tx buffers */
2296 2295 /* ARGSUSED */
2297 2296 static void
2298 2297 ibd_rc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
2299 2298 {
2300 2299 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
2301 2300
2302 2301 if (ibd_rc_tx_softintr == 1) {
2303 2302 mutex_enter(&chan->tx_poll_lock);
2304 2303 if (chan->tx_poll_busy & IBD_CQ_POLLING) {
2305 2304 chan->tx_poll_busy |= IBD_REDO_CQ_POLLING;
2306 2305 mutex_exit(&chan->tx_poll_lock);
2307 2306 return;
2308 2307 } else {
2309 2308 mutex_exit(&chan->tx_poll_lock);
2310 2309 ddi_trigger_softintr(chan->scq_softintr);
2311 2310 }
2312 2311 } else
2313 2312 (void) ibd_rc_tx_recycle(arg);
2314 2313 }
2315 2314
2316 2315 static uint_t
2317 2316 ibd_rc_tx_recycle(caddr_t arg)
2318 2317 {
2319 2318 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
2320 2319 ibd_state_t *state = chan->state;
2321 2320 int flag, redo_flag;
2322 2321 int redo = 1;
2323 2322
2324 2323 flag = IBD_CQ_POLLING;
2325 2324 redo_flag = IBD_REDO_CQ_POLLING;
2326 2325
2327 2326 mutex_enter(&chan->tx_poll_lock);
2328 2327 if (chan->tx_poll_busy & flag) {
2329 2328 ibd_print_warn(state, "ibd_rc_tx_recycle: multiple polling "
2330 2329 "threads");
2331 2330 chan->tx_poll_busy |= redo_flag;
2332 2331 mutex_exit(&chan->tx_poll_lock);
2333 2332 return (DDI_INTR_CLAIMED);
2334 2333 }
2335 2334 chan->tx_poll_busy |= flag;
2336 2335 mutex_exit(&chan->tx_poll_lock);
2337 2336
2338 2337 /*
2339 2338 * Poll for completed entries; the CQ will not interrupt any
2340 2339 * more for completed packets.
2341 2340 */
2342 2341 ibd_rc_drain_scq(chan, chan->scq_hdl);
2343 2342
2344 2343 /*
2345 2344 * Now enable CQ notifications; all completions originating now
2346 2345 * will cause new interrupts.
2347 2346 */
2348 2347 do {
2349 2348 if (ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION) !=
2350 2349 IBT_SUCCESS) {
2351 2350 /*
2352 2351 * We do not expect a failure here.
2353 2352 */
2354 2353 DPRINT(40, "ibd_rc_tx_recycle: ibt_enable_cq_notify()"
2355 2354 " failed");
2356 2355 }
2357 2356
2358 2357 ibd_rc_drain_scq(chan, chan->scq_hdl);
2359 2358
2360 2359 mutex_enter(&chan->tx_poll_lock);
2361 2360 if (chan->tx_poll_busy & redo_flag)
2362 2361 chan->tx_poll_busy &= ~redo_flag;
2363 2362 else {
2364 2363 chan->tx_poll_busy &= ~flag;
2365 2364 redo = 0;
2366 2365 }
2367 2366 mutex_exit(&chan->tx_poll_lock);
2368 2367
2369 2368 } while (redo);
2370 2369
2371 2370 return (DDI_INTR_CLAIMED);
2372 2371 }
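The IBD_CQ_POLLING/IBD_REDO_CQ_POLLING bits implement a single-poller
handshake: a handler that finds a drain already in progress sets the redo bit
and leaves, and the owning poller keeps looping until the redo bit stays
clear, so no completion is lost and no two threads drain the send CQ at once.
The skeleton, with generic names:

    mutex_enter(&lock);
    if (busy & POLLING) {		/* another thread owns the CQ */
    	busy |= REDO;			/* ask it for one more pass */
    	mutex_exit(&lock);
    	return;
    }
    busy |= POLLING;			/* take ownership */
    mutex_exit(&lock);

    do {
    	drain_cq();
    	mutex_enter(&lock);
    	if (busy & REDO) {
    		busy &= ~REDO;		/* absorb the request, go again */
    		redo = 1;
    	} else {
    		busy &= ~POLLING;	/* done; release ownership */
    		redo = 0;
    	}
    	mutex_exit(&lock);
    } while (redo);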
2373 2372
2374 2373 static ibt_status_t
2375 2374 ibd_register_service(ibt_srv_desc_t *srv, ib_svc_id_t sid,
2376 2375 int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
2377 2376 {
2378 2377 ibd_service_t *p;
2379 2378 ibt_status_t status;
2380 2379
2381 2380 mutex_enter(&ibd_gstate.ig_mutex);
2382 2381 for (p = ibd_gstate.ig_service_list; p != NULL; p = p->is_link) {
2383 2382 if (p->is_sid == sid) {
2384 2383 p->is_ref_cnt++;
2385 2384 *srv_hdl = p->is_srv_hdl;
2386 2385 *ret_sid = sid;
2387 2386 mutex_exit(&ibd_gstate.ig_mutex);
2388 2387 return (IBT_SUCCESS);
2389 2388 }
2390 2389 }
2391 2390 status = ibt_register_service(ibd_gstate.ig_ibt_hdl, srv, sid,
2392 2391 num_sids, srv_hdl, ret_sid);
2393 2392 if (status == IBT_SUCCESS) {
2394 2393 p = kmem_alloc(sizeof (*p), KM_SLEEP);
2395 2394 p->is_srv_hdl = *srv_hdl;
2396 2395 p->is_sid = sid;
2397 2396 p->is_ref_cnt = 1;
2398 2397 p->is_link = ibd_gstate.ig_service_list;
2399 2398 ibd_gstate.ig_service_list = p;
2400 2399 }
2401 2400 mutex_exit(&ibd_gstate.ig_mutex);
2402 2401 return (status);
2403 2402 }
2404 2403
2405 2404 static ibt_status_t
2406 2405 ibd_deregister_service(ibt_srv_hdl_t srv_hdl)
2407 2406 {
2408 2407 ibd_service_t *p, **pp;
2409 2408 ibt_status_t status;
2410 2409
2411 2410 mutex_enter(&ibd_gstate.ig_mutex);
2412 2411 for (pp = &ibd_gstate.ig_service_list; *pp != NULL;
2413 2412 pp = &((*pp)->is_link)) {
2414 2413 p = *pp;
2415 2414 if (p->is_srv_hdl == srv_hdl) { /* Found it */
2416 2415 if (--p->is_ref_cnt == 0) {
2417 2416 status = ibt_deregister_service(
2418 2417 ibd_gstate.ig_ibt_hdl, srv_hdl);
2419 2418 *pp = p->is_link; /* link prev to next */
2420 2419 kmem_free(p, sizeof (*p));
2421 2420 } else {
2422 2421 status = IBT_SUCCESS;
2423 2422 }
2424 2423 mutex_exit(&ibd_gstate.ig_mutex);
2425 2424 return (status);
2426 2425 }
2427 2426 }
2428 2427 /* Should not ever get here */
2429 2428 mutex_exit(&ibd_gstate.ig_mutex);
2430 2429 return (IBT_FAILURE);
2431 2430 }
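Together these two functions keep a refcounted registry keyed by SID, so
several callers asking for the same service share a single IBTF registration
and only the last deregister reaches ibt_deregister_service(). Usage sketch
(the SID value is hypothetical):

    ibt_srv_hdl_t hdl;
    ib_svc_id_t sid_out;

    /* two registrations of one SID share the same IBTF entry */
    (void) ibd_register_service(&srvdesc, 0x12ab34, 1, &hdl, &sid_out);
    (void) ibd_register_service(&srvdesc, 0x12ab34, 1, &hdl, &sid_out);

    (void) ibd_deregister_service(hdl);	/* is_ref_cnt 2 -> 1 */
    (void) ibd_deregister_service(hdl);	/* 1 -> 0: IBTF deregister */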
2432 2431
2433 2432 /* Listen with corresponding service ID */
2434 2433 ibt_status_t
2435 2434 ibd_rc_listen(ibd_state_t *state)
2436 2435 {
2437 2436 ibt_srv_desc_t srvdesc;
2438 2437 ib_svc_id_t ret_sid;
2439 2438 ibt_status_t status;
2440 2439 ib_gid_t gid;
2441 2440
2442 2441 if (state->rc_listen_hdl != NULL) {
2443 2442 DPRINT(40, "ibd_rc_listen: rc_listen_hdl should be NULL");
2444 2443 return (IBT_FAILURE);
2445 2444 }
2446 2445
2447 2446 bzero(&srvdesc, sizeof (ibt_srv_desc_t));
2448 2447 srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
2449 2448 srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
2450 2449
2451 2450 /*
2452 2451 * Register the service with service id
2453 2452 * Incoming connection requests should arrive on this service id.
2454 2453 */
2455 2454 status = ibd_register_service(&srvdesc,
2456 2455 IBD_RC_QPN_TO_SID(state->id_qpnum),
2457 2456 1, &state->rc_listen_hdl, &ret_sid);
2458 2457 if (status != IBT_SUCCESS) {
2459 2458 DPRINT(40, "ibd_rc_listen: Service Registration Failed, "
2460 2459 "ret=%d", status);
2461 2460 return (status);
2462 2461 }
2463 2462
2464 2463 gid = state->id_sgid;
2465 2464
2466 2465 /* pass state as cm_private */
2467 2466 status = ibt_bind_service(state->rc_listen_hdl,
2468 2467 gid, NULL, state, &state->rc_listen_bind);
2469 2468 if (status != IBT_SUCCESS) {
2470 2469 DPRINT(40, "ibd_rc_listen:"
2471 2470 " fail to bind port: <%d>", status);
2472 2471 (void) ibd_deregister_service(state->rc_listen_hdl);
2473 2472 return (status);
2474 2473 }
2475 2474
2476 2475 /*
2477 2476 * Legacy OFED had used a wrong service ID (one additional zero digit)
2478 2477 * for many years. To interop with legacy OFED, we support this wrong
2479 2478 * service ID here.
2480 2479 */
2481 2480 ASSERT(state->rc_listen_hdl_OFED_interop == NULL);
2482 2481
2483 2482 bzero(&srvdesc, sizeof (ibt_srv_desc_t));
2484 2483 srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
2485 2484 srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
2486 2485
2487 2486 /*
2488 2487 * Register the service with service id
2489 2488 * Incoming connection requests should arrive on this service id.
2490 2489 */
2491 2490 status = ibd_register_service(&srvdesc,
2492 2491 IBD_RC_QPN_TO_SID_OFED_INTEROP(state->id_qpnum),
2493 2492 1, &state->rc_listen_hdl_OFED_interop, &ret_sid);
2494 2493 if (status != IBT_SUCCESS) {
2495 2494 DPRINT(40,
2496 2495 "ibd_rc_listen: Service Registration for Legacy OFED "
2497 2496 "Failed %d", status);
2498 2497 (void) ibt_unbind_service(state->rc_listen_hdl,
2499 2498 state->rc_listen_bind);
2500 2499 (void) ibd_deregister_service(state->rc_listen_hdl);
2501 2500 return (status);
2502 2501 }
2503 2502
2504 2503 gid = state->id_sgid;
2505 2504
2506 2505 /* pass state as cm_private */
2507 2506 status = ibt_bind_service(state->rc_listen_hdl_OFED_interop,
2508 2507 gid, NULL, state, &state->rc_listen_bind_OFED_interop);
2509 2508 if (status != IBT_SUCCESS) {
2510 2509 DPRINT(40, "ibd_rc_listen: fail to bind port: <%d> for "
2511 2510 "Legacy OFED listener", status);
2512 2511 (void) ibd_deregister_service(
2513 2512 state->rc_listen_hdl_OFED_interop);
2514 2513 (void) ibt_unbind_service(state->rc_listen_hdl,
2515 2514 state->rc_listen_bind);
2516 2515 (void) ibd_deregister_service(state->rc_listen_hdl);
2517 2516 return (status);
2518 2517 }
2519 2518
2520 2519 return (IBT_SUCCESS);
2521 2520 }
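Both listeners bind the same GID and differ only in the service-id prefix
ORed with the low 24 bits of the UD QPN; that SID is what a connecting peer
computes in order to find this instance. With a hypothetical QPN:

    uint32_t qpn = 0x12ab34;	/* hypothetical UD QPN */

    /* both are prefix | (qpn & 0xffffff); only the prefix differs */
    uint64_t sid = IBD_RC_QPN_TO_SID(qpn);
    uint64_t sid_ofed = IBD_RC_QPN_TO_SID_OFED_INTEROP(qpn);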
2522 2521
2523 2522 void
2524 2523 ibd_rc_stop_listen(ibd_state_t *state)
2525 2524 {
2526 2525 int ret;
2527 2526
2528 2527 /* Disable incoming connection requests */
2529 2528 if (state->rc_listen_hdl != NULL) {
2530 2529 ret = ibt_unbind_all_services(state->rc_listen_hdl);
2531 2530 if (ret != 0) {
2532 2531 DPRINT(40, "ibd_rc_stop_listen:"
2533 2532 "ibt_unbind_all_services() failed, ret=%d", ret);
2534 2533 }
2535 2534 ret = ibd_deregister_service(state->rc_listen_hdl);
2536 2535 if (ret != 0) {
2537 2536 DPRINT(40, "ibd_rc_stop_listen:"
2538 2537 "ibd_deregister_service() failed, ret=%d", ret);
2539 2538 } else {
2540 2539 state->rc_listen_hdl = NULL;
2541 2540 }
2542 2541 }
2543 2542
2544 2543 /* Disable incoming connection requests */
2545 2544 if (state->rc_listen_hdl_OFED_interop != NULL) {
2546 2545 ret = ibt_unbind_all_services(
2547 2546 state->rc_listen_hdl_OFED_interop);
2548 2547 if (ret != 0) {
2549 2548 DPRINT(40, "ibd_rc_stop_listen:"
2550 2549 "ibt_unbind_all_services() failed: %d", ret);
2551 2550 }
2552 2551 ret = ibd_deregister_service(state->rc_listen_hdl_OFED_interop);
2553 2552 if (ret != 0) {
2554 2553 DPRINT(40, "ibd_rc_stop_listen:"
2555 2554 "ibd_deregister_service() failed: %d", ret);
2556 2555 } else {
2557 2556 state->rc_listen_hdl_OFED_interop = NULL;
2558 2557 }
2559 2558 }
2560 2559 }
2561 2560
2562 2561 void
2563 2562 ibd_rc_close_all_chan(ibd_state_t *state)
2564 2563 {
2565 2564 ibd_rc_chan_t *rc_chan;
2566 2565 ibd_ace_t *ace, *pre_ace;
2567 2566 uint_t attempts;
2568 2567
2569 2568 /* Disable all Rx routines */
2570 2569 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
2571 2570 rc_chan = state->rc_pass_chan_list.chan_list;
2572 2571 while (rc_chan != NULL) {
2573 2572 ibt_set_cq_handler(rc_chan->rcq_hdl, 0, 0);
2574 2573 rc_chan = rc_chan->next;
2575 2574 }
2576 2575 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
2577 2576
2578 2577 if (state->rc_enable_srq) {
2579 2578 attempts = 10;
2580 2579 while (state->rc_srq_rwqe_list.dl_bufs_outstanding > 0) {
2581 2580 DPRINT(30, "ibd_rc_close_all_chan: outstanding > 0");
2582 2581 delay(drv_usectohz(100000));
2583 2582 if (--attempts == 0) {
2584 2583 /*
2585 2584 * There are pending bufs with the network
2586 2585 * layer and we have no choice but to wait
2587 2586 * for them to be returned. Reap all the
2588 2587 * Tx/Rx completions that were posted since
2589 2588 * we turned off the notification, and give
2590 2589 * up waiting.
2591 2590 */
2592 2591 break;
2593 2592 }
2594 2593 }
2595 2594 }
2596 2595
2597 2596 /* Close all passive RC channels */
2598 2597 rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
2599 2598 while (rc_chan != NULL) {
2600 2599 (void) ibd_rc_pas_close(rc_chan, B_TRUE, B_FALSE);
2601 2600 rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
2602 2601 }
2603 2602
2604 2603 /* Close all active RC channels */
2605 2604 mutex_enter(&state->id_ac_mutex);
2606 2605 state->id_ac_hot_ace = NULL;
2607 2606 ace = list_head(&state->id_ah_active);
2608 2607 while ((pre_ace = ace) != NULL) {
2609 2608 ace = list_next(&state->id_ah_active, ace);
2610 2609 if (pre_ace->ac_chan != NULL) {
2611 2610 INC_REF(pre_ace, 1);
2612 2611 IBD_ACACHE_PULLOUT_ACTIVE(state, pre_ace);
2613 2612 pre_ace->ac_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
2614 2613 ibd_rc_add_to_chan_list(&state->rc_obs_act_chan_list,
2615 2614 pre_ace->ac_chan);
2616 2615 }
2617 2616 }
2618 2617 mutex_exit(&state->id_ac_mutex);
2619 2618
2620 2619 rc_chan = ibd_rc_rm_header_chan_list(&state->rc_obs_act_chan_list);
2621 2620 while (rc_chan != NULL) {
2622 2621 ace = rc_chan->ace;
2623 2622 ibd_rc_act_close(rc_chan, B_TRUE);
2624 2623 if (ace != NULL) {
2625 2624 mutex_enter(&state->id_ac_mutex);
2626 2625 ASSERT(ace->ac_ref != 0);
2627 2626 atomic_dec_32(&ace->ac_ref);
2628 2627 ace->ac_chan = NULL;
2629 2628 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
2630 2629 IBD_ACACHE_INSERT_FREE(state, ace);
2631 2630 ace->ac_ref = 0;
2632 2631 } else {
2633 2632 ace->ac_ref |= CYCLEVAL;
2634 2633 state->rc_delay_ace_recycle++;
2635 2634 }
2636 2635 mutex_exit(&state->id_ac_mutex);
2637 2636 }
2638 2637 rc_chan = ibd_rc_rm_header_chan_list(
2639 2638 &state->rc_obs_act_chan_list);
2640 2639 }
2641 2640
2642 2641 attempts = 400;
2643 2642 while (((state->rc_num_tx_chan != 0) ||
2644 2643 (state->rc_num_rx_chan != 0)) && (attempts > 0)) {
2645 2644 /* Another thread is closing the CM channel; wait for it */
2646 2645 delay(drv_usectohz(100000));
2647 2646 attempts--;
2648 2647 }
2649 2648 }
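The shutdown path above leans on a bounded-wait idiom that recurs throughout
this file: poll a condition at 100 ms intervals and give up after a fixed
number of tries rather than blocking forever. In general form (done() stands
in for whatever condition is being waited on):

    uint_t attempts = 400;			/* 400 * 100 ms = 40 s */

    while (!done() && attempts > 0) {
    	delay(drv_usectohz(100000));		/* sleep 100 ms */
    	attempts--;
    }
    if (attempts == 0) {
    	/* timed out; note it and carry on with the teardown */
    }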
2650 2649
2651 2650 void
2652 2651 ibd_rc_try_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path)
2653 2652 {
2654 2653 ibt_status_t status;
2655 2654
2656 2655 if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
2657 2656 return;
2658 2657
2659 2658 status = ibd_rc_connect(state, ace, path,
2660 2659 IBD_RC_SERVICE_ID_OFED_INTEROP);
2661 2660
2662 2661 if (status != IBT_SUCCESS) {
2663 2662 /* wait for the peer side to remove the stale channel */
2664 2663 delay(drv_usectohz(10000));
2665 2664 if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
2666 2665 return;
2667 2666 status = ibd_rc_connect(state, ace, path,
2668 2667 IBD_RC_SERVICE_ID_OFED_INTEROP);
2669 2668 }
2670 2669
2671 2670 if (status != IBT_SUCCESS) {
2672 2671 /* wait for the peer side to remove the stale channel */
2673 2672 delay(drv_usectohz(10000));
2674 2673 if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
2675 2674 return;
2676 2675 (void) ibd_rc_connect(state, ace, path,
2677 2676 IBD_RC_SERVICE_ID);
2678 2677 }
2679 2678 }
2680 2679
2681 2680 /*
2682 2681 * Allocates channel and sets the ace->ac_chan to it.
2683 2682 * Opens the channel.
2684 2683 */
2685 2684 ibt_status_t
2686 2685 ibd_rc_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path,
2687 2686 uint64_t ietf_cm_service_id)
2688 2687 {
2689 2688 ibt_status_t status = 0;
2690 2689 ibt_rc_returns_t open_returns;
2691 2690 ibt_chan_open_args_t open_args;
2692 2691 ibd_rc_msg_hello_t hello_req_msg;
2693 2692 ibd_rc_msg_hello_t *hello_ack_msg;
2694 2693 ibd_rc_chan_t *chan;
2695 2694 ibt_ud_dest_query_attr_t dest_attrs;
2696 2695
2697 2696 ASSERT(ace != NULL);
2698 2697 ASSERT(ace->ac_mce == NULL);
2699 2698 ASSERT(ace->ac_chan == NULL);
2700 2699
2701 2700 if ((status = ibd_rc_alloc_chan(&chan, state, B_TRUE)) != IBT_SUCCESS) {
2702 2701 DPRINT(10, "ibd_rc_connect: ibd_rc_alloc_chan() failed");
2703 2702 return (status);
2704 2703 }
2705 2704
2706 2705 ace->ac_chan = chan;
2707 2706 chan->state = state;
2708 2707 chan->ace = ace;
2709 2708
2710 2709 ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)ace);
2711 2710
2712 2711 hello_ack_msg = kmem_zalloc(sizeof (ibd_rc_msg_hello_t), KM_SLEEP);
2713 2712
2714 2713 /*
2715 2714 * open the channel
2716 2715 */
2717 2716 bzero(&open_args, sizeof (ibt_chan_open_args_t));
2718 2717 bzero(&open_returns, sizeof (ibt_rc_returns_t));
2719 2718
2720 2719 open_args.oc_cm_handler = ibd_rc_dispatch_actv_mad;
2721 2720 open_args.oc_cm_clnt_private = (void *)(uintptr_t)ace;
2722 2721
2723 2722 /*
2724 2723 * update path record with the SID
2725 2724 */
2726 2725 if ((status = ibt_query_ud_dest(ace->ac_dest, &dest_attrs))
2727 2726 != IBT_SUCCESS) {
2728 2727 DPRINT(40, "ibd_rc_connect: ibt_query_ud_dest() failed, "
2729 2728 "ret=%d", status);
2730 2729 return (status);
2731 2730 }
2732 2731
2733 2732 path->pi_sid =
2734 2733 ietf_cm_service_id | ((dest_attrs.ud_dst_qpn) & 0xffffff);
2735 2734
2736 2735
2737 2736 /* pre-allocate memory for hello ack message */
2738 2737 open_returns.rc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
2739 2738 open_returns.rc_priv_data = hello_ack_msg;
2740 2739
2741 2740 open_args.oc_path = path;
2742 2741
2743 2742 open_args.oc_path_rnr_retry_cnt = 1;
2744 2743 open_args.oc_path_retry_cnt = 1;
2745 2744
2746 2745 /* We don't do RDMA */
2747 2746 open_args.oc_rdma_ra_out = 0;
2748 2747 open_args.oc_rdma_ra_in = 0;
2749 2748
2750 2749 hello_req_msg.reserved_qpn = htonl(state->id_qpnum);
2751 2750 hello_req_msg.rx_mtu = htonl(state->rc_mtu);
2752 2751 open_args.oc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
2753 2752 open_args.oc_priv_data = (void *)(&hello_req_msg);
2754 2753
2755 2754 ASSERT(open_args.oc_priv_data_len <= IBT_REQ_PRIV_DATA_SZ);
2756 2755 ASSERT(open_returns.rc_priv_data_len <= IBT_REP_PRIV_DATA_SZ);
2757 2756 ASSERT(open_args.oc_cm_handler != NULL);
2758 2757
2759 2758 status = ibt_open_rc_channel(chan->chan_hdl, IBT_OCHAN_NO_FLAGS,
2760 2759 IBT_BLOCKING, &open_args, &open_returns);
2761 2760
2762 2761 if (status == IBT_SUCCESS) {
2763 2762 /* Success! */
2764 2763 DPRINT(2, "ibd_rc_connect: call ibt_open_rc_channel succ!");
2765 2764 state->rc_conn_succ++;
2766 2765 kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
2767 2766 return (IBT_SUCCESS);
2768 2767 }
2769 2768
2770 2769 /* failure */
2771 2770 (void) ibt_flush_channel(chan->chan_hdl);
2772 2771 ibd_rc_free_chan(chan);
2773 2772 ace->ac_chan = NULL;
2774 2773
2775 2774 /* check open_returns, report the error and exit */
2776 2775 DPRINT(30, "ibd_rc_connect: call ibt_open_rc_chan fail."
2777 2776 "ret status = %d, reason=%d, ace=%p, mtu=0x%x, qpn=0x%x,"
2778 2777 " peer qpn=0x%x", status, (int)open_returns.rc_status, ace,
2779 2778 hello_req_msg.rx_mtu, hello_req_msg.reserved_qpn,
2780 2779 dest_attrs.ud_dst_qpn);
2781 2780 kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
2782 2781 return (status);
2783 2782 }
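The hello exchange rides in the CM private data of the REQ and REP MADs, so
both fields cross the wire in network byte order: each side encodes with
htonl() and the receiver decodes with ntohl(). The matching pair, as seen
here and in ibd_rc_handle_req() below (peer_* names illustrative):

    /* active side, encoding into the REQ private data: */
    hello_req_msg.reserved_qpn = htonl(state->id_qpnum);
    hello_req_msg.rx_mtu = htonl(state->rc_mtu);

    /* receiver, decoding from cm_priv_data: */
    uint32_t peer_qpn = ntohl(hello_msg->reserved_qpn);
    uint32_t peer_mtu = ntohl(hello_msg->rx_mtu);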
2784 2783
2785 2784 void
2786 2785 ibd_rc_signal_act_close(ibd_state_t *state, ibd_ace_t *ace)
2787 2786 {
2788 2787 ibd_req_t *req;
2789 2788
2790 2789 req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
2791 2790 if (req == NULL) {
2792 2791 ibd_print_warn(state, "ibd_rc_signal_act_close: alloc "
2793 2792 "ibd_req_t fail");
2794 2793 mutex_enter(&state->rc_obs_act_chan_list.chan_list_mutex);
2795 2794 ace->ac_chan->next = state->rc_obs_act_chan_list.chan_list;
2796 2795 state->rc_obs_act_chan_list.chan_list = ace->ac_chan;
2797 2796 mutex_exit(&state->rc_obs_act_chan_list.chan_list_mutex);
2798 2797 } else {
2799 2798 req->rq_ptr = ace->ac_chan;
2800 2799 ibd_queue_work_slot(state, req, IBD_ASYNC_RC_CLOSE_ACT_CHAN);
2801 2800 }
2802 2801 }
2803 2802
2804 2803 void
2805 2804 ibd_rc_signal_ace_recycle(ibd_state_t *state, ibd_ace_t *ace)
2806 2805 {
2807 2806 ibd_req_t *req;
2808 2807
2809 2808 mutex_enter(&state->rc_ace_recycle_lock);
2810 2809 if (state->rc_ace_recycle != NULL) {
2811 2810 mutex_exit(&state->rc_ace_recycle_lock);
2812 2811 return;
2813 2812 }
2814 2813
2815 2814 req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
2816 2815 if (req == NULL) {
2817 2816 mutex_exit(&state->rc_ace_recycle_lock);
2818 2817 return;
2819 2818 }
2820 2819
2821 2820 state->rc_ace_recycle = ace;
2822 2821 mutex_exit(&state->rc_ace_recycle_lock);
2823 2822 ASSERT(ace->ac_mce == NULL);
2824 2823 INC_REF(ace, 1);
2825 2824 IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
2826 2825 req->rq_ptr = ace;
2827 2826 ibd_queue_work_slot(state, req, IBD_ASYNC_RC_RECYCLE_ACE);
2828 2827 }
2829 2828
2830 2829 /*
2831 2830 * Close an active channel
2832 2831 *
2833 2832 * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
2834 2833 */
2835 2834 static void
2836 2835 ibd_rc_act_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan)
2837 2836 {
2838 2837 ibd_state_t *state;
2839 2838 ibd_ace_t *ace;
2840 2839 uint_t times;
2841 2840 ibt_status_t ret;
2842 2841
2843 2842 ASSERT(chan != NULL);
2844 2843
2845 2844 chan->state->rc_act_close++;
2846 2845 switch (chan->chan_state) {
2847 2846 case IBD_RC_STATE_ACT_CLOSING: /* stale, close it */
2848 2847 case IBD_RC_STATE_ACT_ESTAB:
2849 2848 DPRINT(30, "ibd_rc_act_close-1: close and free chan, "
2850 2849 "act_state=%d, chan=%p", chan->chan_state, chan);
2851 2850 chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
2852 2851 ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
2853 2852 /*
2854 2853 * Wait for the send queue to drain. The old timeout was 50
2855 2854 * (5 seconds), but experiments showed that is not enough time
2856 2855 * for IBTL to return all buffers and drop ace->ac_ref;
2857 2856 * 25 seconds works well. As further evidence, IBTL takes about
2858 2857 * 17 seconds every time it cleans up a stale RC channel.
2859 2858 */
2860 2859 times = 250;
2861 2860 ace = chan->ace;
2862 2861 ASSERT(ace != NULL);
2863 2862 state = chan->state;
2864 2863 ASSERT(state != NULL);
2865 2864 mutex_enter(&state->id_ac_mutex);
2866 2865 mutex_enter(&chan->tx_wqe_list.dl_mutex);
2867 2866 mutex_enter(&chan->tx_rel_list.dl_mutex);
2868 2867 while (((chan->tx_wqe_list.dl_cnt + chan->tx_rel_list.dl_cnt)
2869 2868 != chan->scq_size) || ((ace->ac_ref != 1) &&
2870 2869 (ace->ac_ref != (CYCLEVAL+1)))) {
2871 2870 mutex_exit(&chan->tx_rel_list.dl_mutex);
2872 2871 mutex_exit(&chan->tx_wqe_list.dl_mutex);
2873 2872 mutex_exit(&state->id_ac_mutex);
2874 2873 times--;
2875 2874 if (times == 0) {
2876 2875 state->rc_act_close_not_clean++;
2877 2876 DPRINT(40, "ibd_rc_act_close: dl_cnt(tx_wqe_"
2878 2877 "list=%d, tx_rel_list=%d) != chan->"
2879 2878 "scq_size=%d, OR ac_ref(=%d) not clean",
2880 2879 chan->tx_wqe_list.dl_cnt,
2881 2880 chan->tx_rel_list.dl_cnt,
2882 2881 chan->scq_size, ace->ac_ref);
2883 2882 break;
2884 2883 }
2885 2884 mutex_enter(&chan->tx_poll_lock);
2886 2885 if (chan->tx_poll_busy & IBD_CQ_POLLING) {
2887 2886 DPRINT(40, "ibd_rc_act_close: multiple "
2888 2887 "polling threads");
2889 2888 mutex_exit(&chan->tx_poll_lock);
2890 2889 } else {
2891 2890 chan->tx_poll_busy = IBD_CQ_POLLING;
2892 2891 mutex_exit(&chan->tx_poll_lock);
2893 2892 ibd_rc_drain_scq(chan, chan->scq_hdl);
2894 2893 mutex_enter(&chan->tx_poll_lock);
2895 2894 chan->tx_poll_busy = 0;
2896 2895 mutex_exit(&chan->tx_poll_lock);
2897 2896 }
2898 2897 delay(drv_usectohz(100000));
2899 2898 mutex_enter(&state->id_ac_mutex);
2900 2899 mutex_enter(&chan->tx_wqe_list.dl_mutex);
2901 2900 mutex_enter(&chan->tx_rel_list.dl_mutex);
2902 2901 }
2903 2902 if (times != 0) {
2904 2903 mutex_exit(&chan->tx_rel_list.dl_mutex);
2905 2904 mutex_exit(&chan->tx_wqe_list.dl_mutex);
2906 2905 mutex_exit(&state->id_ac_mutex);
2907 2906 }
2908 2907
2909 2908 ibt_set_cq_handler(chan->scq_hdl, 0, 0);
2910 2909 if (is_close_rc_chan) {
2911 2910 ret = ibt_close_rc_channel(chan->chan_hdl,
2912 2911 IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
2913 2912 0);
2914 2913 if (ret != IBT_SUCCESS) {
2915 2914 DPRINT(40, "ibd_rc_act_close: ibt_close_rc_"
2916 2915 "channel fail, chan=%p, ret=%d",
2917 2916 chan, ret);
2918 2917 } else {
2919 2918 DPRINT(30, "ibd_rc_act_close: ibt_close_rc_"
2920 2919 "channel succ, chan=%p", chan);
2921 2920 }
2922 2921 }
2923 2922
2924 2923 ibd_rc_free_chan(chan);
2925 2924 break;
2926 2925 case IBD_RC_STATE_ACT_REP_RECV:
2927 2926 chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
2928 2927 (void) ibt_flush_channel(chan->chan_hdl);
2929 2928 ibd_rc_free_chan(chan);
2930 2929 break;
2931 2930 case IBD_RC_STATE_ACT_ERROR:
2932 2931 DPRINT(40, "ibd_rc_act_close: IBD_RC_STATE_ERROR branch");
2933 2932 break;
2934 2933 default:
2935 2934 DPRINT(40, "ibd_rc_act_close: default branch, act_state=%d, "
2936 2935 "chan=%p", chan->chan_state, chan);
2937 2936 }
2938 2937 }
2939 2938
2940 2939 /*
2941 2940 * Close a passive channel
2942 2941 *
2943 2942 * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
2944 2943 *
2945 2944 * is_timeout_close: if B_TRUE, this function is called by the connection
2946 2945 * reaper (refer to function ibd_rc_conn_timeout_call). When the connection
2947 2946 * reaper calls ibd_rc_pas_close(), and if it finds that dl_bufs_outstanding
2948 2947 * or chan->rcq_invoking is non-zero, then it can simply put that channel back
2949 2948 * on the passive channels list and move on, since it might be an indication
2950 2949 * that the channel became active again by the time we started its
2951 2950 * It is costlier to do the cleanup and then reinitiate the channel
2952 2951 * establishment and hence it will help to be conservative when we do the
2953 2952 * cleanup.
2954 2953 */
2955 2954 int
2956 2955 ibd_rc_pas_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan,
2957 2956 boolean_t is_timeout_close)
2958 2957 {
2959 2958 uint_t times;
2960 2959 ibt_status_t ret;
2961 2960
2962 2961 ASSERT(chan != NULL);
2963 2962 chan->state->rc_pas_close++;
2964 2963
2965 2964 switch (chan->chan_state) {
2966 2965 case IBD_RC_STATE_PAS_ESTAB:
2967 2966 if (is_timeout_close) {
2968 2967 if ((chan->rcq_invoking != 0) ||
2969 2968 ((!chan->state->rc_enable_srq) &&
2970 2969 (chan->rx_wqe_list.dl_bufs_outstanding > 0))) {
2971 2970 if (ibd_rc_re_add_to_pas_chan_list(chan)) {
2972 2971 return (DDI_FAILURE);
2973 2972 }
2974 2973 }
2975 2974 }
2976 2975 /*
2977 2976 * First, stop receive interrupts; this stops the
2978 2977 * connection from handing up buffers to higher layers.
2979 2978 * Wait for receive buffers to be returned; give up
2980 2979 * after 5 seconds.
2981 2980 */
2982 2981 ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
2983 2982 /* Wait 0.01 second to let ibt_set_cq_handler() take effect */
2984 2983 delay(drv_usectohz(10000));
2985 2984 if (!chan->state->rc_enable_srq) {
2986 2985 times = 50;
2987 2986 while (chan->rx_wqe_list.dl_bufs_outstanding > 0) {
2988 2987 delay(drv_usectohz(100000));
2989 2988 if (--times == 0) {
2990 2989 DPRINT(40, "ibd_rc_pas_close : "
2991 2990 "reclaiming failed");
2992 2991 ibd_rc_poll_rcq(chan, chan->rcq_hdl);
2993 2992 ibt_set_cq_handler(chan->rcq_hdl,
2994 2993 ibd_rc_rcq_handler,
2995 2994 (void *)(uintptr_t)chan);
2996 2995 return (DDI_FAILURE);
2997 2996 }
2998 2997 }
2999 2998 }
3000 2999 times = 50;
3001 3000 while (chan->rcq_invoking != 0) {
3002 3001 delay(drv_usectohz(100000));
3003 3002 if (--times == 0) {
3004 3003 DPRINT(40, "ibd_rc_pas_close : "
3005 3004 "rcq handler is being invoked");
3006 3005 chan->state->rc_pas_close_rcq_invoking++;
3007 3006 break;
3008 3007 }
3009 3008 }
3010 3009 ibt_set_cq_handler(chan->scq_hdl, 0, 0);
3011 3010 chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
3012 3011 DPRINT(30, "ibd_rc_pas_close-1: close and free chan, "
3013 3012 "chan_state=%d, chan=%p", chan->chan_state, chan);
3014 3013 if (is_close_rc_chan) {
3015 3014 ret = ibt_close_rc_channel(chan->chan_hdl,
3016 3015 IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
3017 3016 0);
3018 3017 if (ret != IBT_SUCCESS) {
3019 3018 DPRINT(40, "ibd_rc_pas_close: ibt_close_rc_"
3020 3019 "channel() fail, chan=%p, ret=%d", chan,
3021 3020 ret);
3022 3021 } else {
3023 3022 DPRINT(30, "ibd_rc_pas_close: ibt_close_rc_"
3024 3023 "channel() succ, chan=%p", chan);
3025 3024 }
3026 3025 }
3027 3026 ibd_rc_free_chan(chan);
3028 3027 break;
3029 3028 case IBD_RC_STATE_PAS_REQ_RECV:
3030 3029 chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
3031 3030 (void) ibt_flush_channel(chan->chan_hdl);
3032 3031 ibd_rc_free_chan(chan);
3033 3032 break;
3034 3033 default:
3035 3034 DPRINT(40, "ibd_rc_pas_close: default, chan_state=%d, chan=%p",
3036 3035 chan->chan_state, chan);
3037 3036 }
3038 3037 return (DDI_SUCCESS);
3039 3038 }
3040 3039
3041 3040 /*
3042 3041 * Passive Side:
3043 3042 * Handle an incoming CM REQ from active side.
3044 3043 *
3045 3044 * If success, this function allocates an ibd_rc_chan_t, then
3046 3045 * assigns it to "*ret_conn".
3047 3046 */
3048 3047 static ibt_cm_status_t
3049 3048 ibd_rc_handle_req(void *arg, ibd_rc_chan_t **ret_conn,
3050 3049 ibt_cm_event_t *ibt_cm_event, ibt_cm_return_args_t *ret_args,
3051 3050 void *ret_priv_data)
3052 3051 {
3053 3052 ibd_rc_msg_hello_t *hello_msg;
3054 3053 ibd_state_t *state = (ibd_state_t *)arg;
3055 3054 ibd_rc_chan_t *chan;
3056 3055
3057 3056 if (ibd_rc_alloc_chan(&chan, state, B_FALSE) != IBT_SUCCESS) {
3058 3057 DPRINT(40, "ibd_rc_handle_req: ibd_rc_alloc_chan() failed");
3059 3058 return (IBT_CM_REJECT);
3060 3059 }
3061 3060
3062 3061 ibd_rc_add_to_chan_list(&state->rc_pass_chan_list, chan);
3063 3062
3064 3063 ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)chan);
3065 3064
3066 3065 if (!state->rc_enable_srq) {
3067 3066 if (ibd_rc_init_rxlist(chan) != DDI_SUCCESS) {
3068 3067 ibd_rc_free_chan(chan);
3069 3068 DPRINT(40, "ibd_rc_handle_req: ibd_rc_init_rxlist() "
3070 3069 "failed");
3071 3070 return (IBT_CM_REJECT);
3072 3071 }
3073 3072 }
3074 3073
3075 3074 ret_args->cm_ret.rep.cm_channel = chan->chan_hdl;
3076 3075
3077 3076 /* We don't do RDMA */
3078 3077 ret_args->cm_ret.rep.cm_rdma_ra_out = 0;
3079 3078 ret_args->cm_ret.rep.cm_rdma_ra_in = 0;
3080 3079
3081 3080 ret_args->cm_ret.rep.cm_rnr_retry_cnt = 7;
3082 3081 ret_args->cm_ret_len = sizeof (ibd_rc_msg_hello_t);
3083 3082
3084 3083 hello_msg = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
3085 3084 DPRINT(30, "ibd_rc_handle_req(): peer qpn=0x%x, peer mtu=0x%x",
3086 3085 ntohl(hello_msg->reserved_qpn), ntohl(hello_msg->rx_mtu));
3087 3086
3088 3087 hello_msg = (ibd_rc_msg_hello_t *)ret_priv_data;
3089 3088 hello_msg->reserved_qpn = htonl(state->id_qpnum);
3090 3089 hello_msg->rx_mtu = htonl(state->rc_mtu);
3091 3090
3092 3091 chan->chan_state = IBD_RC_STATE_PAS_REQ_RECV; /* ready to receive */
3093 3092 *ret_conn = chan;
3094 3093
3095 3094 return (IBT_CM_ACCEPT);
3096 3095 }
3097 3096
3098 3097 /*
3099 3098 * ibd_rc_handle_act_estab -- handler for connection established completion
3100 3099 * for active side.
3101 3100 */
3102 3101 static ibt_cm_status_t
3103 3102 ibd_rc_handle_act_estab(ibd_ace_t *ace)
3104 3103 {
3105 3104 ibt_status_t result;
3106 3105
3107 3106 switch (ace->ac_chan->chan_state) {
3108 3107 case IBD_RC_STATE_ACT_REP_RECV:
3109 3108 ace->ac_chan->chan_state = IBD_RC_STATE_ACT_ESTAB;
3110 3109 result = ibt_enable_cq_notify(ace->ac_chan->rcq_hdl,
3111 3110 IBT_NEXT_COMPLETION);
3112 3111 if (result != IBT_SUCCESS) {
3113 3112 DPRINT(40, "ibd_rc_handle_act_estab: "
3114 3113 "ibt_enable_cq_notify(rcq) "
3115 3114 "failed: status %d", result);
3116 3115 return (IBT_CM_REJECT);
3117 3116 }
3118 3117 break;
3119 3118 default:
3120 3119 DPRINT(40, "ibd_rc_handle_act_estab: default "
3121 3120 "branch, act_state=%d", ace->ac_chan->chan_state);
3122 3121 return (IBT_CM_REJECT);
3123 3122 }
3124 3123 return (IBT_CM_ACCEPT);
3125 3124 }
3126 3125
3127 3126 /*
3128 3127 * ibd_rc_handle_pas_estab -- handler for connection established completion
3129 3128 * for passive side.
3130 3129 */
3131 3130 static ibt_cm_status_t
3132 3131 ibd_rc_handle_pas_estab(ibd_rc_chan_t *chan)
3133 3132 {
3134 3133 ibt_status_t result;
3135 3134
3136 3135 switch (chan->chan_state) {
3137 3136 case IBD_RC_STATE_PAS_REQ_RECV:
3138 3137 chan->chan_state = IBD_RC_STATE_PAS_ESTAB;
3139 3138
3140 3139 result = ibt_enable_cq_notify(chan->rcq_hdl,
3141 3140 IBT_NEXT_COMPLETION);
3142 3141 if (result != IBT_SUCCESS) {
3143 3142 DPRINT(40, "ibd_rc_handle_pas_estab: "
3144 3143 "ibt_enable_cq_notify(rcq) "
3145 3144 "failed: status %d", result);
3146 3145 return (IBT_CM_REJECT);
3147 3146 }
3148 3147 break;
3149 3148 default:
3150 3149 DPRINT(40, "ibd_rc_handle_pas_estab: default "
3151 3150 "branch, chan_state=%d", chan->chan_state);
3152 3151 return (IBT_CM_REJECT);
3153 3152 }
3154 3153 return (IBT_CM_ACCEPT);
3155 3154 }
3156 3155
3157 3156 /* ARGSUSED */
3158 3157 static ibt_cm_status_t
3159 3158 ibd_rc_dispatch_actv_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
3160 3159 ibt_cm_return_args_t *ret_args, void *ret_priv_data,
3161 3160 ibt_priv_data_len_t ret_len_max)
3162 3161 {
3163 3162 ibt_cm_status_t result = IBT_CM_ACCEPT;
3164 3163 ibd_ace_t *ace = (ibd_ace_t *)(uintptr_t)arg;
3165 3164 ibd_rc_chan_t *rc_chan;
3166 3165 ibd_state_t *state;
3167 3166 ibd_rc_msg_hello_t *hello_ack;
3168 3167
3169 3168 switch (ibt_cm_event->cm_type) {
3170 3169 case IBT_CM_EVENT_REP_RCV:
3171 3170 ASSERT(ace->ac_chan != NULL);
3172 3171 ASSERT(ace->ac_chan->chan_state == IBD_RC_STATE_INIT);
3173 3172 hello_ack = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
3174 3173 DPRINT(30, "ibd_rc_handle_rep: hello_ack->mtu=0x%x, "
3175 3174 "hello_ack->qpn=0x%x", ntohl(hello_ack->rx_mtu),
3176 3175 ntohl(hello_ack->reserved_qpn));
3177 3176 ace->ac_chan->chan_state = IBD_RC_STATE_ACT_REP_RECV;
3178 3177 break;
3179 3178
3180 3179 case IBT_CM_EVENT_CONN_EST:
3181 3180 ASSERT(ace->ac_chan != NULL);
3182 3181 DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_CONN_EST, "
3183 3182 "ace=%p, act_state=%d, chan=%p",
3184 3183 ace, ace->ac_chan->chan_state, ace->ac_chan);
3185 3184 result = ibd_rc_handle_act_estab(ace);
3186 3185 break;
3187 3186
3188 3187 case IBT_CM_EVENT_CONN_CLOSED:
3189 3188 rc_chan = ace->ac_chan;
3190 3189 if (rc_chan == NULL) {
3191 3190 DPRINT(40, "ibd_rc_dispatch_actv_mad: "
3192 3191 "rc_chan==NULL, IBT_CM_EVENT_CONN_CLOSED");
3193 3192 return (IBT_CM_ACCEPT);
3194 3193 }
3195 3194 state = rc_chan->state;
3196 3195 mutex_enter(&state->id_ac_mutex);
3197 3196 if ((rc_chan->chan_state == IBD_RC_STATE_ACT_ESTAB) &&
3198 3197 ((ace = ibd_acache_find(state, &ace->ac_mac, B_FALSE, 0))
3199 3198 != NULL) && (ace == rc_chan->ace)) {
3200 3199 rc_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
3201 3200 ASSERT(ace->ac_mce == NULL);
3202 3201 INC_REF(ace, 1);
3203 3202 IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
3204 3203 mutex_exit(&state->id_ac_mutex);
3205 3204 DPRINT(30, "ibd_rc_dispatch_actv_mad: "
3206 3205 "IBT_CM_EVENT_CONN_CLOSED, ace=%p, chan=%p, "
3207 3206 "reason=%d", ace, rc_chan,
3208 3207 ibt_cm_event->cm_event.closed);
3209 3208 } else {
3210 3209 mutex_exit(&state->id_ac_mutex);
3211 3210 state->rc_act_close_simultaneous++;
3212 3211			DPRINT(40, "ibd_rc_dispatch_actv_mad: another thread "
3213 3212 "is closing it, IBT_CM_EVENT_CONN_CLOSED, "
3214 3213 "chan_state=%d", rc_chan->chan_state);
3215 3214 return (IBT_CM_ACCEPT);
3216 3215 }
3217 3216 ibd_rc_act_close(rc_chan, B_FALSE);
3218 3217 mutex_enter(&state->id_ac_mutex);
3219 3218 ace->ac_chan = NULL;
3220 3219 ASSERT(ace->ac_ref != 0);
3221 3220 atomic_dec_32(&ace->ac_ref);
3222 3221 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
3223 3222 IBD_ACACHE_INSERT_FREE(state, ace);
3224 3223 ace->ac_ref = 0;
3225 3224 } else {
3226 3225 ace->ac_ref |= CYCLEVAL;
3227 3226 state->rc_delay_ace_recycle++;
3228 3227 }
3229 3228 mutex_exit(&state->id_ac_mutex);
3230 3229 break;
3231 3230
3232 3231 case IBT_CM_EVENT_FAILURE:
3233 3232		DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_FAILURE, "
3234 3233 "ace=%p, chan=%p, code: %d, msg: %d, reason=%d",
3235 3234 ace, ace->ac_chan,
3236 3235 ibt_cm_event->cm_event.failed.cf_code,
3237 3236 ibt_cm_event->cm_event.failed.cf_msg,
3238 3237 ibt_cm_event->cm_event.failed.cf_reason);
3239 3238 /*
3240 3239		 * No need to free resources here; they are freed
3241 3240		 * in ibd_rc_connect().
3242 3241 */
3243 3242 break;
3244 3243
3245 3244 case IBT_CM_EVENT_MRA_RCV:
3246 3245 DPRINT(40, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_MRA_RCV");
3247 3246 break;
3248 3247 case IBT_CM_EVENT_LAP_RCV:
3249 3248 DPRINT(40, "ibd_rc_dispatch_actv_mad: LAP message received");
3250 3249 break;
3251 3250 case IBT_CM_EVENT_APR_RCV:
3252 3251 DPRINT(40, "ibd_rc_dispatch_actv_mad: APR message received");
3253 3252 break;
3254 3253 default:
3255 3254 DPRINT(40, "ibd_rc_dispatch_actv_mad: default branch, "
3256 3255 "ibt_cm_event->cm_type=%d", ibt_cm_event->cm_type);
3257 3256 break;
3258 3257 }
3259 3258
3260 3259 return (result);
3261 3260 }
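
The CONN_CLOSED arm above drops its acache hold with atomic_dec_32(), which is the point of this webrev: atomic_add_32(&x, 1) and atomic_add_32(&x, -1) become atomic_inc_32() and atomic_dec_32(), stating the intent directly and avoiding the signed delta. A small stand-alone illustration (compiles against <sys/atomic.h> on illumos/Solaris):

/* Editor's sketch of the before/after renaming, not part of the change. */
#include <stdio.h>
#include <sys/atomic.h>

int
main(void)
{
	volatile uint32_t ref = 0;

	atomic_inc_32(&ref);		/* was: atomic_add_32(&ref, 1) */
	atomic_inc_32(&ref);
	atomic_dec_32(&ref);		/* was: atomic_add_32(&ref, -1) */

	/* The _nv variants return the new value when the caller needs it. */
	printf("ref=%u\n", atomic_dec_32_nv(&ref));
	return (0);
}

Note that the decrement above still runs under id_ac_mutex; the atomic only protects against concurrent INC_REF/DEC_REF paths that touch ac_ref without that lock.
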
3262 3261
3263 3262 /* ARGSUSED */
3264 3263 static ibt_cm_status_t
3265 3264 ibd_rc_dispatch_pass_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
3266 3265 ibt_cm_return_args_t *ret_args, void *ret_priv_data,
3267 3266 ibt_priv_data_len_t ret_len_max)
3268 3267 {
3269 3268 ibt_cm_status_t result = IBT_CM_ACCEPT;
3270 3269 ibd_rc_chan_t *chan;
3271 3270
3272 3271 if (ibt_cm_event->cm_type == IBT_CM_EVENT_REQ_RCV) {
3273 3272		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_REQ_RCV, "
3274 3273		    "req_pkey=%x", ibt_cm_event->cm_event.req.req_pkey);
3275 3274		/* Received an incoming CM REQ from the active side */
3276 3275 result = ibd_rc_handle_req(arg, &chan, ibt_cm_event, ret_args,
3277 3276 ret_priv_data);
3278 3277 return (result);
3279 3278 }
3280 3279
3281 3280	if (ibt_cm_event->cm_channel == NULL) {
3282 3281		DPRINT(30, "ibd_rc_dispatch_pass_mad: "
3283 3282		    "ERROR: ibt_cm_event->cm_channel == NULL");
3284 3283 return (IBT_CM_REJECT);
3285 3284 }
3286 3285
3287 3286 chan =
3288 3287 (ibd_rc_chan_t *)ibt_get_chan_private(ibt_cm_event->cm_channel);
3289 3288 if (chan == NULL) {
3290 3289		DPRINT(40, "ibd_rc_dispatch_pass_mad: chan == NULL");
3291 3290 return (IBT_CM_REJECT);
3292 3291 }
3293 3292
3294 3293 switch (ibt_cm_event->cm_type) {
3295 3294 case IBT_CM_EVENT_CONN_EST:
3296 3295 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_EST, "
3297 3296 "chan=%p", chan);
3298 3297 result = ibd_rc_handle_pas_estab(chan);
3299 3298 break;
3300 3299 case IBT_CM_EVENT_CONN_CLOSED:
3301 3300 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_CLOSED,"
3302 3301 " chan=%p, reason=%d", chan, ibt_cm_event->cm_event.closed);
3303 3302 chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list,
3304 3303 chan);
3305 3304 if (chan != NULL)
3306 3305 (void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE);
3307 3306 break;
3308 3307 case IBT_CM_EVENT_FAILURE:
3309 3308 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_FAILURE,"
3310 3309 " chan=%p, code: %d, msg: %d, reason=%d", chan,
3311 3310 ibt_cm_event->cm_event.failed.cf_code,
3312 3311 ibt_cm_event->cm_event.failed.cf_msg,
3313 3312 ibt_cm_event->cm_event.failed.cf_reason);
3314 3313 chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list,
3315 3314 chan);
3316 3315 if (chan != NULL)
3317 3316 (void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE);
3318 3317 return (IBT_CM_ACCEPT);
3319 3318 case IBT_CM_EVENT_MRA_RCV:
3320 3319 DPRINT(40, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_MRA_RCV");
3321 3320 break;
3322 3321 case IBT_CM_EVENT_LAP_RCV:
3323 3322 DPRINT(40, "ibd_rc_dispatch_pass_mad: LAP message received");
3324 3323 break;
3325 3324 case IBT_CM_EVENT_APR_RCV:
3326 3325 DPRINT(40, "ibd_rc_dispatch_pass_mad: APR message received");
3327 3326 break;
3328 3327 default:
3329 3328 DPRINT(40, "ibd_rc_dispatch_pass_mad: default, type=%d, "
3330 3329 "chan=%p", ibt_cm_event->cm_type, chan);
3331 3330 break;
3332 3331 }
3333 3332
3334 3333 return (result);
3335 3334 }
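
The passive dispatcher maps a CM channel back to its per-connection state via ibt_get_chan_private(), which works only because channel setup stored the pointer earlier with the matching ibt_set_chan_private(). A minimal model of that pairing, with fake_chan_t standing in for the real channel handle:

/* Editor's sketch; fake_chan_t and these helpers are stand-ins for IBTF. */
#include <stdio.h>

typedef struct fake_chan { void *clnt_private; } fake_chan_t;

static void
set_chan_private(fake_chan_t *c, void *p)
{
	c->clnt_private = p;
}

static void *
get_chan_private(fake_chan_t *c)
{
	return (c->clnt_private);
}

typedef struct conn_state { int id; } conn_state_t;

int
main(void)
{
	fake_chan_t ch;
	conn_state_t st = { 42 };
	conn_state_t *found;

	set_chan_private(&ch, &st);	/* done once, at channel allocation */

	/* Later, in the CM event handler, map channel -> driver state. */
	found = get_chan_private(&ch);
	if (found == NULL)
		return (1);		/* reject, as the dispatcher does */
	printf("conn id=%d\n", found->id);
	return (0);
}
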
1653 lines elided