Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/clients/daplt/daplt.c
+++ new/usr/src/uts/common/io/ib/clients/daplt/daplt.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 *
26 26 * UDAPL kernel agent
27 27 */
28 28
29 29 #include <sys/types.h>
30 30 #include <sys/errno.h>
31 31 #include <sys/debug.h>
32 32 #include <sys/stropts.h>
33 33 #include <sys/stream.h>
34 34 #include <sys/strlog.h>
35 35 #include <sys/cmn_err.h>
36 36 #include <sys/kmem.h>
37 37 #include <sys/conf.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/modctl.h>
40 40 #include <sys/kstat.h>
41 41 #include <sys/ddi.h>
42 42 #include <sys/sunddi.h>
43 43 #include <sys/strsun.h>
44 44 #include <sys/taskq.h>
45 45 #include <sys/open.h>
46 46 #include <sys/uio.h>
47 47 #include <sys/cpuvar.h>
48 48 #include <sys/atomic.h>
49 49 #include <sys/sysmacros.h>
50 50 #include <sys/esunddi.h>
51 51 #include <sys/avl.h>
52 52 #include <sys/cred.h>
53 53 #include <sys/note.h>
54 54 #include <sys/ib/ibtl/ibti.h>
55 55 #include <sys/socket.h>
56 56 #include <netinet/in.h>
57 57 #include <daplt_if.h>
58 58 #include <daplt.h>
59 59
60 60 /*
61 61 * The following variables support the debug log buffer scheme.
62 62 */
63 63 #ifdef DEBUG
64 64 static char daplka_dbgbuf[0x80000];
65 65 #else /* DEBUG */
66 66 static char daplka_dbgbuf[0x4000];
67 67 #endif /* DEBUG */
68 68 static int daplka_dbgsize = sizeof (daplka_dbgbuf);
69 69 static size_t daplka_dbgnext;
70 70 static int daplka_dbginit = 0;
71 71 static kmutex_t daplka_dbglock;
72 72 _NOTE(MUTEX_PROTECTS_DATA(daplka_dbglock,
73 73 daplka_dbgbuf
74 74 daplka_dbgnext))
75 75
76 76 static int daplka_dbg = 0x0103;
77 77 static void daplka_console(const char *, ...);
78 78 static void daplka_debug(const char *, ...);
79 79 static int daplka_apm = 0x1; /* default enable */
80 80 static int daplka_failback = 0x1; /* default enable */
81 81 static int daplka_query_aft_setaltpath = 10;
82 82
83 83 #define DERR \
84 84 if (daplka_dbg & 0x100) \
85 85 daplka_debug
86 86
87 87 #ifdef DEBUG
88 88
89 89 #define DINFO \
90 90 daplka_console
91 91
92 92 #define D1 \
93 93 if (daplka_dbg & 0x01) \
94 94 daplka_debug
95 95 #define D2 \
96 96 if (daplka_dbg & 0x02) \
97 97 daplka_debug
98 98 #define D3 \
99 99 if (daplka_dbg & 0x04) \
100 100 daplka_debug
101 101 #define D4 \
102 102 if (daplka_dbg & 0x08) \
103 103 daplka_debug
104 104
105 105 #else /* DEBUG */
106 106
107 107 #define DINFO if (0) printf
108 108 #define D1 if (0) printf
109 109 #define D2 if (0) printf
110 110 #define D3 if (0) printf
111 111 #define D4 if (0) printf
112 112
113 113 #endif /* DEBUG */
114 114
115 115 /*
116 116 * driver entry points
117 117 */
118 118 static int daplka_open(dev_t *, int, int, struct cred *);
119 119 static int daplka_close(dev_t, int, int, struct cred *);
120 120 static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
121 121 static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
122 122 static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
123 123 static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
124 124
125 125 /*
126 126 * types of ioctls
127 127 */
128 128 static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
129 129 static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
130 130 cred_t *, int *);
131 131 static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
132 132 cred_t *, int *);
133 133 static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
134 134 cred_t *, int *);
135 135 static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
136 136 cred_t *, int *);
137 137 static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
138 138 cred_t *, int *);
139 139 static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
140 140 cred_t *, int *);
141 141 static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
142 142 cred_t *, int *);
143 143 static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
144 144 cred_t *, int *);
145 145
146 146 /*
147 147 * common ioctls and supporting functions
148 148 */
149 149 static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
150 150 static int daplka_ia_destroy(daplka_resource_t *);
151 151
152 152 /*
153 153 * EP ioctls and supporting functions
154 154 */
155 155 static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
156 156 cred_t *, int *);
157 157 static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
158 158 cred_t *, int *);
159 159 static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
160 160 cred_t *, int *);
161 161 static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
162 162 cred_t *, int *);
163 163 static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
164 164 cred_t *, int *);
165 165 static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
166 166 cred_t *, int *);
167 167 static int daplka_ep_destroy(daplka_resource_t *);
168 168 static void daplka_hash_ep_free(void *);
169 169 static int daplka_ep_failback(void *objp, void *arg);
170 170 static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);
171 171
172 172 static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
173 173 static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
174 174 static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
175 175 static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
176 176 static void daplka_timer_info_free(daplka_timer_info_t *);
177 177 static void daplka_timer_handler(void *);
178 178 static void daplka_timer_dispatch(void *);
179 179 static void daplka_timer_thread(void *);
180 180 static int daplka_cancel_timer(daplka_ep_resource_t *);
181 181 static void daplka_hash_timer_free(void *);
182 182
183 183 /*
184 184 * EVD ioctls and supporting functions
185 185 */
186 186 static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
187 187 cred_t *, int *);
188 188 static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
189 189 cred_t *, int *);
190 190 static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
191 191 cred_t *, int *);
192 192 static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
193 193 cred_t *, int *);
194 194 static int daplka_evd_destroy(daplka_resource_t *);
195 195 static void daplka_cq_handler(ibt_cq_hdl_t, void *);
196 196 static void daplka_evd_wakeup(daplka_evd_resource_t *,
197 197 daplka_evd_event_list_t *, daplka_evd_event_t *);
198 198 static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
199 199 daplka_evd_event_t *);
200 200 static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
201 201 static void daplka_hash_evd_free(void *);
202 202
203 203
204 204 /*
205 205 * SRQ ioctls and supporting functions
206 206 */
207 207 static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
208 208 cred_t *, int *);
209 209 static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
210 210 cred_t *, int *);
211 211 static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
212 212 cred_t *, int *);
213 213 static int daplka_srq_destroy(daplka_resource_t *);
214 214 static void daplka_hash_srq_free(void *);
215 215
216 216 /*
217 217 * Miscellaneous ioctls
218 218 */
219 219 static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
220 220 cred_t *, int *);
221 221 static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
222 222 cred_t *, int *);
223 223 static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
224 224 cred_t *, int *);
225 225 static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
226 226 cred_t *, int *);
227 227
228 228 /*
229 229 * PD ioctls and supporting functions
230 230 */
231 231 static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
232 232 cred_t *, int *);
233 233 static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
234 234 cred_t *, int *);
235 235 static int daplka_pd_destroy(daplka_resource_t *);
236 236 static void daplka_hash_pd_free(void *);
237 237
238 238 /*
239 239 * SP ioctls and supporting functions
240 240 */
241 241 static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
242 242 cred_t *, int *);
243 243 static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
244 244 cred_t *, int *);
245 245 static int daplka_sp_destroy(daplka_resource_t *);
246 246 static void daplka_hash_sp_free(void *);
247 247 static void daplka_hash_sp_unref(void *);
248 248
249 249 /*
250 250 * MR ioctls and supporting functions
251 251 */
252 252 static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
253 253 cred_t *, int *);
254 254 static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
255 255 cred_t *, int *);
256 256 static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
257 257 cred_t *, int *);
258 258 static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
259 259 cred_t *, int *);
260 260 static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
261 261 cred_t *, int *);
262 262 static int daplka_mr_destroy(daplka_resource_t *);
263 263 static void daplka_hash_mr_free(void *);
264 264 static void daplka_shared_mr_free(daplka_mr_resource_t *);
265 265
266 266 /*
267 267 * MW ioctls and supporting functions
268 268 */
269 269 static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
270 270 cred_t *, int *);
271 271 static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
272 272 cred_t *, int *);
273 273 static int daplka_mw_destroy(daplka_resource_t *);
274 274 static void daplka_hash_mw_free(void *);
275 275
276 276 /*
277 277 * CNO ioctls and supporting functions
278 278 */
279 279 static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
280 280 cred_t *, int *);
281 281 static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
282 282 cred_t *, int *);
283 283 static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
284 284 cred_t *, int *);
285 285 static int daplka_cno_destroy(daplka_resource_t *);
286 286 static void daplka_hash_cno_free(void *);
287 287
288 288 /*
289 289 * CM handlers
290 290 */
291 291 static ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
292 292 ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
293 293
294 294 static ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
295 295 ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
296 296
297 297 static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
298 298 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
299 299
300 300 /*
301 301 * resource management routines
302 302 */
303 303 static int daplka_resource_reserve(minor_t *);
304 304 static int daplka_resource_insert(minor_t, daplka_resource_t *);
305 305 static daplka_resource_t *daplka_resource_remove(minor_t rnum);
306 306 static daplka_resource_t *daplka_resource_lookup(minor_t);
307 307 static void daplka_resource_init(void);
308 308 static void daplka_resource_fini(void);
309 309 static struct daplka_resource_table daplka_resource;
310 310
311 311 /*
312 312 * hash table routines
313 313 */
314 314 static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
315 315 static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
316 316 static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
317 317 void *, krw_t);
318 318 static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
319 319 static int daplka_hash_create(daplka_hash_table_t *, uint_t,
320 320 void (*)(void *), void (*)(void *));
321 321 static void daplka_hash_destroy(daplka_hash_table_t *);
322 322 static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
323 323 static void daplka_hash_generic_lookup(void *);
324 324
325 325 static uint32_t daplka_timer_hkey_gen();
326 326
327 327 /*
328 328 * async event handlers
329 329 */
330 330 static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
331 331 uint64_t, daplka_ia_resource_t *);
332 332 static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
333 333 ibt_async_event_t *);
334 334 static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
335 335 ibt_async_event_t *);
336 336 static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
337 337 ibt_async_event_t *);
338 338 static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
339 339 ibt_async_event_t *);
340 340 static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
341 341 ibt_subnet_event_t *event);
342 342 static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);
343 343
344 344 /*
345 345 * IBTF wrappers and default limits used for resource accounting
346 346 */
347 347 static boolean_t daplka_accounting_enabled = B_TRUE;
348 348 static uint32_t daplka_max_qp_percent = 100;
349 349 static uint32_t daplka_max_cq_percent = 100;
350 350 static uint32_t daplka_max_pd_percent = 100;
351 351 static uint32_t daplka_max_mw_percent = 100;
352 352 static uint32_t daplka_max_mr_percent = 100;
353 353 static uint32_t daplka_max_srq_percent = 100;
354 354
355 355 static ibt_status_t
356 356 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
357 357 ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
358 358 ibt_channel_hdl_t *, ibt_chan_sizes_t *);
359 359
360 360 static ibt_status_t
361 361 daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);
362 362
363 363 static ibt_status_t
364 364 daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
365 365 ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);
366 366
367 367 static ibt_status_t
368 368 daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);
369 369
370 370 static ibt_status_t
371 371 daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
372 372 ibt_pd_flags_t, ibt_pd_hdl_t *);
373 373
374 374 static ibt_status_t
375 375 daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);
376 376
377 377 static ibt_status_t
378 378 daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
379 379 ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);
380 380
381 381 static ibt_status_t
382 382 daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);
383 383
384 384 static ibt_status_t
385 385 daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
386 386 ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);
387 387
388 388 static ibt_status_t
389 389 daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
390 390 ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
391 391 ibt_mr_desc_t *);
392 392
393 393 static ibt_status_t
394 394 daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);
395 395
396 396 static ibt_status_t
397 397 daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
398 398 ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);
399 399
400 400 static ibt_status_t
401 401 daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);
402 402
403 403 /*
404 404 * macros for manipulating resource objects.
405 405 * these macros can be used on objects that begin with a
406 406 * daplka_resource_t header.
407 407 */
#define	DAPLKA_RS_REFCNT(rp)	((rp)->header.rs_refcnt)

/*
 * take a hold on a resource object. the caller must already hold a
 * reference or otherwise guarantee the object is live.
 * wrapped in do/while (0) so the macro behaves as a single statement
 * (safe in unbraced if/else bodies).
 */
#define	DAPLKA_RS_REF(rp)	do {					\
	mutex_enter(&(rp)->header.rs_reflock);				\
	(rp)->header.rs_refcnt++;					\
	ASSERT((rp)->header.rs_refcnt != 0);				\
	mutex_exit(&(rp)->header.rs_reflock);				\
} while (0)

/*
 * drop a reference. the final release invokes the type-specific
 * rs_free destructor with rs_reflock dropped.
 */
#define	DAPLKA_RS_UNREF(rp)	do {					\
	mutex_enter(&(rp)->header.rs_reflock);				\
	ASSERT((rp)->header.rs_refcnt != 0);				\
	if (--(rp)->header.rs_refcnt == 0) {				\
		ASSERT((rp)->header.rs_free != NULL);			\
		mutex_exit(&(rp)->header.rs_reflock);			\
		(rp)->header.rs_free((daplka_resource_t *)rp);		\
	} else {							\
		mutex_exit(&(rp)->header.rs_reflock);			\
	}								\
} while (0)

/*
 * initialize the common resource header embedded in every resource
 * object. the object starts with one reference held by its creator.
 */
#define	DAPLKA_RS_INIT(rp, type, rnum, free_func)	do {		\
	(rp)->header.rs_refcnt = 1;					\
	(rp)->header.rs_type = (type);					\
	(rp)->header.rs_rnum = (rnum);					\
	(rp)->header.rs_charged = 0;					\
	(rp)->header.rs_free = (free_func);				\
	mutex_init(&(rp)->header.rs_reflock, NULL,			\
	    MUTEX_DRIVER, NULL);					\
} while (0)

#define	DAPLKA_RS_FINI(rp)	do {					\
	mutex_destroy(&(rp)->header.rs_reflock);			\
} while (0)

/*
 * resource accounting: rs_charged counts the IBTF resources charged
 * against this object.
 */
#define	DAPLKA_RS_ACCT_INC(rp, cnt)	do {				\
	atomic_add_32(&(rp)->header.rs_charged, (cnt));			\
} while (0)
#define	DAPLKA_RS_ACCT_DEC(rp, cnt)	do {				\
	atomic_add_32(&(rp)->header.rs_charged, -(cnt));		\
} while (0)
#define	DAPLKA_RS_ACCT_CHARGED(rp)	((rp)->header.rs_charged)

#define	DAPLKA_RS_RNUM(rp)	((rp)->header.rs_rnum)
#define	DAPLKA_RS_TYPE(rp)	((rp)->header.rs_type)
#define	DAPLKA_RS_RESERVED(rp)	((intptr_t)(rp) == DAPLKA_RC_RESERVED)
454 454
/*
 * depending on the timeout value does a cv_wait_sig or cv_timedwait_sig.
 * the whole expansion is parenthesized so the ternary cannot bind to
 * surrounding operators at the call site.
 */
#define	DAPLKA_EVD_WAIT(cvp, mp, timeout)				\
	(((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :		\
	    cv_timedwait_sig((cvp), (mp), (timeout)))

#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

/* hold/release an HCA under daplka_mutex; single-statement macros */
#define	DAPLKA_HOLD_HCA(dp, hca)	do {				\
	mutex_enter(&(dp)->daplka_mutex);				\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);				\
	mutex_exit(&(dp)->daplka_mutex);				\
} while (0)

#define	DAPLKA_RELE_HCA(dp, hca)	do {				\
	mutex_enter(&(dp)->daplka_mutex);				\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);				\
	mutex_exit(&(dp)->daplka_mutex);				\
} while (0)

/*
 * an HCA is busy while any hold or IBTF resource count against it
 * is still outstanding.
 */
#define	DAPLKA_HCA_BUSY(hca)						\
	((hca)->hca_ref_cnt != 0 ||					\
	(hca)->hca_qp_count != 0 ||					\
	(hca)->hca_cq_count != 0 ||					\
	(hca)->hca_pd_count != 0 ||					\
	(hca)->hca_mw_count != 0 ||					\
	(hca)->hca_mr_count != 0)
484 484
485 485
486 486 static struct cb_ops daplka_cb_ops = {
487 487 daplka_open, /* cb_open */
488 488 daplka_close, /* cb_close */
489 489 nodev, /* cb_strategy */
490 490 nodev, /* cb_print */
491 491 nodev, /* cb_dump */
492 492 nodev, /* cb_read */
493 493 nodev, /* cb_write */
494 494 daplka_ioctl, /* cb_ioctl */
495 495 nodev, /* cb_devmap */
496 496 nodev, /* cb_mmap */
497 497 nodev, /* cb_segmap */
498 498 nochpoll, /* cb_chpoll */
499 499 ddi_prop_op, /* cb_prop_op */
500 500 NULL, /* cb_stream */
501 501 D_NEW | D_MP, /* cb_flag */
502 502 CB_REV, /* rev */
503 503 nodev, /* int (*cb_aread)() */
504 504 nodev /* int (*cb_awrite)() */
505 505 };
506 506
507 507 static struct dev_ops daplka_ops = {
508 508 DEVO_REV, /* devo_rev */
509 509 0, /* devo_refcnt */
510 510 daplka_info, /* devo_getinfo */
511 511 nulldev, /* devo_identify */
512 512 nulldev, /* devo_probe */
513 513 daplka_attach, /* devo_attach */
514 514 daplka_detach, /* devo_detach */
515 515 nodev, /* devo_reset */
516 516 &daplka_cb_ops, /* devo_cb_ops */
517 517 (struct bus_ops *)NULL, /* devo_bus_ops */
518 518 nulldev, /* power */
519 519 ddi_quiesce_not_needed, /* devo_quiesce */
520 520 };
521 521
522 522 /*
523 523 * Module linkage information for the kernel.
524 524 */
525 525 static struct modldrv modldrv = {
526 526 &mod_driverops,
527 527 "uDAPL Service Driver",
528 528 &daplka_ops,
529 529 };
530 530
531 531 static struct modlinkage modlinkage = {
532 532 #ifdef _LP64
533 533 MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
534 534 #else
535 535 MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
536 536 #endif
537 537 };
538 538
539 539 /*
540 540 * daplka_dev holds global driver state and a list of HCAs
541 541 */
542 542 static daplka_t *daplka_dev = NULL;
543 543 static void *daplka_state = NULL;
544 544
545 545 /*
546 546 * global SP hash table
547 547 */
548 548 static daplka_hash_table_t daplka_global_sp_htbl;
549 549
550 550 /*
551 551 * timer_info hash table
552 552 */
553 553 static daplka_hash_table_t daplka_timer_info_htbl;
554 554 static uint32_t daplka_timer_hkey = 0;
555 555
556 556 /*
557 557 * shared MR avl tree
558 558 */
559 559 static avl_tree_t daplka_shared_mr_tree;
560 560 static kmutex_t daplka_shared_mr_lock;
561 561 static int daplka_shared_mr_cmp(const void *, const void *);
562 562 _NOTE(MUTEX_PROTECTS_DATA(daplka_shared_mr_lock,
563 563 daplka_shared_mr_tree))
564 564
565 565 /*
566 566 * default kmem flags used by this driver
567 567 */
568 568 static int daplka_km_flags = KM_SLEEP;
569 569
570 570 /*
571 571 * taskq used for handling background tasks
572 572 */
573 573 static taskq_t *daplka_taskq = NULL;
574 574
575 575 /*
576 576 * daplka_cm_delay is the length of time the active
577 577 * side needs to wait before timing out on the REP message.
578 578 */
579 579 static clock_t daplka_cm_delay = 60000000;
580 580
581 581 /*
582 582 * modunload will fail if pending_close is non-zero
583 583 */
584 584 static uint32_t daplka_pending_close = 0;
585 585
586 586 static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
587 587 IBTI_V_CURR,
588 588 IBT_USER,
589 589 daplka_async_handler,
590 590 NULL,
591 591 DAPLKA_DRV_NAME
592 592 };
593 593
594 594 /*
595 595 * Module Installation
596 596 */
597 597 int
598 598 _init(void)
599 599 {
600 600 int status;
601 601
602 602 status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
603 603 if (status != 0) {
604 604 return (status);
605 605 }
606 606
607 607 mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
608 608 bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
609 609 daplka_dbgnext = 0;
610 610 daplka_dbginit = 1;
611 611
612 612 daplka_resource_init();
613 613
614 614 status = mod_install(&modlinkage);
615 615 if (status != DDI_SUCCESS) {
616 616 /* undo inits done before mod_install */
617 617 daplka_resource_fini();
618 618 mutex_destroy(&daplka_dbglock);
619 619 ddi_soft_state_fini(&daplka_state);
620 620 }
621 621 return (status);
622 622 }
623 623
624 624 /*
625 625 * Module Removal
626 626 */
627 627 int
628 628 _fini(void)
629 629 {
630 630 int status;
631 631
632 632 /*
633 633 * mod_remove causes detach to be called
634 634 */
635 635 if ((status = mod_remove(&modlinkage)) != 0) {
636 636 DERR("fini: mod_remove failed: 0x%x\n", status);
637 637 return (status);
638 638 }
639 639
640 640 daplka_resource_fini();
641 641 mutex_destroy(&daplka_dbglock);
642 642 ddi_soft_state_fini(&daplka_state);
643 643
644 644 return (status);
645 645 }
646 646
647 647 /*
648 648 * Return Module Info.
649 649 */
650 650 int
651 651 _info(struct modinfo *modinfop)
652 652 {
653 653 return (mod_info(&modlinkage, modinfop));
654 654 }
655 655
656 656 static void
657 657 daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
658 658 {
659 659 daplka_hca_t *h;
660 660
661 661 ASSERT(mutex_owned(&dp->daplka_mutex));
662 662
663 663 if (dp->daplka_hca_list_head == NULL) {
664 664 dp->daplka_hca_list_head = hca;
665 665 } else {
666 666 h = dp->daplka_hca_list_head;
667 667 while (h->hca_next != NULL)
668 668 h = h->hca_next;
669 669
670 670 h->hca_next = hca;
671 671 }
672 672 }
673 673
674 674 static void
675 675 daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
676 676 {
677 677 daplka_hca_t *h;
678 678
679 679 ASSERT(mutex_owned(&dp->daplka_mutex));
680 680
681 681 if (dp->daplka_hca_list_head == hca)
682 682 dp->daplka_hca_list_head = hca->hca_next;
683 683 else {
684 684 h = dp->daplka_hca_list_head;
685 685 while (h->hca_next != hca)
686 686 h = h->hca_next;
687 687 h->hca_next = hca->hca_next;
688 688 }
689 689 }
690 690
691 691 static int
692 692 daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
693 693 {
694 694 daplka_hca_t *hca;
695 695 ibt_hca_portinfo_t *pinfop;
696 696 uint_t size;
697 697 int j;
698 698 ibt_status_t status;
699 699
700 700 hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);
701 701
702 702 hca->hca_guid = hca_guid;
703 703
704 704 /*
705 705 * open the HCA for use
706 706 */
707 707 status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
708 708 if (status != IBT_SUCCESS) {
709 709 if (status == IBT_HCA_IN_USE) {
710 710 DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
711 711 } else {
712 712 DERR("ibt_open_hca() returned %d\n", status);
713 713 }
714 714 kmem_free(hca, sizeof (daplka_hca_t));
715 715 return (status);
716 716 }
717 717
718 718 /*
719 719 * query HCA to get its info
720 720 */
721 721 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
722 722 if (status != IBT_SUCCESS) {
723 723 DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
724 724 status, (longlong_t)hca_guid);
725 725 goto out;
726 726 }
727 727
728 728 /*
729 729 * query HCA to get info of all ports
730 730 */
731 731 status = ibt_query_hca_ports(hca->hca_hdl,
732 732 0, &pinfop, &hca->hca_nports, &size);
733 733 if (status != IBT_SUCCESS) {
734 734 DERR("ibt_query_all_ports returned %d "
735 735 "(hca_guid 0x%llx)\n", status,
736 736 (longlong_t)hca_guid);
737 737 goto out;
738 738 }
739 739 hca->hca_ports = pinfop;
740 740 hca->hca_pinfosz = size;
741 741
742 742 DERR("hca guid 0x%llx, nports %d\n",
743 743 (longlong_t)hca_guid, hca->hca_nports);
744 744 for (j = 0; j < hca->hca_nports; j++) {
745 745 DERR("port %d: state %d prefix 0x%016llx "
746 746 "guid %016llx\n",
747 747 pinfop[j].p_port_num, pinfop[j].p_linkstate,
748 748 (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
749 749 (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
750 750 }
751 751
752 752 mutex_enter(&dp->daplka_mutex);
753 753 daplka_enqueue_hca(dp, hca);
754 754 mutex_exit(&dp->daplka_mutex);
755 755
756 756 return (IBT_SUCCESS);
757 757
758 758 out:
759 759 (void) ibt_close_hca(hca->hca_hdl);
760 760 kmem_free(hca, sizeof (daplka_hca_t));
761 761 return (status);
762 762 }
763 763
764 764 /*
765 765 * this function obtains the list of HCAs from IBTF.
766 766 * the HCAs are then opened and the returned handles
767 767 * and attributes are stored into the global daplka_dev
768 768 * structure.
769 769 */
770 770 static int
771 771 daplka_init_hcas(daplka_t *dp)
772 772 {
773 773 int i;
774 774 ib_guid_t *hca_guids;
775 775 uint32_t hca_count;
776 776
777 777 /*
778 778 * get the num & list of HCAs present
779 779 */
780 780 hca_count = ibt_get_hca_list(&hca_guids);
781 781 DERR("No. of HCAs present %d\n", hca_count);
782 782
783 783 if (hca_count != 0) {
784 784 /*
785 785 * get the info for each available HCA
786 786 */
787 787 for (i = 0; i < hca_count; i++)
788 788 (void) daplka_init_hca(dp, hca_guids[i]);
789 789
790 790 ibt_free_hca_list(hca_guids, hca_count);
791 791 }
792 792
793 793 if (dp->daplka_hca_list_head != NULL)
794 794 return (IBT_SUCCESS);
795 795 else
796 796 return (IBT_FAILURE);
797 797 }
798 798
799 799 static int
800 800 daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
801 801 {
802 802 ibt_status_t status;
803 803
804 804 if (hca->hca_hdl != NULL) {
805 805 status = ibt_close_hca(hca->hca_hdl);
806 806 if (status != IBT_SUCCESS) {
807 807 DERR("ibt_close_hca returned %d"
808 808 " (hca_guid 0x%llx)\n", status,
809 809 (longlong_t)hca->hca_guid);
810 810
811 811 mutex_enter(&dp->daplka_mutex);
812 812 daplka_enqueue_hca(dp, hca);
813 813 mutex_exit(&dp->daplka_mutex);
814 814
815 815 return (status);
816 816 }
817 817 }
818 818
819 819 if (hca->hca_ports != NULL)
820 820 ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
821 821
822 822 kmem_free(hca, sizeof (daplka_hca_t));
823 823 return (IBT_SUCCESS);
824 824 }
825 825
826 826 /*
827 827 * closes all HCAs and frees up the HCA list
828 828 */
829 829 static int
830 830 daplka_fini_hcas(daplka_t *dp)
831 831 {
832 832 ibt_status_t status;
833 833 daplka_hca_t *hca;
834 834
835 835 mutex_enter(&daplka_dev->daplka_mutex);
836 836 while ((hca = dp->daplka_hca_list_head) != NULL) {
837 837 if (DAPLKA_HCA_BUSY(hca)) {
838 838 mutex_exit(&daplka_dev->daplka_mutex);
839 839 return (IBT_HCA_RESOURCES_NOT_FREED);
840 840 }
841 841 daplka_dequeue_hca(daplka_dev, hca);
842 842 mutex_exit(&daplka_dev->daplka_mutex);
843 843
844 844 if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
845 845 return (status);
846 846
847 847 mutex_enter(&daplka_dev->daplka_mutex);
848 848 }
849 849 mutex_exit(&daplka_dev->daplka_mutex);
850 850
851 851 DERR("dapl kernel agent unloaded\n");
852 852 return (IBT_SUCCESS);
853 853 }
854 854
855 855
856 856 /*
857 857 * Attach the device, create and fill in daplka_dev
858 858 */
859 859 static int
860 860 daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
861 861 {
862 862 daplka_t *dp;
863 863 int instance, retval, err;
864 864 boolean_t sp_htbl_allocated = B_FALSE;
865 865 boolean_t timer_htbl_allocated = B_FALSE;
866 866 boolean_t shared_mr_tree_allocated = B_FALSE;
867 867
868 868 switch (cmd) {
869 869 case DDI_ATTACH:
870 870 break;
871 871 case DDI_RESUME:
872 872 return (DDI_SUCCESS);
873 873 default:
874 874 return (DDI_FAILURE);
875 875 }
876 876
877 877 /*
878 878 * Allocate soft data structure
879 879 */
880 880 instance = ddi_get_instance(dip);
881 881 if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
882 882 DERR("attach: bad state zalloc\n");
883 883 return (DDI_FAILURE);
884 884 }
885 885
886 886 dp = ddi_get_soft_state(daplka_state, instance);
887 887 if (dp == NULL) {
888 888 ddi_soft_state_free(daplka_state, instance);
889 889 DERR("attach: cannot get soft state\n");
890 890 return (DDI_FAILURE);
891 891 }
892 892 /*
893 893 * Stuff private info into dip.
894 894 */
895 895 dp->daplka_dip = dip;
896 896 ddi_set_driver_private(dip, dp);
897 897 daplka_dev = dp;
898 898 mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);
899 899
900 900 /*
901 901 * Register driver with IBTF
902 902 */
903 903 retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
904 904 &dp->daplka_clnt_hdl);
905 905 if (retval != IBT_SUCCESS) {
906 906 DERR("attach: ibt_attach failed: error = %d\n", retval);
907 907 retval = DDI_FAILURE;
908 908 goto error;
909 909 }
910 910 /* Register to receive SM events */
911 911 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
912 912 daplka_sm_notice_handler, NULL);
913 913
914 914 retval = daplka_init_hcas(dp);
915 915 if (retval != IBT_SUCCESS) {
916 916 DERR("attach: hca_init failed: error = %d\n", retval);
917 917 retval = DDI_FAILURE;
918 918 goto error;
919 919 }
920 920 /*
921 921 * this table is used by cr_handoff
922 922 */
923 923 retval = daplka_hash_create(&daplka_global_sp_htbl,
924 924 DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
925 925 daplka_hash_generic_lookup);
926 926 if (retval != 0) {
927 927 DERR("attach: cannot create sp hash table\n");
928 928 retval = DDI_FAILURE;
929 929 goto error;
930 930 }
931 931 sp_htbl_allocated = B_TRUE;
932 932
933 933 /*
934 934 * this table stores per EP timer information.
935 935 * timer_info_t objects are inserted into this table whenever
936 936 * a EP timer is set. timers get removed when they expire
937 937 * or when they get cancelled.
938 938 */
939 939 retval = daplka_hash_create(&daplka_timer_info_htbl,
940 940 DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
941 941 if (retval != 0) {
942 942 DERR("attach: cannot create timer hash table\n");
943 943 retval = DDI_FAILURE;
944 944 goto error;
945 945 }
946 946 timer_htbl_allocated = B_TRUE;
947 947
948 948 /*
949 949 * this taskq is currently only used for processing timers.
950 950 * other processing may also use this taskq in the future.
951 951 */
952 952 daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
953 953 maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
954 954 if (daplka_taskq == NULL) {
955 955 DERR("attach: cannot create daplka_taskq\n");
956 956 retval = DDI_FAILURE;
957 957 goto error;
958 958 }
959 959
960 960 /*
961 961 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
962 962 * gets retrieved or created when daplka_mr_register_shared is
963 963 * called.
964 964 */
965 965 mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);
966 966
967 967 avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
968 968 sizeof (daplka_shared_mr_t),
969 969 offsetof(daplka_shared_mr_t, smr_node));
970 970 shared_mr_tree_allocated = B_TRUE;
971 971
972 972 /*
973 973 * Create the filesystem device node.
974 974 */
975 975 if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
976 976 0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
977 977 DERR("attach: bad create_minor_node\n");
978 978 retval = DDI_FAILURE;
979 979 goto error;
980 980 }
981 981 dp->daplka_status = DAPLKA_STATE_ATTACHED;
982 982 ddi_report_dev(dip);
983 983 return (DDI_SUCCESS);
984 984
985 985 error:
986 986 if (shared_mr_tree_allocated) {
987 987 avl_destroy(&daplka_shared_mr_tree);
988 988 mutex_destroy(&daplka_shared_mr_lock);
989 989 }
990 990
991 991 if (daplka_taskq) {
992 992 taskq_destroy(daplka_taskq);
993 993 daplka_taskq = NULL;
994 994 }
995 995
996 996 if (timer_htbl_allocated) {
997 997 daplka_hash_destroy(&daplka_timer_info_htbl);
998 998 }
999 999
1000 1000 if (sp_htbl_allocated) {
1001 1001 daplka_hash_destroy(&daplka_global_sp_htbl);
1002 1002 }
1003 1003
1004 1004 err = daplka_fini_hcas(dp);
1005 1005 if (err != IBT_SUCCESS) {
1006 1006 DERR("attach: hca_fini returned %d\n", err);
1007 1007 }
1008 1008
1009 1009 if (dp->daplka_clnt_hdl != NULL) {
1010 1010 /* unregister SM event notification */
1011 1011 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1012 1012 (ibt_sm_notice_handler_t)NULL, NULL);
1013 1013 err = ibt_detach(dp->daplka_clnt_hdl);
1014 1014
1015 1015 if (err != IBT_SUCCESS) {
1016 1016 DERR("attach: ibt_detach returned %d\n", err);
1017 1017 }
1018 1018 }
1019 1019 mutex_destroy(&dp->daplka_mutex);
1020 1020
1021 1021 if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1022 1022 ddi_remove_minor_node(dip, NULL);
1023 1023 }
1024 1024 ddi_soft_state_free(daplka_state, instance);
1025 1025 return (retval);
1026 1026 }
1027 1027
1028 1028 /*
1029 1029 * Detach - Free resources allocated in attach
1030 1030 */
1031 1031 /* ARGSUSED */
static int
daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance, err;
	void *cookie = NULL;
	daplka_t *dp;

	/* only DDI_DETACH is handled; DDI_SUSPEND is not supported */
	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	/*
	 * refuse to detach while any client resource is still allocated
	 * or a close is still in progress; the global hash tables and
	 * taskq destroyed below would otherwise still be in use.
	 */
	if (daplka_resource.daplka_rc_cnt > 0 ||
	    daplka_pending_close > 0) {
		DERR("detach: driver in use\n");
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		DERR("detach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	/* release all HCA resources before detaching from IBTF */
	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("detach: hca_fini returned %d\n", err);
		return (DDI_FAILURE);
	}
	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);
		if (err != IBT_SUCCESS) {
			DERR("detach: ibt_detach returned %d\n", err);
			return (DDI_FAILURE);
		}
		dp->daplka_clnt_hdl = NULL;
	}
	mutex_destroy(&dp->daplka_mutex);
	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	dp->daplka_status = DAPLKA_STATE_DETACHED;
	ddi_soft_state_free(daplka_state, instance);
	daplka_dev = NULL;

	/*
	 * by the time we get here, all clients of dapl should
	 * have exited and completed their cleanup properly.
	 * we can assert that all global data structures are now
	 * empty.
	 */
	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
	avl_destroy(&daplka_shared_mr_tree);
	mutex_destroy(&daplka_shared_mr_lock);

	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
	daplka_hash_destroy(&daplka_timer_info_htbl);

	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
	daplka_hash_destroy(&daplka_global_sp_htbl);

	taskq_destroy(daplka_taskq);

	return (DDI_SUCCESS);
}
1098 1098
1099 1099 /* ARGSUSED */
1100 1100 static int
1101 1101 daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1102 1102 {
1103 1103 switch (infocmd) {
1104 1104 case DDI_INFO_DEVT2DEVINFO:
1105 1105 if (daplka_dev != NULL) {
1106 1106 *result = daplka_dev->daplka_dip;
1107 1107 return (DDI_SUCCESS);
1108 1108 } else {
1109 1109 return (DDI_FAILURE);
1110 1110 }
1111 1111
1112 1112 case DDI_INFO_DEVT2INSTANCE:
1113 1113 *result = 0;
1114 1114 return (DDI_SUCCESS);
1115 1115
1116 1116 default:
1117 1117 return (DDI_FAILURE);
1118 1118 }
1119 1119 }
1120 1120
1121 1121 /*
1122 1122 * creates a EP resource.
1123 1123 * A EP resource contains a RC channel. A EP resource holds a
1124 1124 * reference to a send_evd (for the send CQ), recv_evd (for the
1125 1125 * recv CQ), a connection evd and a PD. These references ensure
1126 1126 * that the referenced resources are not freed until the EP itself
1127 1127 * gets freed.
1128 1128 */
1129 1129 /* ARGSUSED */
static int
daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp;
	daplka_pd_resource_t *pd_rp;
	dapl_ep_create_t args;
	ibt_rc_chan_alloc_args_t chan_args;
	ibt_chan_alloc_flags_t achan_flags;
	ibt_chan_sizes_t chan_real_sizes;
	ibt_hca_attr_t *hca_attrp;
	uint64_t ep_hkey = 0;
	boolean_t inserted = B_FALSE;
	uint32_t old_state, new_state;
	int retval;
	ibt_status_t status;

	D3("ep_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
	if (ep_rp == NULL) {
		DERR("ep_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	/* sets refcnt to 1; daplka_ep_destroy runs when it drops to 0 */
	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);

	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
	ep_rp->ep_hca = ia_rp->ia_hca;
	ep_rp->ep_cookie = args.ep_cookie;
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we don't have to use ep_get_state here because ep_rp is not in
	 * ep_htbl yet. refer to the description of daplka_ep_set_state
	 * for details about the EP state machine.
	 */
	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	new_state = old_state = DAPLKA_EP_STATE_CLOSED;

	/*
	 * each successful hash_lookup below adds a reference to the
	 * looked-up resource; those references are held for the life
	 * of the EP and released by daplka_ep_destroy.
	 */
	/* get reference to send evd and get cq handle */
	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
	if (ep_rp->ep_snd_evd == NULL) {
		DERR("ep_create: ep_snd_evd %llx not found\n",
		    args.ep_snd_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
	if (chan_args.rc_scq == NULL) {
		DERR("ep_create: ep_snd_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to recv evd and get cq handle */
	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
	if (ep_rp->ep_rcv_evd == NULL) {
		DERR("ep_create: ep_rcv_evd %llx not found\n",
		    args.ep_rcv_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
	if (chan_args.rc_rcq == NULL) {
		DERR("ep_create: ep_rcv_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to conn evd */
	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
	if (ep_rp->ep_conn_evd == NULL) {
		DERR("ep_create: ep_conn_evd %llx not found\n",
		    args.ep_conn_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to SRQ if needed */
	if (args.ep_srq_attached) {
		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
		if (ep_rp->ep_srq_res == NULL) {
			DERR("ep_create: ep_srq %llx not found\n",
			    (longlong_t)args.ep_srq_hkey);
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
		    (longlong_t)args.ep_srq_hkey);
	} else {
		ep_rp->ep_srq_res = NULL;
	}

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
	if (pd_rp == NULL) {
		DERR("ep_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	ep_rp->ep_pd_res = pd_rp;
	chan_args.rc_pd = pd_rp->pd_hdl;


	/*
	 * these checks ensure that the requested channel sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_sq_sgl %d\n",
		    args.ep_ch_sizes.dcs_sq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_rq_sgl %d\n",
		    args.ep_ch_sizes.dcs_rq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_sq %d\n",
		    args.ep_ch_sizes.dcs_sq);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_rq %d\n",
		    args.ep_ch_sizes.dcs_rq);
		retval = EINVAL;
		goto cleanup;
	}

	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
	chan_args.rc_flags = IBT_WR_SIGNALED;
	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
	chan_args.rc_clone_chan = NULL;
	if (args.ep_srq_attached) {
		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
	} else {
		chan_args.rc_srq = NULL;
	}

	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
	    chan_args.rc_flags, chan_args.rc_control,
	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);

	if (args.ep_srq_attached) {
		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
	} else {
		achan_flags = IBT_ACHAN_USER_MAP;
	}
	/* create rc channel */
	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
	    &chan_real_sizes);
	if (status != IBT_SUCCESS) {
		DERR("ep_create: alloc_rc_channel returned %d\n", status);
		/*
		 * IBT failures are reported to the library through
		 * *rvalp; the ioctl itself succeeds (retval = 0).
		 */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* report the sizes actually granted back to the library */
	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;

	/*
	 * store ep ptr with chan_hdl.
	 * this ep_ptr is used by the CM handlers (both active and
	 * passive)
	 * mutex is only needed for race of "destroy" and "async"
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));

	if (status != IBT_SUCCESS) {
		DERR("ep_create: ibt_ci_data_out error(%d)\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into ep hash table */
	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
	    &ep_hkey, (void *)ep_rp);
	if (retval != 0) {
		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/*
	 * at this point, the ep_rp can be looked up by other threads
	 * if they manage to guess the correct hkey. but they are not
	 * permitted to operate on ep_rp until we transition to the
	 * CLOSED state.
	 */

	/* return hkey to library */
	args.ep_hkey = ep_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	/* publish the EP: TRANSITIONING -> CLOSED, waking any waiters */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	D3("ep_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_ep_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
		    (void **)&free_rp);
		if (free_rp != ep_rp) {
			/*
			 * this case is impossible because ep_free will
			 * wait until our state transition is complete.
			 */
			DERR("ep_create: cannot remove ep from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	/* mark FREED so the final UNREF can tear the EP down */
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
1395 1395
1396 1396 /*
1397 1397 * daplka_ep_get_state retrieves the current state of the EP and
1398 1398 * sets the state to TRANSITIONING. if the current state is already
1399 1399 * TRANSITIONING, this function will wait until the state becomes one
1400 1400 * of the other EP states. Most of the EP related ioctls follow the
1401 1401 * call sequence:
1402 1402 *
1403 1403 * new_state = old_state = daplka_ep_get_state(ep_rp);
1404 1404 * ...
1405 1405 * ...some code that affects the EP
1406 1406 * ...
1407 1407 * new_state = <NEW_STATE>;
1408 1408 * daplka_ep_set_state(ep_rp, old_state, new_state);
1409 1409 *
1410 1410 * this call sequence ensures that only one thread may access the EP
1411 1411 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
1412 1412 * transitions ep_state to new_state and wakes up any waiters blocking
1413 1413 * on ep_cv.
1414 1414 *
1415 1415 */
1416 1416 static uint32_t
1417 1417 daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
1418 1418 {
1419 1419 uint32_t old_state = 0;
1420 1420
1421 1421 mutex_enter(&ep_rp->ep_lock);
1422 1422 while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
1423 1423 D2("get_state: wait for state transition to complete\n");
1424 1424 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
1425 1425 D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
1426 1426 }
1427 1427 ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
1428 1428 old_state = ep_rp->ep_state;
1429 1429
1430 1430 /*
1431 1431 * an ep that is in the FREED state cannot transition
1432 1432 * back to any of the regular states
1433 1433 */
1434 1434 if (old_state != DAPLKA_EP_STATE_FREED) {
1435 1435 ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1436 1436 }
1437 1437 mutex_exit(&ep_rp->ep_lock);
1438 1438 return (old_state);
1439 1439 }
1440 1440
1441 1441 /*
1442 1442 * EP state transition diagram
1443 1443 *
1444 1444 * CLOSED<-------------------
1445 1445 * | |
1446 1446 * | |
1447 1447 * ------------------------ |
1448 1448 * | | |
1449 1449 * | | |
1450 1450 * v v |
1451 1451 * CONNECTING ACCEPTING |
1452 1452 * | | | | | |
1453 1453 * | | | | | |
1454 1454 * | | | | | |
1455 1455 * | | |_______|_______| |
1456 1456 * | | | | | |
1457 1457 * | |___________| | | |
1458 1458 * | | | | |
1459 1459 * | v | |---->DISCONNECTED
1460 1460 * | CONNECTED | ^
1461 1461 * v | | |
1462 1462 * ABORTING |---------|--------------|
1463 1463 * | | | |
1464 1464 * | | v |
1465 1465 * | |-------->DISCONNECTING--|
1466 1466 * | |
1467 1467 * |---------------------------------|
1468 1468 *
1469 1469 * *not shown in this diagram:
1470 1470 * -loopback transitions
1471 1471 * -transitions to the FREED state
1472 1472 */
1473 1473 static boolean_t
1474 1474 daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
1475 1475 {
1476 1476 boolean_t valid = B_FALSE;
1477 1477
1478 1478 /*
1479 1479 * reseting to the same state is a no-op and is always
1480 1480 * permitted. transitioning to the FREED state indicates
1481 1481 * that the ep is about to be freed and no further operation
1482 1482 * is allowed on it. to support abrupt close, the ep is
1483 1483 * permitted to transition to the FREED state from any state.
1484 1484 */
1485 1485 if (old_state == new_state ||
1486 1486 new_state == DAPLKA_EP_STATE_FREED) {
1487 1487 return (B_TRUE);
1488 1488 }
1489 1489
1490 1490 switch (old_state) {
1491 1491 case DAPLKA_EP_STATE_CLOSED:
1492 1492 /*
1493 1493 * this is the initial ep_state.
1494 1494 * a transition to CONNECTING or ACCEPTING may occur
1495 1495 * upon calling daplka_ep_connect or daplka_cr_accept,
1496 1496 * respectively.
1497 1497 */
1498 1498 if (new_state == DAPLKA_EP_STATE_CONNECTING ||
1499 1499 new_state == DAPLKA_EP_STATE_ACCEPTING) {
1500 1500 valid = B_TRUE;
1501 1501 }
1502 1502 break;
1503 1503 case DAPLKA_EP_STATE_CONNECTING:
1504 1504 /*
1505 1505 * we transition to this state if daplka_ep_connect
1506 1506 * is successful. from this state, we can transition
1507 1507 * to CONNECTED if daplka_cm_rc_conn_est gets called;
1508 1508 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
1509 1509 * daplka_cm_rc_event_failure gets called. If the
1510 1510 * client calls daplka_ep_disconnect, we transition
1511 1511 * to DISCONNECTING. If a timer was set at ep_connect
1512 1512 * time and if the timer expires prior to any of the
1513 1513 * CM callbacks, we transition to ABORTING and then
1514 1514 * to DISCONNECTED.
1515 1515 */
1516 1516 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1517 1517 new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1518 1518 new_state == DAPLKA_EP_STATE_DISCONNECTED ||
1519 1519 new_state == DAPLKA_EP_STATE_ABORTING) {
1520 1520 valid = B_TRUE;
1521 1521 }
1522 1522 break;
1523 1523 case DAPLKA_EP_STATE_ACCEPTING:
1524 1524 /*
1525 1525 * we transition to this state if daplka_cr_accept
1526 1526 * is successful. from this state, we can transition
1527 1527 * to CONNECTED if daplka_cm_service_conn_est gets called;
1528 1528 * or to DISCONNECTED if daplka_cm_service_conn_closed or
1529 1529 * daplka_cm_service_event_failure gets called. If the
1530 1530 * client calls daplka_ep_disconnect, we transition to
1531 1531 * DISCONNECTING.
1532 1532 */
1533 1533 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1534 1534 new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1535 1535 new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1536 1536 valid = B_TRUE;
1537 1537 }
1538 1538 break;
1539 1539 case DAPLKA_EP_STATE_CONNECTED:
1540 1540 /*
1541 1541 * we transition to this state if a active or passive
1542 1542 * connection gets established. if the client calls
1543 1543 * daplka_ep_disconnect, we transition to the
1544 1544 * DISCONNECTING state. subsequent CM callbacks will
1545 1545 * cause ep_state to be set to DISCONNECTED. If the
1546 1546 * remote peer terminates the connection before we do,
1547 1547 * it is possible for us to transition directly from
1548 1548 * CONNECTED to DISCONNECTED.
1549 1549 */
1550 1550 if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1551 1551 new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1552 1552 valid = B_TRUE;
1553 1553 }
1554 1554 break;
1555 1555 case DAPLKA_EP_STATE_DISCONNECTING:
1556 1556 /*
1557 1557 * we transition to this state if the client calls
1558 1558 * daplka_ep_disconnect.
1559 1559 */
1560 1560 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1561 1561 valid = B_TRUE;
1562 1562 }
1563 1563 break;
1564 1564 case DAPLKA_EP_STATE_ABORTING:
1565 1565 /*
1566 1566 * we transition to this state if the active side
1567 1567 * EP timer has expired. this is only a transient
1568 1568 * state that is set during timer processing. when
1569 1569 * timer processing completes, ep_state will become
1570 1570 * DISCONNECTED.
1571 1571 */
1572 1572 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1573 1573 valid = B_TRUE;
1574 1574 }
1575 1575 break;
1576 1576 case DAPLKA_EP_STATE_DISCONNECTED:
1577 1577 /*
1578 1578 * we transition to this state if we get a closed
1579 1579 * or event_failure CM callback. an expired timer
1580 1580 * can also cause us to be in this state. this
1581 1581 * is the only state in which we permit the
1582 1582 * ep_reinit operation.
1583 1583 */
1584 1584 if (new_state == DAPLKA_EP_STATE_CLOSED) {
1585 1585 valid = B_TRUE;
1586 1586 }
1587 1587 break;
1588 1588 default:
1589 1589 break;
1590 1590 }
1591 1591
1592 1592 if (!valid) {
1593 1593 DERR("ep_transition: invalid state change %d -> %d\n",
1594 1594 old_state, new_state);
1595 1595 }
1596 1596 return (valid);
1597 1597 }
1598 1598
1599 1599 /*
1600 1600 * first check if the transition is valid. then set ep_state
1601 1601 * to new_state and wake up all waiters.
1602 1602 */
1603 1603 static void
1604 1604 daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
1605 1605 uint32_t new_state)
1606 1606 {
1607 1607 boolean_t valid;
1608 1608
1609 1609 ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);
1610 1610
1611 1611 valid = daplka_ep_transition_is_valid(old_state, new_state);
1612 1612 mutex_enter(&ep_rp->ep_lock);
1613 1613 if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
1614 1614 if (valid) {
1615 1615 ep_rp->ep_state = new_state;
1616 1616 } else {
1617 1617 /*
1618 1618 * this case is impossible.
1619 1619 * we have a serious problem if we get here.
1620 1620 * instead of panicing, we reset the state to
1621 1621 * old_state. doing this would at least prevent
1622 1622 * threads from hanging due to ep_state being
1623 1623 * stuck in TRANSITIONING.
1624 1624 */
1625 1625 ep_rp->ep_state = old_state;
1626 1626 ASSERT(B_FALSE);
1627 1627 }
1628 1628 }
1629 1629 cv_broadcast(&ep_rp->ep_cv);
1630 1630 mutex_exit(&ep_rp->ep_lock);
1631 1631 }
1632 1632
1633 1633 /*
1634 1634 * modifies RC channel attributes.
1635 1635 * currently, only the rdma_in and rdma_out attributes may
1636 1636 * be modified. the channel must be in quiescent state when
1637 1637 * this function is called.
1638 1638 */
1639 1639 /* ARGSUSED */
static int
daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	ibt_cep_modify_flags_t good_flags;
	ibt_rc_chan_modify_attr_t rcm_attr;
	ibt_hca_attr_t *hca_attrp;
	dapl_ep_modify_t args;
	ibt_status_t status;
	uint32_t old_state, new_state;
	int retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
	    mode);
	if (retval != 0) {
		DERR("ep_modify: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* lookup adds a reference; dropped via UNREF at cleanup */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
	if (ep_rp == NULL) {
		DERR("ep_modify: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	/* claim the ep (sets TRANSITIONING) until set_state below */
	new_state = old_state = daplka_ep_get_state(ep_rp);

	/* the channel must be quiescent to be modified */
	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_modify: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	/* only the rdma_in/rdma_out attributes may be modified */
	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
	if ((args.epm_flags & ~good_flags) != 0) {
		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
		retval = EINVAL;
		goto cleanup;
	}

	hca_attrp = &ia_rp->ia_hca->hca_attr;

	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
		/* requested value must be within the HCA's limits */
		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
			    args.epm_rdma_ra_out);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
	}
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
			DERR("ep_modify: epm_rdma_ra_in %d\n",
			    args.epm_rdma_ra_in);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
	}
	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
	    &rcm_attr, NULL);
	if (status != IBT_SUCCESS) {
		DERR("ep_modify: modify_rc_channel returned %d\n", status);
		/* IBT status goes back via *rvalp; ioctl succeeds */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * ep_modify does not change ep_state
	 */
cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
1720 1720
1721 1721 /*
1722 1722 * Frees a EP resource.
1723 1723 * a EP may only be freed when it is in the CLOSED or
1724 1724 * DISCONNECTED state.
1725 1725 */
1726 1726 /* ARGSUSED */
1727 1727 static int
1728 1728 daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1729 1729 cred_t *cred, int *rvalp)
1730 1730 {
1731 1731 daplka_ep_resource_t *ep_rp = NULL;
1732 1732 dapl_ep_free_t args;
1733 1733 uint32_t old_state, new_state;
1734 1734 int retval;
1735 1735
1736 1736 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
1737 1737 if (retval != 0) {
1738 1738 DERR("ep_free: copyin error %d\n", retval);
1739 1739 return (EFAULT);
1740 1740 }
1741 1741 ep_rp = (daplka_ep_resource_t *)
1742 1742 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
1743 1743 if (ep_rp == NULL) {
1744 1744 DERR("ep_free: cannot find ep resource\n");
1745 1745 return (EINVAL);
1746 1746 }
1747 1747 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1748 1748 new_state = old_state = daplka_ep_get_state(ep_rp);
1749 1749
1750 1750 /*
1751 1751 * ep cannot be freed if it is in an invalid state.
1752 1752 */
1753 1753 if (old_state != DAPLKA_EP_STATE_CLOSED &&
1754 1754 old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1755 1755 DERR("ep_free: invalid state %d\n", old_state);
1756 1756 retval = EINVAL;
1757 1757 goto cleanup;
1758 1758 }
1759 1759 ep_rp = NULL;
1760 1760 retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
1761 1761 args.epf_hkey, (void **)&ep_rp);
1762 1762 if (retval != 0 || ep_rp == NULL) {
1763 1763 /*
1764 1764 * this is only possible if we have two threads
1765 1765 * calling ep_free in parallel.
1766 1766 */
1767 1767 DERR("ep_free: cannot find ep resource\n");
1768 1768 goto cleanup;
1769 1769 }
1770 1770 /* there should not be any outstanding timers */
1771 1771 ASSERT(ep_rp->ep_timer_hkey == 0);
1772 1772
1773 1773 new_state = DAPLKA_EP_STATE_FREED;
1774 1774 daplka_ep_set_state(ep_rp, old_state, new_state);
1775 1775
1776 1776 /* remove reference obtained by lookup */
1777 1777 DAPLKA_RS_UNREF(ep_rp);
1778 1778
1779 1779 /* UNREF calls the actual free function when refcnt is zero */
1780 1780 DAPLKA_RS_UNREF(ep_rp);
1781 1781 return (0);
1782 1782
1783 1783 cleanup:;
1784 1784 daplka_ep_set_state(ep_rp, old_state, new_state);
1785 1785
1786 1786 /* remove reference obtained by lookup */
1787 1787 DAPLKA_RS_UNREF(ep_rp);
1788 1788 return (retval);
1789 1789 }
1790 1790
1791 1791 /*
1792 1792 * The following routines supports the timeout feature of ep_connect.
1793 1793 * Refer to the description of ep_connect for details.
1794 1794 */
1795 1795
/*
 * this is the timer processing thread. it runs off the taskq (see
 * daplka_timer_dispatch) after the ep_connect timer has fired and the
 * timer object has been claimed from the global timer table. if the EP
 * is no longer CONNECTING, the connection already completed (or failed)
 * and we back off quietly; otherwise we abort the half-open connection
 * and deliver a TIMED_OUT event to the connection EVD.
 */
static void
daplka_timer_thread(void *arg)
{
	daplka_timer_info_t	*timerp = (daplka_timer_info_t *)arg;
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*disc_ev = NULL;
	ibt_status_t		status;
	int			old_state, new_state;

	ep_rp = timerp->ti_ep_res;
	ASSERT(ep_rp != NULL);
	ASSERT(timerp->ti_tmo_id != 0);
	/* the callout has fired, so the timeout id is no longer valid */
	timerp->ti_tmo_id = 0;

	/* freezes ep_state (TRANSITIONING) until set_state is called */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/* unblock hash_ep_free */
		mutex_enter(&ep_rp->ep_lock);
		ASSERT(ep_rp->ep_timer_hkey != 0);
		ep_rp->ep_timer_hkey = 0;
		cv_broadcast(&ep_rp->ep_cv);
		mutex_exit(&ep_rp->ep_lock);

		/* reset state to original state */
		daplka_ep_set_state(ep_rp, old_state, new_state);

		/* this function will also unref ep_rp */
		daplka_timer_info_free(timerp);
		return;
	}

	ASSERT(ep_rp->ep_timer_hkey != 0);
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we cannot keep ep_state in TRANSITIONING if we call
	 * ibt_close_rc_channel in blocking mode. this would cause
	 * a deadlock because the cm callbacks will be blocked and
	 * will not be able to wake us up.
	 */
	new_state = DAPLKA_EP_STATE_ABORTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * when we return from close_rc_channel, all callbacks should have
	 * completed. we can also be certain that these callbacks did not
	 * enqueue any events to conn_evd.
	 */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		DERR("timer_thread: ibt_close_rc_channel returned %d\n",
		    status);
	}
	old_state = daplka_ep_get_state(ep_rp);

	/*
	 * this is the only thread that can transition ep_state out
	 * of ABORTING. all other ep operations would fail when
	 * ep_state is in ABORTING.
	 */
	ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);

	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
	ASSERT(disc_ev != NULL);

	/* fabricate a TIMED_OUT connection event for the client */
	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("timer_thread: enqueue event(%p) evdp(%p)\n",
	    disc_ev, ep_rp->ep_conn_evd);

	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	/* this function will also unref ep_rp */
	daplka_timer_info_free(timerp);
}
1884 1884
1885 1885 /*
1886 1886 * dispatches a thread to continue with timer processing.
1887 1887 */
1888 1888 static void
1889 1889 daplka_timer_dispatch(void *arg)
1890 1890 {
1891 1891 /*
1892 1892 * keep rescheduling this function until
1893 1893 * taskq_dispatch succeeds.
1894 1894 */
1895 1895 if (taskq_dispatch(daplka_taskq,
1896 1896 daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
1897 1897 DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
1898 1898 (void) timeout(daplka_timer_dispatch, arg, 10);
1899 1899 }
1900 1900 }
1901 1901
1902 1902 /*
1903 1903 * this function is called by the kernel's callout thread.
1904 1904 * we first attempt to remove the timer object from the
1905 1905 * global timer table. if it is found, we dispatch a thread
1906 1906 * to continue processing the timer object. if it is not
1907 1907 * found, that means the timer has been cancelled by someone
1908 1908 * else.
1909 1909 */
1910 1910 static void
1911 1911 daplka_timer_handler(void *arg)
1912 1912 {
1913 1913 uint64_t timer_hkey = (uintptr_t)arg;
1914 1914 daplka_timer_info_t *timerp = NULL;
1915 1915
1916 1916 D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
1917 1917
1918 1918 (void) daplka_hash_remove(&daplka_timer_info_htbl,
1919 1919 timer_hkey, (void **)&timerp);
1920 1920 if (timerp == NULL) {
1921 1921 D2("timer_handler: timer already cancelled\n");
1922 1922 return;
1923 1923 }
1924 1924 daplka_timer_dispatch((void *)timerp);
1925 1925 }
1926 1926
1927 1927 /*
1928 1928 * allocates a timer_info object.
1929 1929 * a reference to a EP is held by this object. this ensures
1930 1930 * that the EP stays valid when a timer is outstanding.
1931 1931 */
1932 1932 static daplka_timer_info_t *
1933 1933 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
1934 1934 {
1935 1935 daplka_timer_info_t *timerp;
1936 1936
1937 1937 timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
1938 1938 if (timerp == NULL) {
1939 1939 DERR("timer_info_alloc: cannot allocate timer info\n");
1940 1940 return (NULL);
1941 1941 }
1942 1942 timerp->ti_ep_res = ep_rp;
1943 1943 timerp->ti_tmo_id = 0;
1944 1944
1945 1945 return (timerp);
1946 1946 }
1947 1947
1948 1948 /*
1949 1949 * Frees the timer_info object.
1950 1950 * we release the EP reference before freeing the object.
1951 1951 */
1952 1952 static void
1953 1953 daplka_timer_info_free(daplka_timer_info_t *timerp)
1954 1954 {
1955 1955 ASSERT(timerp->ti_ep_res != NULL);
1956 1956 DAPLKA_RS_UNREF(timerp->ti_ep_res);
1957 1957 timerp->ti_ep_res = NULL;
1958 1958 ASSERT(timerp->ti_tmo_id == 0);
1959 1959 kmem_free(timerp, sizeof (*timerp));
1960 1960 }
1961 1961
/*
 * cancels the timer set by ep_connect.
 * returns -1 if timer handling is in progress
 * and 0 otherwise.
 *
 * the caller must have frozen ep_state (via daplka_ep_get_state)
 * before calling this. a -1 return means daplka_timer_handler has
 * already claimed the timer object and a taskq thread will finish
 * with it; see daplka_hash_ep_free for how callers wait that out.
 */
static int
daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
{
	/*
	 * this function can only be called when ep_state
	 * is frozen.
	 */
	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
	if (ep_rp->ep_timer_hkey != 0) {
		daplka_timer_info_t *timerp = NULL;

		(void) daplka_hash_remove(&daplka_timer_info_htbl,
		    ep_rp->ep_timer_hkey, (void **)&timerp);
		if (timerp == NULL) {
			/*
			 * this is possible if the timer_handler has
			 * removed the timerp but the taskq thread has
			 * not transitioned the ep_state to DISCONNECTED.
			 * we need to reset the ep_state to allow the
			 * taskq thread to continue with its work. the
			 * taskq thread will set the ep_timer_hkey to 0
			 * so we don't have to do it here.
			 */
			DERR("cancel_timer: timer is being processed\n");
			return (-1);
		}
		/*
		 * we got the timer object. if the handler fires at
		 * this point, it will not be able to find the object
		 * and will return immediately. normally, ti_tmo_id gets
		 * cleared when the handler fires.
		 */
		ASSERT(timerp->ti_tmo_id != 0);

		/*
		 * note that untimeout can possibly call the handler.
		 * we are safe because the handler will be a no-op.
		 */
		(void) untimeout(timerp->ti_tmo_id);
		timerp->ti_tmo_id = 0;
		daplka_timer_info_free(timerp);
		ep_rp->ep_timer_hkey = 0;
	}
	return (0);
}
2012 2012
2013 2013 /*
2014 2014 * this function is called by daplka_hash_destroy for
2015 2015 * freeing timer_info objects
2016 2016 */
2017 2017 static void
2018 2018 daplka_hash_timer_free(void *obj)
2019 2019 {
2020 2020 daplka_timer_info_free((daplka_timer_info_t *)obj);
2021 2021 }
2022 2022
2023 2023 /* ARGSUSED */
2024 2024 static uint16_t
2025 2025 daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
2026 2026 {
2027 2027 uint8_t *bp;
2028 2028 int i;
2029 2029 uint16_t cksum = 0;
2030 2030
2031 2031 bp = (uint8_t *)dp;
2032 2032 for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
2033 2033 cksum += bp[i];
2034 2034 }
2035 2035 return (cksum);
2036 2036 }
2037 2037
/*
 * ep_connect is called by the client to initiate a connection to a
 * remote service point. It is a non-blocking call. If a non-zero
 * timeout is specified by the client, a timer will be set just before
 * returning from ep_connect. Upon a successful return from ep_connect,
 * the client will call evd_wait to wait for the connection to complete.
 * If the connection is rejected or has failed due to an error, the
 * client will be notified with an event containing the appropriate error
 * code. If the connection is accepted, the client will be notified with
 * the CONN_ESTABLISHED event. If the timer expires before either of the
 * above events (error or established), a TIMED_OUT event will be delivered
 * to the client.
 *
 * the complicated part of the timer logic is the handling of race
 * conditions with CM callbacks. we need to ensure that either the CM or
 * the timer thread gets to deliver an event, but not both. when the
 * CM callback is about to deliver an event, it always tries to cancel
 * the outstanding timer. if cancel_timer indicates that the timer is
 * already being processed, the CM callback will simply return without
 * delivering an event. when the timer thread executes, it tries to check
 * if the EP is still in CONNECTING state (timers only work on the active
 * side). if the EP is not in this state, the timer thread will return
 * without delivering an event.
 */
/* ARGSUSED */
static int
daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_connect_t	args;
	daplka_timer_info_t	*timerp = NULL;
	uint32_t		old_state, new_state;
	boolean_t		timer_inserted = B_FALSE;
	uint64_t		timer_hkey = 0;
	ibt_path_info_t		path_info;
	ibt_path_attr_t		path_attr;
	ibt_hca_attr_t		*hca_attrp;
	ibt_chan_open_args_t	chan_args;
	ibt_status_t		status = IBT_SUCCESS;
	uint8_t			num_paths;
	void			*priv_data;
	DAPL_PRIVATE		*dp;
	int			retval = 0;
	ib_gid_t		*sgid;
	ib_gid_t		*dgid;
	uint64_t		dgid_ored;
	ibt_ar_t		ar_query_s;
	ibt_ar_t		ar_result_s;
	ibt_path_flags_t	pathflags;

	D3("ep_connect: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_connect: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* lookup adds a reference; dropped at the bottom of this function */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
	if (ep_rp == NULL) {
		DERR("ep_connect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/* get_state freezes ep_state until daplka_ep_set_state is called */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_connect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("ep_connect: private data len (%d) exceeded "
		    "max size %d\n", args.epc_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		retval = EINVAL;
		goto cleanup;
	}

	/*
	 * check for remote ipaddress to dgid resolution needs ATS
	 */
	dgid = &args.epc_dgid;
	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	dgid_ored = 0ULL;
#endif /* DAPLKA_DEBUG_FORCE_ATS */
	/* check for unidentified dgid */
	if (dgid_ored == 0ULL) {
		/*
		 * setup for ibt_query_ar()
		 */
		sgid = &ia_rp->ia_hca_sgid;
		ar_query_s.ar_gid.gid_guid = 0ULL;
		ar_query_s.ar_gid.gid_prefix = 0ULL;
		ar_query_s.ar_pkey = 0;
		bcopy(args.epc_raddr_sadata.iad_sadata,
		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
#define	UR(b) ar_query_s.ar_data[(b)]
		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
		    UR(8), UR(9), UR(10), UR(11));
		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
		    UR(12), UR(13), UR(14), UR(15));
		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
		if (status != IBT_SUCCESS) {
			DERR("ep_connect: ibt_query_ar returned %d\n", status);
			/* IBTF errors go back to the library via *rvalp */
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
		/*
		 * dgid identified from SA record
		 */
		dgid = &ar_result_s.ar_gid;
		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
	}

	bzero(&path_info, sizeof (ibt_path_info_t));
	bzero(&path_attr, sizeof (ibt_path_attr_t));
	bzero(&chan_args, sizeof (ibt_chan_open_args_t));

	path_attr.pa_dgids = dgid;
	path_attr.pa_num_dgids = 1;
	/*
	 * don't set sid in path_attr saves 1 SA query
	 * Also makes server side not to write the service record
	 */
	path_attr.pa_sgid = ia_rp->ia_hca_sgid;
	path_attr.pa_pkey = ia_rp->ia_port_pkey;

	/* save the connection ep - struct copy */
	ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
	ep_rp->ep_dgid = *dgid;

	num_paths = 0;
	pathflags = IBT_PATH_PKEY;
	/* enable APM on remote port but not on loopback case */
	if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
	    (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
		pathflags |= IBT_PATH_APM;
	}
	status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
	    pathflags, &path_attr, 1, &path_info, &num_paths);

	if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
		DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
		    status, num_paths);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/* fill in the sid directly to path_info */
	path_info.pi_sid = args.epc_sid;
	hca_attrp = &ia_rp->ia_hca->hca_attr;

	/* fill in open channel args */
	chan_args.oc_path = &path_info;
	chan_args.oc_cm_handler = daplka_cm_rc_handler;
	chan_args.oc_cm_clnt_private = (void *)ep_rp;
	chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
	chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
	chan_args.oc_path_retry_cnt = 7; /* 3-bit field */
	chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;

	/*
	 * NOTE(review): assumes the library always sends the hello message
	 * as private data (epc_priv_sz > 0); only enforced on DEBUG kernels.
	 */
	ASSERT(args.epc_priv_sz > 0);
	priv_data = (void *)args.epc_priv;

	chan_args.oc_priv_data_len = args.epc_priv_sz;
	chan_args.oc_priv_data = priv_data;

	/*
	 * calculate checksum value of hello message and
	 * put hello message in networking byte order
	 */
	dp = (DAPL_PRIVATE *)priv_data;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dp))
	dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
	dp->hello_msg.hi_checksum = 0;
	dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dp))

	if (args.epc_timeout > 0) {
		/*
		 * increment refcnt before passing reference to
		 * timer_info_alloc.
		 */
		DAPLKA_RS_REF(ep_rp);
		timerp = daplka_timer_info_alloc(ep_rp);
		if (timerp == NULL) {
			DERR("ep_connect: cannot allocate timer\n");
			/*
			 * we need to remove the reference if
			 * allocation failed.
			 */
			DAPLKA_RS_UNREF(ep_rp);
			retval = ENOMEM;
			goto cleanup;
		}
		/*
		 * We generate our own hkeys so that timer_hkey can fit
		 * into a pointer and passed as an arg to timeout()
		 */
		timer_hkey = (uint64_t)daplka_timer_hkey_gen();
		retval = daplka_hash_insert(&daplka_timer_info_htbl,
		    &timer_hkey, (void *)timerp);
		if (retval != 0) {
			DERR("ep_connect: cannot insert timer info\n");
			goto cleanup;
		}
		ASSERT(ep_rp->ep_timer_hkey == 0);
		ep_rp->ep_timer_hkey = timer_hkey;
		timer_inserted = B_TRUE;
		D2("ep_connect: timer_hkey = 0x%llx\n",
		    (longlong_t)timer_hkey);
	}
	status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
	    IBT_NONBLOCKING, &chan_args, NULL);

	if (status != IBT_SUCCESS) {
		DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/*
	 * if a cm callback gets called at this point, it'll have to wait until
	 * ep_state becomes connecting (or some other state if another thread
	 * manages to get ahead of the callback). this guarantees that the
	 * callback will not touch the timer until it gets set.
	 */
	if (timerp != NULL) {
		clock_t		tmo;

		tmo = drv_usectohz((clock_t)args.epc_timeout);
		/*
		 * We generate our own 32 bit timer_hkey so that it can fit
		 * into a pointer
		 */
		ASSERT(timer_hkey != 0);
		timerp->ti_tmo_id = timeout(daplka_timer_handler,
		    (void *)(uintptr_t)timer_hkey, tmo);
	}
	new_state = DAPLKA_EP_STATE_CONNECTING;

cleanup:;
	if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
		/*
		 * if ibt_open_rc_channel failed, the timerp must still
		 * be in daplka_timer_info_htbl because neither the cm
		 * callback nor the timer_handler will be called.
		 */
		if (timer_inserted) {
			daplka_timer_info_t *new_timerp = NULL;

			ASSERT(timer_hkey != 0);
			(void) daplka_hash_remove(&daplka_timer_info_htbl,
			    timer_hkey, (void **)&new_timerp);
			ASSERT(new_timerp == timerp);
			ep_rp->ep_timer_hkey = 0;
		}
		daplka_timer_info_free(timerp);
	}
	/* unfreeze ep_state: CONNECTING on success, old_state otherwise */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	D3("ep_connect: exit\n");
	return (retval);
}
2307 2307
/*
 * ep_disconnect closes a connection with a remote peer.
 * if a connection has not been established, ep_disconnect
 * will instead flush all recv bufs posted to this channel.
 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
 * entry to ep_disconnect, the EP state will transition to
 * DISCONNECTING upon exit. the CM callbacks triggered by
 * ibt_close_rc_channel will cause EP state to become
 * DISCONNECTED. This function is a no-op if EP state is
 * DISCONNECTED.
 */
/* ARGSUSED */
static int
daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_disconnect_t	args;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_disconnect: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* lookup adds a reference; dropped before returning */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
	if (ep_rp == NULL) {
		DERR("ep_disconnect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/* get_state freezes ep_state until daplka_ep_set_state is called */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_ACCEPTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING &&
	    old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_disconnect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
	    (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
		D2("ep_disconnect: ep already disconnected\n");
		retval = 0;
		/* we leave the state as DISCONNECTED */
		goto cleanup;
	}
	if (old_state == DAPLKA_EP_STATE_CONNECTING ||
	    old_state == DAPLKA_EP_STATE_ACCEPTING) {
		D2("ep_disconnect: aborting, old_state = %d\n", old_state);
	}

	/*
	 * according to the udapl spec, ep_disconnect should
	 * flush the channel if the channel is not CONNECTED.
	 */
	if (old_state == DAPLKA_EP_STATE_CLOSED) {
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ep_disconnect: ibt_flush_channel failed %d\n",
			    status);
			*rvalp = (int)status;
		}
		retval = 0;
		/* we leave the state as CLOSED */
		goto cleanup;
	}

	/* unfreeze ep_state before calling into ibtl */
	new_state = DAPLKA_EP_STATE_DISCONNECTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, NULL);

	if (status == IBT_SUCCESS) {
		/*
		 * non-blocking close initiated; the CM callbacks will move
		 * the EP to DISCONNECTED. state is already set above, so
		 * skip the cleanup path and just drop our reference.
		 */
		DAPLKA_RS_UNREF(ep_rp);
		return (retval);
	} else {
		DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		/*
		 * NOTE(review): ep_state was already set to DISCONNECTING
		 * above; the set_state in cleanup reverts it to old_state.
		 * presumably daplka_ep_set_state tolerates being called
		 * again after the state has been unfrozen - confirm.
		 */
		new_state = old_state;
	}

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
2405 2405
2406 2406 /*
2407 2407 * this function resets the EP to a usable state (ie. from
2408 2408 * DISCONNECTED to CLOSED). this function is best implemented using
2409 2409 * the ibt_recycle_channel interface. until that is available, we will
2410 2410 * instead clone and tear down the existing channel and replace the
2411 2411 * existing channel with the cloned one.
2412 2412 */
2413 2413 /* ARGSUSED */
2414 2414 static int
2415 2415 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2416 2416 cred_t *cred, int *rvalp)
2417 2417 {
2418 2418 daplka_ep_resource_t *ep_rp = NULL;
2419 2419 dapl_ep_reinit_t args;
2420 2420 ibt_status_t status;
2421 2421 uint32_t old_state, new_state;
2422 2422 int retval = 0;
2423 2423
2424 2424 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
2425 2425 mode);
2426 2426 if (retval != 0) {
2427 2427 DERR("reinit: copyin error %d\n", retval);
2428 2428 return (EFAULT);
2429 2429 }
2430 2430 ep_rp = (daplka_ep_resource_t *)
2431 2431 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
2432 2432 if (ep_rp == NULL) {
2433 2433 DERR("reinit: cannot find ep resource\n");
2434 2434 return (EINVAL);
2435 2435 }
2436 2436 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2437 2437 new_state = old_state = daplka_ep_get_state(ep_rp);
2438 2438 if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
2439 2439 (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
2440 2440 DERR("reinit: invalid state %d\n", old_state);
2441 2441 retval = EINVAL;
2442 2442 goto cleanup;
2443 2443 }
2444 2444
2445 2445 status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
2446 2446 IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
2447 2447 ia_rp->ia_port_num, NULL, NULL);
2448 2448 if (status != IBT_SUCCESS) {
2449 2449 DERR("reinit: unable to clone channel\n");
2450 2450 *rvalp = (int)status;
2451 2451 retval = 0;
2452 2452 goto cleanup;
2453 2453 }
2454 2454 new_state = DAPLKA_EP_STATE_CLOSED;
2455 2455
2456 2456 cleanup:;
2457 2457 daplka_ep_set_state(ep_rp, old_state, new_state);
2458 2458 DAPLKA_RS_UNREF(ep_rp);
2459 2459 return (retval);
2460 2460 }
2461 2461
/*
 * destroys a EP resource.
 * called when refcnt drops to zero.
 */
static int
daplka_ep_destroy(daplka_resource_t *gen_rp)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)gen_rp;
	ibt_status_t		status;

	ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);

	/*
	 * by the time we get here, we can be sure that
	 * there is no outstanding timer.
	 */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
	    ep_rp, DAPLKA_RS_RNUM(ep_rp));
	/*
	 * free rc channel
	 */
	if (ep_rp->ep_chan_hdl != NULL) {
		/*
		 * clear the channel's back-pointer under daplka_mutex first
		 * so nothing can reach this EP through the channel private
		 * data while we free it.
		 */
		mutex_enter(&daplka_dev->daplka_mutex);
		ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
		mutex_exit(&daplka_dev->daplka_mutex);
		status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ep_free: ibt_free_channel returned %d\n",
			    status);
		}
		ep_rp->ep_chan_hdl = NULL;
		D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
	}
	/*
	 * release all references
	 */
	if (ep_rp->ep_snd_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
		ep_rp->ep_snd_evd = NULL;
	}
	if (ep_rp->ep_rcv_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
		ep_rp->ep_rcv_evd = NULL;
	}
	if (ep_rp->ep_conn_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
		ep_rp->ep_conn_evd = NULL;
	}
	if (ep_rp->ep_srq_res != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
		ep_rp->ep_srq_res = NULL;
	}
	if (ep_rp->ep_pd_res != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
		ep_rp->ep_pd_res = NULL;
	}
	cv_destroy(&ep_rp->ep_cv);
	mutex_destroy(&ep_rp->ep_lock);

	DAPLKA_RS_FINI(ep_rp);
	kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
	/* ep_rp is freed here; only the pointer value is logged below */
	D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
	return (0);
}
2529 2529
/*
 * this function is called by daplka_hash_destroy for
 * freeing EP resource objects
 */
static void
daplka_hash_ep_free(void *obj)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)obj;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval;

	/* freeze ep_state, then try to cancel any outstanding timer */
	old_state = daplka_ep_get_state(ep_rp);
	retval = daplka_cancel_timer(ep_rp);
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	if (retval != 0) {
		/*
		 * cancel_timer returned -1: the timer object has already
		 * been claimed and a taskq thread is mid-flight. wait on
		 * ep_cv until daplka_timer_thread clears ep_timer_hkey
		 * and broadcasts, before tearing the EP down.
		 */
		D2("hash_ep_free: ep_rp 0x%p "
		    "timer is still being processed\n", ep_rp);
		mutex_enter(&ep_rp->ep_lock);
		if (ep_rp->ep_timer_hkey != 0) {
			D2("hash_ep_free: ep_rp 0x%p "
			    "waiting for timer_hkey to be 0\n", ep_rp);
			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		}
		mutex_exit(&ep_rp->ep_lock);
	}

	/* call ibt_close_rc_channel regardless of what state we are in */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		/* a close failure is only unexpected if we were connected */
		if (old_state == DAPLKA_EP_STATE_CONNECTED ||
		    old_state == DAPLKA_EP_STATE_CONNECTING ||
		    old_state == DAPLKA_EP_STATE_ACCEPTING) {
			DERR("hash_ep_free: ep_rp 0x%p state %d "
			    "unexpected error %d from close_rc_channel\n",
			    ep_rp, old_state, status);
		}
		D2("hash_ep_free: close_rc_channel, status %d\n", status);
	}

	/* may trigger daplka_ep_destroy if this is the last reference */
	DAPLKA_RS_UNREF(ep_rp);
}
2575 2575
2576 2576 /*
2577 2577 * creates a EVD resource.
2578 2578 * a EVD is used by the client to wait for events from one
2579 2579 * or more sources.
2580 2580 */
2581 2581 /* ARGSUSED */
2582 2582 static int
2583 2583 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2584 2584 cred_t *cred, int *rvalp)
2585 2585 {
2586 2586 daplka_evd_resource_t *evd_rp = NULL;
2587 2587 daplka_async_evd_hkey_t *async_evd;
2588 2588 ibt_hca_attr_t *hca_attrp;
2589 2589 ibt_cq_attr_t cq_attr;
2590 2590 dapl_evd_create_t args;
2591 2591 uint64_t evd_hkey = 0;
2592 2592 boolean_t inserted = B_FALSE;
2593 2593 int retval = 0;
2594 2594 ibt_status_t status;
2595 2595
2596 2596 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
2597 2597 mode);
2598 2598 if (retval != 0) {
2599 2599 DERR("evd_create: copyin error %d", retval);
2600 2600 return (EFAULT);
2601 2601 }
2602 2602 if ((args.evd_flags &
2603 2603 ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
2604 2604 DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
2605 2605 return (EINVAL);
2606 2606 }
2607 2607
2608 2608 evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
2609 2609 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
2610 2610 DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
2611 2611 DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);
2612 2612
2613 2613 mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
2614 2614 cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
2615 2615 evd_rp->evd_hca = ia_rp->ia_hca;
2616 2616 evd_rp->evd_flags = args.evd_flags;
2617 2617 evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
2618 2618 evd_rp->evd_cookie = args.evd_cookie;
2619 2619 evd_rp->evd_cno_res = NULL;
2620 2620 evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2621 2621 evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2622 2622 evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;
2623 2623
2624 2624 /*
2625 2625 * if the client specified a non-zero cno_hkey, we
2626 2626 * lookup the cno and save the reference for later use.
2627 2627 */
2628 2628 if (args.evd_cno_hkey > 0) {
2629 2629 daplka_cno_resource_t *cno_rp;
2630 2630
2631 2631 cno_rp = (daplka_cno_resource_t *)
2632 2632 daplka_hash_lookup(&ia_rp->ia_cno_htbl,
2633 2633 args.evd_cno_hkey);
2634 2634 if (cno_rp == NULL) {
2635 2635 DERR("evd_create: cannot find cno resource\n");
2636 2636 goto cleanup;
2637 2637 }
2638 2638 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
2639 2639 evd_rp->evd_cno_res = cno_rp;
2640 2640 }
2641 2641 hca_attrp = &ia_rp->ia_hca->hca_attr;
2642 2642 if ((evd_rp->evd_flags &
2643 2643 (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
2644 2644 if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
2645 2645 DERR("evd_create: invalid cq size %d",
2646 2646 args.evd_cq_size);
2647 2647 retval = EINVAL;
2648 2648 goto cleanup;
2649 2649 }
2650 2650 cq_attr.cq_size = args.evd_cq_size;
2651 2651 cq_attr.cq_sched = NULL;
2652 2652 cq_attr.cq_flags = IBT_CQ_USER_MAP;
2653 2653
2654 2654 status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
2655 2655 &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);
2656 2656
2657 2657 if (status != IBT_SUCCESS) {
2658 2658 DERR("evd_create: ibt_alloc_cq returned %d", status);
2659 2659 *rvalp = (int)status;
2660 2660 retval = 0;
2661 2661 goto cleanup;
2662 2662 }
2663 2663
2664 2664 /*
2665 2665 * store evd ptr with cq_hdl
2666 2666 * mutex is only needed for race of "destroy" and "async"
2667 2667 */
2668 2668 mutex_enter(&daplka_dev->daplka_mutex);
2669 2669 ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
2670 2670 mutex_exit(&daplka_dev->daplka_mutex);
2671 2671
2672 2672 /* Get HCA-specific data_out info */
2673 2673 status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2674 2674 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2675 2675 &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));
2676 2676
2677 2677 if (status != IBT_SUCCESS) {
2678 2678 DERR("evd_create: ibt_ci_data_out error(%d)", status);
2679 2679 *rvalp = (int)status;
2680 2680 retval = 0;
2681 2681 goto cleanup;
2682 2682 }
2683 2683
2684 2684 args.evd_cq_real_size = evd_rp->evd_cq_real_size;
2685 2685
2686 2686 ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
2687 2687 (void *)evd_rp);
2688 2688 }
2689 2689
2690 2690 retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
2691 2691 &evd_hkey, (void *)evd_rp);
2692 2692 if (retval != 0) {
2693 2693 DERR("evd_ceate: cannot insert evd %d\n", retval);
2694 2694 goto cleanup;
2695 2695 }
2696 2696 inserted = B_TRUE;
2697 2697 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp))
2698 2698
2699 2699 /*
2700 2700 * If this evd handles async events need to add to the IA resource
2701 2701 * async evd list
2702 2702 */
2703 2703 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
2704 2704 async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
2705 2705 daplka_km_flags);
2706 2706 /* add the evd to the head of the list */
2707 2707 mutex_enter(&ia_rp->ia_lock);
2708 2708 async_evd->aeh_evd_hkey = evd_hkey;
2709 2709 async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
2710 2710 ia_rp->ia_async_evd_hkeys = async_evd;
2711 2711 mutex_exit(&ia_rp->ia_lock);
2712 2712 }
2713 2713
2714 2714 args.evd_hkey = evd_hkey;
2715 2715 retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t));
2716 2716 if (retval != 0) {
2717 2717 DERR("evd_create: copyout error %d\n", retval);
2718 2718 retval = EFAULT;
2719 2719 goto cleanup;
2720 2720 }
2721 2721 return (0);
2722 2722
2723 2723 cleanup:;
2724 2724 if (inserted) {
2725 2725 daplka_evd_resource_t *free_rp = NULL;
2726 2726
2727 2727 (void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
2728 2728 (void **)&free_rp);
2729 2729 if (free_rp != evd_rp) {
2730 2730 DERR("evd_create: cannot remove evd\n");
2731 2731 /*
2732 2732 * we can only get here if another thread
2733 2733 * has completed the cleanup in evd_free
2734 2734 */
2735 2735 return (retval);
2736 2736 }
2737 2737 }
2738 2738 DAPLKA_RS_UNREF(evd_rp);
2739 2739 return (retval);
2740 2740 }
2741 2741
2742 2742 /*
2743 2743 * resizes CQ and returns new mapping info to library.
2744 2744 */
2745 2745 /* ARGSUSED */
2746 2746 static int
2747 2747 daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2748 2748 cred_t *cred, int *rvalp)
2749 2749 {
2750 2750 daplka_evd_resource_t *evd_rp = NULL;
2751 2751 ibt_hca_attr_t *hca_attrp;
2752 2752 dapl_cq_resize_t args;
2753 2753 ibt_status_t status;
2754 2754 int retval = 0;
2755 2755
2756 2756 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
2757 2757 mode);
2758 2758 if (retval != 0) {
2759 2759 DERR("cq_resize: copyin error %d\n", retval);
2760 2760 return (EFAULT);
2761 2761 }
2762 2762
2763 2763 /* get evd resource */
2764 2764 evd_rp = (daplka_evd_resource_t *)
2765 2765 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
2766 2766 if (evd_rp == NULL) {
2767 2767 DERR("cq_resize: cannot find evd resource\n");
2768 2768 return (EINVAL);
2769 2769 }
2770 2770 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
2771 2771
2772 2772 hca_attrp = &ia_rp->ia_hca->hca_attr;
2773 2773 if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
2774 2774 DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
2775 2775 retval = EINVAL;
2776 2776 goto cleanup;
2777 2777 }
2778 2778 /*
2779 2779 * If ibt_resize_cq fails that it is primarily due to resource
2780 2780 * shortage. Per IB spec resize will never loose events and
2781 2781 * a resize error leaves the CQ intact. Therefore even if the
2782 2782 * resize request fails we proceed and get the mapping data
2783 2783 * from the CQ so that the library can mmap it.
2784 2784 */
2785 2785 status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
2786 2786 &args.cqr_cq_real_size);
2787 2787 if (status != IBT_SUCCESS) {
2788 2788 /* we return the size of the old CQ if resize fails */
2789 2789 args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
2790 2790 ASSERT(status != IBT_CQ_HDL_INVALID);
2791 2791 DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
2792 2792 } else {
2793 2793 mutex_enter(&evd_rp->evd_lock);
2794 2794 evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
2795 2795 mutex_exit(&evd_rp->evd_lock);
2796 2796 }
2797 2797
2798 2798 D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
2799 2799 DAPLKA_RS_RNUM(evd_rp),
2800 2800 args.cqr_cq_new_size, args.cqr_cq_real_size);
2801 2801
2802 2802 /* Get HCA-specific data_out info */
2803 2803 status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2804 2804 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2805 2805 &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
2806 2806 if (status != IBT_SUCCESS) {
2807 2807 DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
2808 2808 /* return ibt_ci_data_out status */
2809 2809 *rvalp = (int)status;
2810 2810 retval = 0;
2811 2811 goto cleanup;
2812 2812 }
2813 2813
2814 2814 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
2815 2815 mode);
2816 2816 if (retval != 0) {
2817 2817 DERR("cq_resize: copyout error %d\n", retval);
2818 2818 retval = EFAULT;
2819 2819 goto cleanup;
2820 2820 }
2821 2821
2822 2822 cleanup:;
2823 2823 if (evd_rp != NULL) {
2824 2824 DAPLKA_RS_UNREF(evd_rp);
2825 2825 }
2826 2826 return (retval);
2827 2827 }
2828 2828
2829 2829 /*
2830 2830 * Routine to copyin the event poll message so that 32 bit libraries
2831 2831 * can be safely supported
2832 2832 */
2833 2833 int
2834 2834 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
2835 2835 {
2836 2836 int retval;
2837 2837
2838 2838 #ifdef _MULTI_DATAMODEL
2839 2839 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2840 2840 dapl_event_poll32_t args32;
2841 2841
2842 2842 retval = ddi_copyin((void *)inarg, &args32,
2843 2843 sizeof (dapl_event_poll32_t), mode);
2844 2844 if (retval != 0) {
2845 2845 DERR("event_poll_copyin: 32bit error %d\n", retval);
2846 2846 return (EFAULT);
2847 2847 }
2848 2848
2849 2849 outarg->evp_evd_hkey = args32.evp_evd_hkey;
2850 2850 outarg->evp_threshold = args32.evp_threshold;
2851 2851 outarg->evp_timeout = args32.evp_timeout;
2852 2852 outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
2853 2853 outarg->evp_num_ev = args32.evp_num_ev;
2854 2854 outarg->evp_num_polled = args32.evp_num_polled;
2855 2855 return (0);
2856 2856 }
2857 2857 #endif
2858 2858 retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
2859 2859 mode);
2860 2860 if (retval != 0) {
2861 2861 DERR("event_poll: copyin error %d\n", retval);
2862 2862 return (EFAULT);
2863 2863 }
2864 2864
2865 2865 return (0);
2866 2866 }
2867 2867
2868 2868 /*
2869 2869 * Routine to copyout the event poll message so that 32 bit libraries
2870 2870 * can be safely supported
2871 2871 */
2872 2872 int
2873 2873 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
2874 2874 {
2875 2875 int retval;
2876 2876
2877 2877 #ifdef _MULTI_DATAMODEL
2878 2878 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2879 2879 dapl_event_poll32_t args32;
2880 2880
2881 2881 args32.evp_evd_hkey = inarg->evp_evd_hkey;
2882 2882 args32.evp_threshold = inarg->evp_threshold;
2883 2883 args32.evp_timeout = inarg->evp_timeout;
2884 2884 args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
2885 2885 args32.evp_num_ev = inarg->evp_num_ev;
2886 2886 args32.evp_num_polled = inarg->evp_num_polled;
2887 2887
2888 2888 retval = ddi_copyout((void *)&args32, (void *)outarg,
2889 2889 sizeof (dapl_event_poll32_t), mode);
2890 2890 if (retval != 0) {
2891 2891 DERR("event_poll_copyout: 32bit error %d\n", retval);
2892 2892 return (EFAULT);
2893 2893 }
2894 2894 return (0);
2895 2895 }
2896 2896 #endif
2897 2897 retval = ddi_copyout((void *)inarg, (void *)outarg,
2898 2898 sizeof (dapl_event_poll_t), mode);
2899 2899 if (retval != 0) {
2900 2900 DERR("event_poll_copyout: error %d\n", retval);
2901 2901 return (EFAULT);
2902 2902 }
2903 2903
2904 2904 return (0);
2905 2905 }
2906 2906
2907 2907 /*
2908 2908 * fucntion to handle CM REQ RCV private data from Solaris or third parties
2909 2909 */
2910 2910 /* ARGSUSED */
2911 2911 static void
2912 2912 daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
2913 2913 dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
2914 2914 {
2915 2915 DAPL_PRIVATE *dp;
2916 2916 ib_gid_t *lgid;
2917 2917 ibt_ar_t ar_query_s;
2918 2918 ibt_ar_t ar_result_s;
2919 2919 DAPL_HELLO_MSG *hip;
2920 2920 uint32_t ipaddr_ord;
2921 2921 ibt_priv_data_len_t clen;
2922 2922 ibt_priv_data_len_t olen;
2923 2923 ibt_status_t status;
2924 2924 uint16_t cksum;
2925 2925
2926 2926 /*
2927 2927 * get private data and len
2928 2928 */
2929 2929 dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
2930 2930 clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
2931 2931 #if defined(DAPLKA_DEBUG_FORCE_ATS)
2932 2932 /* skip the DAPL_PRIVATE chekcsum check */
2933 2933 #else
2934 2934 /* for remote connects */
2935 2935 /* look up hello message in the CM private data area */
2936 2936 if (clen >= sizeof (DAPL_PRIVATE) &&
2937 2937 (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
2938 2938 cksum = ntohs(dp->hello_msg.hi_checksum);
2939 2939 dp->hello_msg.hi_checksum = 0;
2940 2940 if (daplka_hellomsg_cksum(dp) == cksum) {
2941 2941 D2("daplka_crevent_privdata_post: Solaris msg\n");
2942 2942 evd_rp->ibe_ce.ibce_priv_data_size = clen;
2943 2943 dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
2944 2944 dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
2945 2945 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2946 2946 kmem_free(dp, clen);
2947 2947 return;
2948 2948 }
2949 2949 }
2950 2950 #endif /* DAPLKA_DEBUG_FORCE_ATS */
2951 2951
2952 2952 D2("daplka_crevent_privdata_post: 3rd party msg\n");
2953 2953 /* transpose CM private data into hello message */
2954 2954 if (clen) {
2955 2955 olen = clen;
2956 2956 if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
2957 2957 clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
2958 2958 }
2959 2959 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2960 2960 kmem_free(dp, olen);
2961 2961 } else {
2962 2962 bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
2963 2963 DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
2964 2964 }
2965 2965 evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
2966 2966 dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
2967 2967 /*
2968 2968 * fill in hello message
2969 2969 */
2970 2970 hip = &dp->hello_msg;
2971 2971 hip->hi_checksum = DAPL_CHECKSUM;
2972 2972 hip->hi_clen = clen;
2973 2973 hip->hi_mid = 0;
2974 2974 hip->hi_vers = DAPL_HELLO_MSG_VERS;
2975 2975 hip->hi_port = 0;
2976 2976
2977 2977 /* assign sgid and dgid */
2978 2978 lgid = &ia_rp->ia_hca_sgid;
2979 2979 ar_query_s.ar_gid.gid_prefix =
2980 2980 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
2981 2981 ar_query_s.ar_gid.gid_guid =
2982 2982 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
2983 2983 ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
2984 2984 bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);
2985 2985
2986 2986 /* reverse ip address lookup through ATS */
2987 2987 status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
2988 2988 if (status == IBT_SUCCESS) {
2989 2989 bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
2990 2990 /* determine the address families */
2991 2991 ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
2992 2992 hip->hi_v4pad[2];
2993 2993 if (ipaddr_ord == 0) {
2994 2994 hip->hi_ipv = AF_INET;
2995 2995 } else {
2996 2996 hip->hi_ipv = AF_INET6;
2997 2997 }
2998 2998
2999 2999 #define UL(b) ar_result_s.ar_data[(b)]
3000 3000 D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
3001 3001 hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
3002 3002 D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
3003 3003 UL(12), UL(13), UL(14), UL(15));
3004 3004 } else {
3005 3005 /* non-conformed third parties */
3006 3006 hip->hi_ipv = AF_UNSPEC;
3007 3007 bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
3008 3008 }
3009 3009 }
3010 3010
3011 3011 /*
3012 3012 * this function is called by evd_wait and evd_dequeue to wait for
3013 3013 * connection events and CQ notifications. typically this function
3014 3014 * is called when the userland CQ is empty and the client has
3015 3015 * specified a non-zero timeout to evd_wait. if the client is
3016 3016 * interested in CQ events, the CQ must be armed in userland prior
3017 3017 * to calling this function.
3018 3018 */
3019 3019 /* ARGSUSED */
3020 3020 static int
3021 3021 daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3022 3022 cred_t *cred, int *rvalp)
3023 3023 {
3024 3024 daplka_evd_resource_t *evd_rp = NULL;
3025 3025 dapl_event_poll_t args;
3026 3026 daplka_evd_event_t *head;
3027 3027 dapl_ib_event_t evp_arr[NUM_EVENTS_PER_POLL];
3028 3028 dapl_ib_event_t *evp;
3029 3029 dapl_ib_event_t *evp_start;
3030 3030 size_t evp_size;
3031 3031 int threshold;
3032 3032 clock_t timeout;
3033 3033 uint32_t max_events;
3034 3034 uint32_t num_events = 0;
3035 3035 void *pd;
3036 3036 ibt_priv_data_len_t n;
3037 3037 int retval = 0;
3038 3038 int rc;
3039 3039
3040 3040 retval = daplka_event_poll_copyin(arg, &args, mode);
3041 3041 if (retval != 0) {
3042 3042 return (EFAULT);
3043 3043 }
3044 3044
3045 3045 if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
3046 3046 DERR("event_poll: evp_ep cannot be NULL if num_wc=%d",
3047 3047 args.evp_num_ev);
3048 3048 return (EINVAL);
3049 3049 }
3050 3050 /*
3051 3051 * Note: dequeue requests have a threshold = 0, timeout = 0
3052 3052 */
3053 3053 threshold = args.evp_threshold;
3054 3054
3055 3055 max_events = args.evp_num_ev;
3056 3056 /* ensure library is passing sensible values */
3057 3057 if (max_events < threshold) {
3058 3058 DERR("event_poll: max_events(%d) < threshold(%d)\n",
3059 3059 max_events, threshold);
3060 3060 return (EINVAL);
3061 3061 }
3062 3062 /* Do a sanity check to avoid excessive memory allocation */
3063 3063 if (max_events > DAPL_EVD_MAX_EVENTS) {
3064 3064 DERR("event_poll: max_events(%d) > %d",
3065 3065 max_events, DAPL_EVD_MAX_EVENTS);
3066 3066 return (EINVAL);
3067 3067 }
3068 3068 D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
3069 3069 threshold, (longlong_t)args.evp_timeout, max_events);
3070 3070
3071 3071 /* get evd resource */
3072 3072 evd_rp = (daplka_evd_resource_t *)
3073 3073 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
3074 3074 if (evd_rp == NULL) {
3075 3075 DERR("event_poll: cannot find evd resource\n");
3076 3076 return (EINVAL);
3077 3077 }
3078 3078 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3079 3079
3080 3080 /*
3081 3081 * Use event array on the stack if possible
3082 3082 */
3083 3083 if (max_events <= NUM_EVENTS_PER_POLL) {
3084 3084 evp_start = evp = &evp_arr[0];
3085 3085 } else {
3086 3086 evp_size = max_events * sizeof (dapl_ib_event_t);
3087 3087 evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
3088 3088 if (evp == NULL) {
3089 3089 DERR("event_poll: kmem_zalloc failed, evp_size %d",
3090 3090 evp_size);
3091 3091 retval = ENOMEM;
3092 3092 goto cleanup;
3093 3093 }
3094 3094 }
3095 3095
3096 3096 /*
3097 3097 * The Event poll algorithm is as follows -
3098 3098 * The library passes a buffer big enough to hold "max_events"
3099 3099 * events. max_events is >= threshold. If at any stage we get
3100 3100 * max_events no. of events we bail. The events are polled in
3101 3101 * the following order -
3102 3102 * 1) Check for CR events in the evd_cr_events list
3103 3103 * 2) Check for Connection events in the evd_connection_events list
3104 3104 *
3105 3105 * If after the above 2 steps we don't have enough(>= threshold) events
3106 3106 * we block for CQ notification and sleep. Upon being woken up we start
3107 3107 * at step 1 again.
3108 3108 */
3109 3109
3110 3110 /*
3111 3111 * Note: this could be 0 or INFINITE or anyother value in microsec
3112 3112 */
3113 3113 if (args.evp_timeout > 0) {
3114 3114 if (args.evp_timeout >= LONG_MAX) {
3115 3115 timeout = LONG_MAX;
3116 3116 } else {
3117 3117 clock_t curr_time = ddi_get_lbolt();
3118 3118
3119 3119 timeout = curr_time +
3120 3120 drv_usectohz((clock_t)args.evp_timeout);
3121 3121 /*
3122 3122 * use the max value if we wrapped around
3123 3123 */
3124 3124 if (timeout <= curr_time) {
3125 3125 timeout = LONG_MAX;
3126 3126 }
3127 3127 }
3128 3128 } else {
3129 3129 timeout = 0;
3130 3130 }
3131 3131
3132 3132 mutex_enter(&evd_rp->evd_lock);
3133 3133 for (;;) {
3134 3134 /*
3135 3135 * If this evd is waiting for CM events check that now.
3136 3136 */
3137 3137 if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
3138 3138 (evd_rp->evd_cr_events.eel_num_elements > 0)) {
3139 3139 /* dequeue events from evd_cr_events list */
3140 3140 while (head = daplka_evd_event_dequeue(
3141 3141 &evd_rp->evd_cr_events)) {
3142 3142 /*
3143 3143 * populate the evp array
3144 3144 */
3145 3145 evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
3146 3146 evp[num_events].ibe_ce.ibce_event =
3147 3147 head->ee_cmev.ec_cm_ev_type;
3148 3148 evp[num_events].ibe_ce.ibce_cookie =
3149 3149 (uint64_t)head->ee_cmev.ec_cm_cookie;
3150 3150 evp[num_events].ibe_ce.ibce_psep_cookie =
3151 3151 head->ee_cmev.ec_cm_psep_cookie;
3152 3152 daplka_crevent_privdata_post(ia_rp,
3153 3153 &evp[num_events], head);
3154 3154 kmem_free(head, sizeof (daplka_evd_event_t));
3155 3155
3156 3156 if (++num_events == max_events) {
3157 3157 mutex_exit(&evd_rp->evd_lock);
3158 3158 goto maxevent_reached;
3159 3159 }
3160 3160 }
3161 3161 }
3162 3162
3163 3163 if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
3164 3164 (evd_rp->evd_conn_events.eel_num_elements > 0)) {
3165 3165 /* dequeue events from evd_connection_events list */
3166 3166 while ((head = daplka_evd_event_dequeue
3167 3167 (&evd_rp->evd_conn_events))) {
3168 3168 /*
3169 3169 * populate the evp array -
3170 3170 *
3171 3171 */
3172 3172 if (head->ee_cmev.ec_cm_is_passive) {
3173 3173 evp[num_events].ibe_ev_family =
3174 3174 DAPL_PASSIVE_CONNECTION_EVENTS;
3175 3175 } else {
3176 3176 evp[num_events].ibe_ev_family =
3177 3177 DAPL_ACTIVE_CONNECTION_EVENTS;
3178 3178 }
3179 3179 evp[num_events].ibe_ce.ibce_event =
3180 3180 head->ee_cmev.ec_cm_ev_type;
3181 3181 evp[num_events].ibe_ce.ibce_cookie =
3182 3182 (uint64_t)head->ee_cmev.ec_cm_cookie;
3183 3183 evp[num_events].ibe_ce.ibce_psep_cookie =
3184 3184 head->ee_cmev.ec_cm_psep_cookie;
3185 3185
3186 3186 if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
3187 3187 pd = head->ee_cmev.ec_cm_ev_priv_data;
3188 3188 n = head->
3189 3189 ee_cmev.ec_cm_ev_priv_data_len;
3190 3190 bcopy(pd, (void *)evp[num_events].
3191 3191 ibe_ce.ibce_priv_data_ptr, n);
3192 3192 evp[num_events].ibe_ce.
3193 3193 ibce_priv_data_size = n;
3194 3194 kmem_free(pd, n);
3195 3195 }
3196 3196
3197 3197 kmem_free(head, sizeof (daplka_evd_event_t));
3198 3198
3199 3199 if (++num_events == max_events) {
3200 3200 mutex_exit(&evd_rp->evd_lock);
3201 3201 goto maxevent_reached;
3202 3202 }
3203 3203 }
3204 3204 }
3205 3205
3206 3206 if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
3207 3207 (evd_rp->evd_async_events.eel_num_elements > 0)) {
3208 3208 /* dequeue events from evd_async_events list */
3209 3209 while (head = daplka_evd_event_dequeue(
3210 3210 &evd_rp->evd_async_events)) {
3211 3211 /*
3212 3212 * populate the evp array
3213 3213 */
3214 3214 evp[num_events].ibe_ev_family =
3215 3215 DAPL_ASYNC_EVENTS;
3216 3216 evp[num_events].ibe_async.ibae_type =
3217 3217 head->ee_aev.ibae_type;
3218 3218 evp[num_events].ibe_async.ibae_hca_guid =
3219 3219 head->ee_aev.ibae_hca_guid;
3220 3220 evp[num_events].ibe_async.ibae_cookie =
3221 3221 head->ee_aev.ibae_cookie;
3222 3222 evp[num_events].ibe_async.ibae_port =
3223 3223 head->ee_aev.ibae_port;
3224 3224
3225 3225 kmem_free(head, sizeof (daplka_evd_event_t));
3226 3226
3227 3227 if (++num_events == max_events) {
3228 3228 break;
3229 3229 }
3230 3230 }
3231 3231 }
3232 3232
3233 3233 /*
3234 3234 * We have sufficient events for this call so no need to wait
3235 3235 */
3236 3236 if ((threshold > 0) && (num_events >= threshold)) {
3237 3237 mutex_exit(&evd_rp->evd_lock);
3238 3238 break;
3239 3239 }
3240 3240
3241 3241 evd_rp->evd_waiters++;
3242 3242 /*
3243 3243 * There are no new events and a timeout was specified.
3244 3244 * Note: for CQ events threshold is 0 but timeout is
3245 3245 * not necessarily 0.
3246 3246 */
3247 3247 while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
3248 3248 timeout) {
3249 3249 retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
3250 3250 &evd_rp->evd_lock, timeout);
3251 3251 if (retval == 0) {
3252 3252 retval = EINTR;
3253 3253 break;
3254 3254 } else if (retval == -1) {
3255 3255 retval = ETIME;
3256 3256 break;
3257 3257 } else {
3258 3258 retval = 0;
3259 3259 continue;
3260 3260 }
3261 3261 }
3262 3262 evd_rp->evd_waiters--;
3263 3263 if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
3264 3264 /*
3265 3265 * If we got woken up by the CQ handler due to events
3266 3266 * in the CQ. Need to go to userland to check for
3267 3267 * CQ events. Or if we were woken up due to S/W events
3268 3268 */
3269 3269
3270 3270 /* check for userland events only */
3271 3271 if (!(evd_rp->evd_newevents &
3272 3272 ~DAPLKA_EVD_ULAND_EVENTS)) {
3273 3273 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3274 3274 mutex_exit(&evd_rp->evd_lock);
3275 3275 break;
3276 3276 }
3277 3277 /*
3278 3278 * Clear newevents since we are going to loopback
3279 3279 * back and check for both CM and CQ events
3280 3280 */
3281 3281 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3282 3282 } else { /* error */
3283 3283 mutex_exit(&evd_rp->evd_lock);
3284 3284 break;
3285 3285 }
3286 3286 }
3287 3287
3288 3288 maxevent_reached:
3289 3289 args.evp_num_polled = num_events;
3290 3290
3291 3291 /*
3292 3292 * At this point retval might have a value that we want to return
3293 3293 * back to the user. So the copyouts shouldn't tamper retval.
3294 3294 */
3295 3295 if (args.evp_num_polled > 0) { /* copyout the events */
3296 3296 rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
3297 3297 sizeof (dapl_ib_event_t), mode);
3298 3298 if (rc != 0) { /* XXX: we are losing events here */
3299 3299 DERR("event_poll: event array copyout error %d", rc);
3300 3300 retval = EFAULT;
3301 3301 goto cleanup;
3302 3302 }
3303 3303 rc = daplka_event_poll_copyout(&args, arg, mode);
3304 3304 if (rc != 0) { /* XXX: we are losing events here */
3305 3305 DERR("event_poll: copyout error %d\n", rc);
3306 3306 retval = EFAULT;
3307 3307 goto cleanup;
3308 3308 }
3309 3309 }
3310 3310
3311 3311 cleanup:;
3312 3312 if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
3313 3313 kmem_free(evp_start, evp_size);
3314 3314 }
3315 3315
3316 3316 if (evd_rp != NULL) {
3317 3317 DAPLKA_RS_UNREF(evd_rp);
3318 3318 }
3319 3319 return (retval);
3320 3320 }
3321 3321
3322 3322 /* ARGSUSED */
3323 3323 static int
3324 3324 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3325 3325 cred_t *cred, int *rvalp)
3326 3326 {
3327 3327 dapl_event_wakeup_t args;
3328 3328 daplka_evd_resource_t *evd_rp;
3329 3329 int retval;
3330 3330
3331 3331 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
3332 3332 mode);
3333 3333 if (retval != 0) {
3334 3334 DERR("event_wakeup: copyin error %d\n", retval);
3335 3335 return (EFAULT);
3336 3336 }
3337 3337
3338 3338 /* get evd resource */
3339 3339 evd_rp = (daplka_evd_resource_t *)
3340 3340 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
3341 3341 if (evd_rp == NULL) {
3342 3342 DERR("event_wakeup: cannot find evd resource\n");
3343 3343 return (EINVAL);
3344 3344 }
3345 3345 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3346 3346
3347 3347 daplka_evd_wakeup(evd_rp, NULL, NULL);
3348 3348
3349 3349 DAPLKA_RS_UNREF(evd_rp);
3350 3350
3351 3351 return (retval);
3352 3352 }
3353 3353
/*
 * Associates a different CNO (or no CNO, if evmc_cno_hkey is 0) with
 * an existing EVD. The reference taken on the new CNO by the hash
 * lookup is retained by the EVD; the reference previously held on the
 * old CNO is dropped. Returns 0 on success, EFAULT on copyin failure,
 * EINVAL on a bad evd or cno hkey.
 */
/* ARGSUSED */
static int
daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_evd_modify_cno_t	args;
	daplka_evd_resource_t	*evd_rp;
	daplka_cno_resource_t	*cno_rp;
	daplka_cno_resource_t	*old_cno_rp;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
	    mode);
	if (retval != 0) {
		DERR("evd_modify_cno: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource; the lookup holds a reference on success */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
	if (evd_rp == NULL) {
		DERR("evd_modify_cno: cannot find evd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	if (args.evmc_cno_hkey > 0) {
		/* get cno resource corresponding to the new CNO */
		cno_rp = (daplka_cno_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
		    args.evmc_cno_hkey);
		if (cno_rp == NULL) {
			DERR("evd_modify_cno: cannot find CNO resource\n");
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	} else {
		/* hkey of 0 means detach the EVD from any CNO */
		cno_rp = NULL;
	}

	/* swap in the new CNO under evd_lock */
	mutex_enter(&evd_rp->evd_lock);
	old_cno_rp = evd_rp->evd_cno_res;
	evd_rp->evd_cno_res = cno_rp;
	mutex_exit(&evd_rp->evd_lock);

	/*
	 * drop the refcnt on the old CNO, the refcnt on the new CNO is
	 * retained since the evd holds a reference to it.
	 */
	if (old_cno_rp) {
		DAPLKA_RS_UNREF(old_cno_rp);
	}

cleanup:
	/* drop the reference taken by the evd hash lookup */
	if (evd_rp) {
		DAPLKA_RS_UNREF(evd_rp);
	}

	return (retval);
}
3417 3417
3418 3418 /*
3419 3419 * Frees the EVD and associated resources.
3420 3420 * If there are other threads still using this EVD, the destruction
3421 3421 * will defer until the EVD's refcnt drops to zero.
3422 3422 */
3423 3423 /* ARGSUSED */
3424 3424 static int
3425 3425 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3426 3426 cred_t *cred, int *rvalp)
3427 3427 {
3428 3428 daplka_evd_resource_t *evd_rp = NULL;
3429 3429 daplka_async_evd_hkey_t *curr;
3430 3430 daplka_async_evd_hkey_t *prev;
3431 3431 dapl_evd_free_t args;
3432 3432 int retval = 0;
3433 3433
3434 3434 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
3435 3435 if (retval != 0) {
3436 3436 DERR("evd_free: copyin error %d\n", retval);
3437 3437 return (EFAULT);
3438 3438 }
3439 3439 retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
3440 3440 (void **)&evd_rp);
3441 3441 if (retval != 0 || evd_rp == NULL) {
3442 3442 DERR("evd_free: cannot find evd resource\n");
3443 3443 return (EINVAL);
3444 3444 }
3445 3445 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3446 3446
3447 3447 /* If this is an async evd remove it from the IA's async evd list */
3448 3448 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
3449 3449 mutex_enter(&ia_rp->ia_lock);
3450 3450 curr = prev = ia_rp->ia_async_evd_hkeys;
3451 3451 while (curr != NULL) {
3452 3452 if (curr->aeh_evd_hkey == args.evf_hkey) {
3453 3453 /* unlink curr from the list */
3454 3454 if (curr == prev) {
3455 3455 /*
3456 3456 * if first element in the list update
3457 3457 * the list head
3458 3458 */
3459 3459 ia_rp->ia_async_evd_hkeys =
3460 3460 curr->aeh_next;
3461 3461 } else {
3462 3462 prev->aeh_next = curr->aeh_next;
3463 3463 }
3464 3464 break;
3465 3465 }
3466 3466 prev = curr;
3467 3467 curr = curr->aeh_next;
3468 3468 }
3469 3469 mutex_exit(&ia_rp->ia_lock);
3470 3470 /* free the curr entry */
3471 3471 kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
3472 3472 }
3473 3473
3474 3474 /* UNREF calls the actual free function when refcnt is zero */
3475 3475 DAPLKA_RS_UNREF(evd_rp);
3476 3476 return (0);
3477 3477 }
3478 3478
3479 3479 /*
3480 3480 * destroys EVD resource.
3481 3481 * called when refcnt drops to zero.
3482 3482 */
3483 3483 static int
3484 3484 daplka_evd_destroy(daplka_resource_t *gen_rp)
3485 3485 {
3486 3486 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)gen_rp;
3487 3487 ibt_status_t status;
3488 3488 daplka_evd_event_t *evt;
3489 3489 ibt_priv_data_len_t len;
3490 3490
3491 3491 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
3492 3492 D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
3493 3493 evd_rp, DAPLKA_RS_RNUM(evd_rp));
3494 3494 /*
3495 3495 * free CQ
3496 3496 */
3497 3497 if (evd_rp->evd_cq_hdl) {
3498 3498 ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
3499 3499 mutex_enter(&daplka_dev->daplka_mutex);
3500 3500 ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
3501 3501 mutex_exit(&daplka_dev->daplka_mutex);
3502 3502
3503 3503 status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
3504 3504 if (status != IBT_SUCCESS) {
3505 3505 DERR("evd_destroy: ibt_free_cq returned %d\n", status);
3506 3506 }
3507 3507 evd_rp->evd_cq_hdl = NULL;
3508 3508 D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
3509 3509 }
3510 3510
3511 3511 /*
3512 3512 * release reference on CNO
3513 3513 */
3514 3514 if (evd_rp->evd_cno_res != NULL) {
3515 3515 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3516 3516 if (evd_rp->evd_cno_res->cno_evd_cookie ==
3517 3517 evd_rp->evd_cookie) {
3518 3518 evd_rp->evd_cno_res->cno_evd_cookie = 0;
3519 3519 }
3520 3520 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3521 3521 DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
3522 3522 evd_rp->evd_cno_res = NULL;
3523 3523 }
3524 3524
3525 3525 /*
3526 3526 * discard all remaining events
3527 3527 */
3528 3528 mutex_enter(&evd_rp->evd_lock);
3529 3529 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
3530 3530 D2("evd_destroy: discarding CR event: %d\n",
3531 3531 evt->ee_cmev.ec_cm_ev_type);
3532 3532 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3533 3533 if (len > 0) {
3534 3534 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3535 3535 evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3536 3536 evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3537 3537 }
3538 3538 kmem_free(evt, sizeof (*evt));
3539 3539 }
3540 3540 ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);
3541 3541
3542 3542 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
3543 3543 D2("evd_destroy: discarding CONN event: %d\n",
3544 3544 evt->ee_cmev.ec_cm_ev_type);
3545 3545 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3546 3546 if (len > 0) {
3547 3547 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3548 3548 evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3549 3549 evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3550 3550 }
3551 3551 kmem_free(evt, sizeof (*evt));
3552 3552 }
3553 3553 ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);
3554 3554
3555 3555 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
3556 3556 DERR("evd_destroy: discarding ASYNC event: %d\n",
3557 3557 evt->ee_aev.ibae_type);
3558 3558 kmem_free(evt, sizeof (*evt));
3559 3559 }
3560 3560 ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
3561 3561 mutex_exit(&evd_rp->evd_lock);
3562 3562
3563 3563 mutex_destroy(&evd_rp->evd_lock);
3564 3564 DAPLKA_RS_FINI(evd_rp);
3565 3565 kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
3566 3566 D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
3567 3567 return (0);
3568 3568 }
3569 3569
3570 3570 static void
3571 3571 daplka_hash_evd_free(void *obj)
3572 3572 {
3573 3573 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;
3574 3574
3575 3575 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3576 3576 DAPLKA_RS_UNREF(evd_rp);
3577 3577 }
3578 3578
3579 3579 /*
3580 3580 * this handler fires when new completions arrive.
3581 3581 */
/* ARGSUSED */
static void
daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
{
	D3("cq_handler: fired setting evd_newevents\n");
	/*
	 * arg is the EVD resource registered with this CQ; wake up any
	 * clients waiting on it.  NULL evtq/evt means "CQ notification
	 * only, no event to enqueue" (see daplka_evd_wakeup).
	 */
	daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
}
3589 3589
3590 3590 /*
3591 3591 * this routine wakes up a client from evd_wait. if evtq and evt
3592 3592 * are non-null, the event evt will be enqueued prior to waking
3593 3593 * up the client. if the evd is associated with a CNO and if there
3594 3594 * are no waiters on the evd, the CNO will be notified.
3595 3595 */
static void
daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
    daplka_evd_event_t *evt)
{
	uint32_t	waiters = 0;

	mutex_enter(&evd_rp->evd_lock);
	if (evtq != NULL && evt != NULL) {
		/* enqueue evt onto one of this evd's own event queues */
		ASSERT(evtq == &evd_rp->evd_cr_events ||
		    evtq == &evd_rp->evd_conn_events ||
		    evtq == &evd_rp->evd_async_events);
		daplka_evd_event_enqueue(evtq, evt);
		ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
		    (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
		evd_rp->evd_newevents |= evtq->eel_event_type;
	} else {
		/* no event supplied: this is a CQ (userland) notification */
		evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
	}
	/* snapshot the waiter count while still holding evd_lock */
	waiters = evd_rp->evd_waiters;
	cv_broadcast(&evd_rp->evd_cv);
	mutex_exit(&evd_rp->evd_lock);

	/*
	 * only wakeup the CNO if there are no waiters on this evd.
	 * NOTE(review): evd_cno_res is read here after evd_lock has been
	 * dropped; presumably the CNO association is stable for the life
	 * of the evd - confirm against the evd teardown path.
	 */
	if (evd_rp->evd_cno_res != NULL && waiters == 0) {
		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
		evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
		cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
	}
}
3628 3628
3629 3629 /*
3630 3630 * daplka_evd_event_enqueue adds elem to the end of the event list
3631 3631 * The caller is expected to acquire appropriate locks before
3632 3632 * calling enqueue
3633 3633 */
3634 3634 static void
3635 3635 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
3636 3636 daplka_evd_event_t *elem)
3637 3637 {
3638 3638 if (evlist->eel_tail) {
3639 3639 evlist->eel_tail->ee_next = elem;
3640 3640 evlist->eel_tail = elem;
3641 3641 } else {
3642 3642 /* list is empty */
3643 3643 ASSERT(evlist->eel_head == NULL);
3644 3644 evlist->eel_head = elem;
3645 3645 evlist->eel_tail = elem;
3646 3646 }
3647 3647 evlist->eel_num_elements++;
3648 3648 }
3649 3649
3650 3650 /*
3651 3651 * daplka_evd_event_dequeue removes and returns the first element of event
3652 3652 * list. NULL is returned if the list is empty. The caller is expected to
 * acquire appropriate locks before calling dequeue.
3654 3654 */
3655 3655 static daplka_evd_event_t *
3656 3656 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
3657 3657 {
3658 3658 daplka_evd_event_t *head;
3659 3659
3660 3660 head = evlist->eel_head;
3661 3661 if (head == NULL) {
3662 3662 return (NULL);
3663 3663 }
3664 3664
3665 3665 evlist->eel_head = head->ee_next;
3666 3666 evlist->eel_num_elements--;
3667 3667 /* if it was the last element update the tail pointer too */
3668 3668 if (evlist->eel_head == NULL) {
3669 3669 ASSERT(evlist->eel_num_elements == 0);
3670 3670 evlist->eel_tail = NULL;
3671 3671 }
3672 3672 return (head);
3673 3673 }
3674 3674
3675 3675 /*
3676 3676 * A CNO allows the client to wait for notifications from multiple EVDs.
3677 3677 * To use a CNO, the client needs to follow the procedure below:
3678 3678 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
3679 3679 * 2. create one or more EVDs using the returned cno_hkey.
3680 3680 * 3. call cno_wait. when one of the associated EVDs get notified, the
3681 3681 * CNO will also get notified. cno_wait will then return with a
3682 3682 * evd_cookie identifying the EVD that triggered the event.
3683 3683 *
3684 3684 * A note about cno_wait:
3685 3685 * -unlike a EVD, a CNO does not maintain a queue of notifications. For
3686 3686 * example, suppose multiple EVDs triggered a CNO before the client calls
3687 3687 * cno_wait; when the client calls cno_wait, it will return with the
3688 3688 * evd_cookie that identifies the *last* EVD that triggered the CNO. It
3689 3689 * is the responsibility of the client, upon returning from cno_wait, to
3690 3690 * check on all EVDs that can potentially trigger the CNO. the returned
3691 3691 * evd_cookie is only meant to be a hint. there is no guarantee that the
3692 3692 * EVD identified by the evd_cookie still contains an event or still
3693 3693 * exists by the time cno_wait returns.
3694 3694 */
3695 3695
3696 3696 /*
3697 3697 * allocates a CNO.
3698 3698 * the returned cno_hkey may subsequently be used in evd_create.
3699 3699 */
/* ARGSUSED */
static int
daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_cno_alloc_t	args;
	daplka_cno_resource_t	*cno_rp = NULL;
	uint64_t		cno_hkey = 0;
	boolean_t		inserted = B_FALSE;
	int			retval = 0;

	/*
	 * NOTE(review): NULL check is defensive; whether it can trigger
	 * depends on daplka_km_flags (KM_SLEEP never returns NULL).
	 */
	cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
	if (cno_rp == NULL) {
		DERR("cno_alloc: cannot allocate cno resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp))
	/* initial reference is held by this function until insert/unref */
	DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
	    DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);

	mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
	cno_rp->cno_evd_cookie = 0;

	/* insert into cno hash table */
	retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
	    &cno_hkey, (void *)cno_rp);
	if (retval != 0) {
		DERR("cno_alloc: cannot insert cno resource\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp))

	/* return hkey to library */
	args.cno_hkey = cno_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
	    mode);
	if (retval != 0) {
		DERR("cno_alloc: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_cno_resource_t *free_rp = NULL;

		/* undo the hash insert before dropping our reference */
		(void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
		    (void **)&free_rp);
		if (free_rp != cno_rp) {
			DERR("cno_alloc: cannot remove cno\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in cno_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(cno_rp);
	return (retval);
}
3764 3764
3765 3765 /*
3766 3766 * destroys a CNO.
3767 3767 * this gets called when a CNO resource's refcnt drops to zero.
3768 3768 */
static int
daplka_cno_destroy(daplka_resource_t *gen_rp)
{
	daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;

	/* only reachable once the last reference has been dropped */
	ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
	D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
	    cno_rp, DAPLKA_RS_RNUM(cno_rp));

	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	cv_destroy(&cno_rp->cno_cv);
	mutex_destroy(&cno_rp->cno_lock);

	DAPLKA_RS_FINI(cno_rp);
	kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
	/* the trace below prints only the (now stale) pointer value */
	D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
	return (0);
}
3787 3787
3788 3788 static void
3789 3789 daplka_hash_cno_free(void *obj)
3790 3790 {
3791 3791 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;
3792 3792
3793 3793 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3794 3794 DAPLKA_RS_UNREF(cno_rp);
3795 3795 }
3796 3796
3797 3797 /*
3798 3798 * removes the CNO from the cno hash table and frees the CNO
3799 3799 * if there are no references to it. if there are references to
3800 3800 * it, the CNO will be destroyed when the last of the references
3801 3801 * is released. once the CNO is removed from the cno hash table,
3802 3802 * the client will no longer be able to call cno_wait on the CNO.
3803 3803 */
/* ARGSUSED */
static int
daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_cno_resource_t	*cno_rp = NULL;
	dapl_cno_free_t		args;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
	if (retval != 0) {
		/*
		 * NOTE(review): returns EINVAL here while sibling ioctls
		 * return EFAULT on copyin failure - confirm intentional.
		 */
		DERR("cno_free: copyin error %d\n", retval);
		return (EINVAL);
	}

	/* remove from the hash table; takes ownership of its reference */
	retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
	    args.cnf_hkey, (void **)&cno_rp);
	if (retval != 0 || cno_rp == NULL) {
		DERR("cno_free: cannot find cno resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(cno_rp);
	return (0);
}
3831 3831
3832 3832 /*
3833 3833 * wait for a notification from one of the associated EVDs.
3834 3834 */
/* ARGSUSED */
static int
daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_cno_resource_t	*cno_rp = NULL;
	dapl_cno_wait_t		args;
	int			retval = 0;
	uint64_t		evd_cookie = 0;
	clock_t			timeout, curr_time;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
	if (retval != 0) {
		DERR("cno_wait: copyin error %d\n", retval);
		return (EINVAL);
	}
	/* get cno resource; lookup takes a reference we must drop */
	cno_rp = (daplka_cno_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
	if (cno_rp == NULL) {
		DERR("cno_wait: cannot find cno resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);

	/* convert the relative usec timeout to an absolute lbolt value */
	curr_time = ddi_get_lbolt();
	timeout = curr_time + drv_usectohz(args.cnw_timeout);

	/*
	 * use the max value if we wrapped around
	 */
	if (args.cnw_timeout > 0 && timeout <= curr_time) {
		/*
		 * clock_t (size long) changes between 32 and 64-bit kernels
		 */
		timeout = LONG_MAX >> 4;
	}
	mutex_enter(&cno_rp->cno_lock);
	/* wait until some associated EVD deposits its cookie */
	while (cno_rp->cno_evd_cookie == 0) {
		int rval = 0;

		rval = cv_timedwait_sig(&cno_rp->cno_cv,
		    &cno_rp->cno_lock, timeout);
		if (rval == 0) {
			/* woken by a signal */
			DERR("cno_wait: interrupted\n");
			mutex_exit(&cno_rp->cno_lock);
			retval = EINTR;
			goto cleanup;
		} else if (rval == -1) {
			/* absolute timeout reached */
			DERR("cno_wait: timed out\n");
			mutex_exit(&cno_rp->cno_lock);
			retval = ETIME;
			goto cleanup;
		}
	}
	/* consume the cookie so the next wait blocks again */
	evd_cookie = cno_rp->cno_evd_cookie;
	cno_rp->cno_evd_cookie = 0;
	mutex_exit(&cno_rp->cno_lock);

	ASSERT(evd_cookie != 0);
	D2("cno_wait: returning evd_cookie 0x%p\n",
	    (void *)(uintptr_t)evd_cookie);
	args.cnw_evd_cookie = evd_cookie;
	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_cno_wait_t), mode);
	if (retval != 0) {
		DERR("cno_wait: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (cno_rp != NULL) {
		DAPLKA_RS_UNREF(cno_rp);
	}
	return (retval);
}
3912 3912
3913 3913 /*
3914 3914 * this function is called by the client when it decides to
3915 3915 * accept a connection request. a connection request is generated
3916 3916 * when the active side generates REQ MAD to a service point on
3917 3917 * the destination node. this causes the CM service handler
 * (daplka_cm_service_req) on the passive side to be called. This
3919 3919 * handler will then enqueue this connection request to the backlog
3920 3920 * array of the service point. A connection event containing the
3921 3921 * backlog array index and connection request private data is passed
3922 3922 * to the client's service point EVD (sp_evd_res). once the event
3923 3923 * is passed up to the userland, the client may examine the request
3924 3924 * to decide whether to call daplka_cr_accept or dapka_cr_reject.
3925 3925 */
/* ARGSUSED */
static int
daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	daplka_sp_resource_t	*sp_rp = NULL;
	dapl_cr_accept_t	args;
	daplka_sp_conn_pend_t	*conn;
	ibt_cm_proceed_reply_t	proc_reply;
	ibt_status_t		status;
	uint16_t		bkl_index;
	uint32_t		old_state, new_state;
	int			retval = 0;
	void			*priv_data = NULL, *sid;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
	    mode);
	if (retval != 0) {
		DERR("cr_accept: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* bound the user-supplied private data length before use */
	if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("cr_accept: private data len (%d) exceeded "
		    "max size %d\n", args.cra_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		return (EINVAL);
	}
	priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;

	D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
	    args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);

	/* get sp resource; lookup takes a reference */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.cra_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_accept: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	/* get ep resource; lookup takes a reference */
	ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
	    args.cra_ep_hkey);
	if (ep_rp == NULL) {
		DERR("cr_accept: cannot find ep resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/*
	 * accept is only allowed if ep_state is CLOSED.
	 * note that after this point, the ep_state is frozen
	 * (i.e. TRANSITIONING) until we transition ep_state
	 * to ACCEPTING or back to CLOSED if we get an error.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("cr_accept: invalid ep state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_accept: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.cra_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_accept: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_accept: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (ep_rp->ep_chan_hdl == NULL) {
		/*
		 * a ep_rp with a NULL chan_hdl is impossible.
		 */
		DERR("cr_accept: ep_chan_hdl == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		ASSERT(B_FALSE);
		retval = EINVAL;
		goto cleanup;
	}
	/* build the CM REP parameters from the pending connection */
	proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
	proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
	proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
	proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
	sid = conn->spcp_sid;

	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;
	mutex_exit(&sp_rp->sp_lock);

	/*
	 * Set the unique cookie corresponding to the CR to this EP
	 * so that it can be used in passive side CM callbacks
	 */
	ep_rp->ep_psep_cookie = args.cra_bkl_cookie;

	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
	    &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);

	if (status != IBT_SUCCESS) {
		/* IBTF failure is reported to the library via *rvalp */
		DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
	}
	/*
	 * note that the CM handler may actually be called at this
	 * point. but since ep_state is still in TRANSITIONING, the
	 * handler will wait until we transition to ACCEPTING. this
	 * prevents the case where we set ep_state to ACCEPTING after
	 * daplka_service_conn_est sets ep_state to CONNECTED.
	 */
	new_state = DAPLKA_EP_STATE_ACCEPTING;

cleanup:;
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	if (ep_rp != NULL) {
		/* unfreeze ep_state (to ACCEPTING on success, else CLOSED) */
		daplka_ep_set_state(ep_rp, old_state, new_state);
		DAPLKA_RS_UNREF(ep_rp);
	}
	return (retval);
}
4079 4079
4080 4080 /*
4081 4081 * this function is called by the client to reject a
4082 4082 * connection request.
4083 4083 */
/* ARGSUSED */
static int
daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_cr_reject_t	args;
	daplka_sp_resource_t	*sp_rp = NULL;
	daplka_sp_conn_pend_t	*conn;
	ibt_cm_proceed_reply_t	proc_reply;
	ibt_cm_status_t		proc_status;
	ibt_status_t		status;
	uint16_t		bkl_index;
	int			retval = 0;
	void			*sid;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
	    mode);
	if (retval != 0) {
		DERR("cr_reject: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* get sp resource; lookup takes a reference */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.crr_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_reject: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_reject: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.crr_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_reject: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_reject: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	bzero(&proc_reply, sizeof (proc_reply));
	sid = conn->spcp_sid;

	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;

	/* map the DAPL reject reason to an IBT CM status */
	switch (args.crr_reason) {
	case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
		/* results in IBT_CM_CONSUMER as the reason for reject */
		proc_status = IBT_CM_REJECT;
		break;
	case DAPL_IB_CME_LOCAL_FAILURE:
		/*FALLTHRU*/
	case DAPL_IB_CME_DESTINATION_UNREACHABLE:
		/* results in IBT_CM_NO_RESC as the reason for reject */
		proc_status = IBT_CM_NO_RESOURCE;
		break;
	default:
		/* unexpected reason code */
		ASSERT(!"unexpected reject reason code");
		proc_status = IBT_CM_NO_RESOURCE;
		break;
	}

	mutex_exit(&sp_rp->sp_lock);

	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
	    &proc_reply, NULL, 0);

	if (status != IBT_SUCCESS) {
		/* IBTF failure is reported to the library via *rvalp */
		DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
	}

cleanup:;
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	return (retval);
}
4192 4192
4193 4193
4194 4194 /*
4195 4195 * daplka_sp_match is used by daplka_hash_walk for finding SPs
4196 4196 */
typedef struct daplka_sp_match_s {
	uint64_t		spm_conn_qual;	/* connection qualifier to find */
	daplka_sp_resource_t	*spm_sp_rp;	/* matched SP (referenced), or NULL */
} daplka_sp_match_t;
_NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp))
4202 4202
4203 4203 static int
4204 4204 daplka_sp_match(void *objp, void *arg)
4205 4205 {
4206 4206 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)objp;
4207 4207
4208 4208 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4209 4209 if (sp_rp->sp_conn_qual ==
4210 4210 ((daplka_sp_match_t *)arg)->spm_conn_qual) {
4211 4211 ((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
4212 4212 D2("daplka_sp_match: found sp, conn_qual %016llu\n",
4213 4213 (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
4214 4214 DAPLKA_RS_REF(sp_rp);
4215 4215 return (1);
4216 4216 }
4217 4217 return (0);
4218 4218 }
4219 4219
4220 4220 /*
4221 4221 * cr_handoff allows the client to handoff a connection request from
4222 4222 * one service point to another.
4223 4223 */
/* ARGSUSED */
static int
daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_cr_handoff_t	args;
	daplka_sp_resource_t	*sp_rp = NULL, *new_sp_rp = NULL;
	daplka_sp_conn_pend_t	*conn;
	daplka_sp_match_t	sp_match;
	ibt_cm_event_t		fake_event;
	ibt_cm_status_t		cm_status;
	ibt_status_t		status;
	uint16_t		bkl_index;
	void			*sid, *priv = NULL;
	int			retval = 0, priv_len = 0;

	D3("cr_handoff: entering\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
	    mode);
	if (retval != 0) {
		DERR("cr_handoff: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* get sp resource; lookup takes a reference */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.crh_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_handoff: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	/*
	 * find the destination service point.
	 * daplka_sp_match takes a reference on the matched SP.
	 */
	sp_match.spm_conn_qual = args.crh_conn_qual;
	sp_match.spm_sp_rp = NULL;
	daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
	    (void *)&sp_match, RW_READER);

	/*
	 * return if we cannot find the service point
	 */
	if (sp_match.spm_sp_rp == NULL) {
		DERR("cr_handoff: new sp not found, conn qual = %llu\n",
		    (longlong_t)args.crh_conn_qual);
		retval = EINVAL;
		goto cleanup;
	}
	new_sp_rp = sp_match.spm_sp_rp;

	/*
	 * the spec does not discuss the security implications of this
	 * function. to be safe, we currently only allow processes
	 * owned by the same user to handoff connection requests
	 * to each other.
	 */
	if (crgetruid(cred) != new_sp_rp->sp_ruid) {
		DERR("cr_handoff: permission denied\n");
		retval = EPERM;
		goto cleanup;
	}

	D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.crh_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_handoff: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_handoff: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	sid = conn->spcp_sid;
	priv = NULL;
	priv_len = conn->spcp_req_len;
	/* copy the REQ private data out from under sp_lock */
	if (priv_len > 0) {
		priv = kmem_zalloc(priv_len, daplka_km_flags);
		if (priv == NULL) {
			mutex_exit(&sp_rp->sp_lock);
			retval = ENOMEM;
			goto cleanup;
		}
		bcopy(conn->spcp_req_data, priv, priv_len);
	}
	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;
	mutex_exit(&sp_rp->sp_lock);

	/* fill fake_event and call service_req handler */
	bzero(&fake_event, sizeof (fake_event));
	fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
	fake_event.cm_session_id = sid;
	fake_event.cm_priv_data_len = priv_len;
	fake_event.cm_priv_data = priv;

	/* replay the REQ against the destination service point */
	cm_status = daplka_cm_service_req(new_sp_rp,
	    &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
	if (cm_status != IBT_CM_DEFER) {
		ibt_cm_proceed_reply_t	proc_reply;

		DERR("cr_handoff: service_req returned %d\n", cm_status);
		/*
		 * if for some reason cm_service_req failed, we
		 * reject the connection.
		 */
		bzero(&proc_reply, sizeof (proc_reply));

		status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
		    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
		if (status != IBT_SUCCESS) {
			DERR("cr_handoff: ibt_cm_proceed returned %d\n",
			    status);
		}
		*rvalp = (int)status;
		retval = 0;
	}

cleanup:;
	if (priv_len > 0 && priv != NULL) {
		kmem_free(priv, priv_len);
	}
	if (new_sp_rp != NULL) {
		DAPLKA_RS_UNREF(new_sp_rp);
	}
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	D3("cr_handoff: exiting\n");
	return (retval);
}
4382 4382
4383 4383 /*
4384 4384 * returns a list of hca attributes
4385 4385 */
4386 4386 /* ARGSUSED */
4387 4387 static int
4388 4388 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4389 4389 cred_t *cred, int *rvalp)
4390 4390 {
4391 4391 dapl_ia_query_t args;
4392 4392 int retval;
4393 4393 ibt_hca_attr_t *hcap;
4394 4394
4395 4395 hcap = &ia_rp->ia_hca->hca_attr;
4396 4396
4397 4397 /*
4398 4398 * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t
4399 4399 */
4400 4400 args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
4401 4401 args.hca_attr.dhca_device_id = hcap->hca_device_id;
4402 4402 args.hca_attr.dhca_version_id = hcap->hca_version_id;
4403 4403 args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
4404 4404 args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
4405 4405 args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
4406 4406 args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
4407 4407 args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
4408 4408 args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
4409 4409 args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
4410 4410 args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
4411 4411 args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
4412 4412 args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
4413 4413 args.hca_attr.dhca_max_partitions = hcap->hca_max_partitions;
4414 4414 args.hca_attr.dhca_nports = hcap->hca_nports;
4415 4415 args.hca_attr.dhca_node_guid = hcap->hca_node_guid;
4416 4416 args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
4417 4417 args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
4418 4418 args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
4419 4419 args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;
4420 4420
4421 4421 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
4422 4422 mode);
4423 4423 if (retval != 0) {
4424 4424 DERR("ia_query: copyout error %d\n", retval);
4425 4425 return (EFAULT);
4426 4426 }
4427 4427 return (0);
4428 4428 }
4429 4429
4430 4430 /*
4431 4431 * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback,
4432 4432 * it frees the mw embedded in the mw resource object.
4433 4433 */
4434 4434
/* ARGSUSED */
static int
daplka_mr_cb_freemw(void *objp, void *arg)
{
	daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)objp;
	ibt_mw_hdl_t		mw_hdl;
	ibt_status_t		status;

	D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
	/* hold the mw so it cannot be destroyed while we free its handle */
	DAPLKA_RS_REF(mw_rp);

	mutex_enter(&mw_rp->mw_lock);
	mw_hdl = mw_rp->mw_hdl;
	/*
	 * we set mw_hdl to NULL so it won't get freed again
	 */
	mw_rp->mw_hdl = NULL;
	mutex_exit(&mw_rp->mw_lock);

	/* NULL means another thread already freed the mw handle */
	if (mw_hdl != NULL) {
		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
		if (status != IBT_SUCCESS) {
			DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
		}
		D3("mr_cb_freemw: mw freed\n");
	}

	DAPLKA_RS_UNREF(mw_rp);
	return (0);
}
4465 4465
4466 4466 /*
4467 4467 * This routine is called from HCA driver's umem lock undo callback
4468 4468 * when the memory associated with an MR is being unmapped. In this callback
4469 4469 * we free all the MW associated with the IA and post an unaffiliated
4470 4470 * async event to tell the app that there was a catastrophic event.
4471 4471 * This allows the HCA to deregister the MR in its callback processing.
4472 4472 */
static void
daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
{
	daplka_mr_resource_t	*mr_rp;
	daplka_ia_resource_t	*ia_rp;
#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
	ibt_async_event_t	event;
	ibt_hca_attr_t		*hca_attrp;
#endif
	minor_t			rnum;

	mr_rp = (daplka_mr_resource_t *)arg1;
	rnum = DAPLKA_RS_RNUM(mr_rp);
	/* detach this mr from its shared mr object, if it has one */
	daplka_shared_mr_free(mr_rp);

	/*
	 * look up the owning IA resource by its resource number.
	 * the lookup reference is dropped by the DAPLKA_RS_UNREF
	 * at the bottom of this function.
	 */
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
	if (ia_rp == NULL) {
		DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
		    rnum);
		return;
	}

	DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);

	mutex_enter(&ia_rp->ia_lock);
	/*
	 * MW is being alloced OR MW freeze has already begun. In
	 * both these cases we wait for that to complete before
	 * continuing.
	 */
	while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
	    (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
	}

	switch (ia_rp->ia_state) {
	case DAPLKA_IA_INIT:
		/*
		 * this thread takes ownership of the MW freeze; the
		 * FREEZE_IN_PROGRESS state blocks concurrent freezers
		 * (see the cv_wait loop above) while we walk the mw
		 * hash table below without holding ia_lock.
		 */
		ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
		mutex_exit(&ia_rp->ia_lock);
		break;
	case DAPLKA_IA_MW_FROZEN:
		/* the mw on this ia have been freed */
		D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
		    ia_rp->ia_state);
		mutex_exit(&ia_rp->ia_lock);
		goto cleanup;
	default:
		/* any other state is a programming error */
		ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
		DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
		    ia_rp->ia_state);
		mutex_exit(&ia_rp->ia_lock);
		goto cleanup;
	}

	/*
	 * Walk the mw hash table and free the mws. Acquire a writer
	 * lock since we don't want anyone else traversing this tree
	 * while we are freeing the MW.
	 */
	daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
	    RW_WRITER);

	/* mark the freeze complete and wake up any waiters */
	mutex_enter(&ia_rp->ia_lock);
	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
	ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
	cv_broadcast(&ia_rp->ia_cv);
	mutex_exit(&ia_rp->ia_lock);

	/*
	 * Currently commented out because Oracle skgxp is incapable
	 * of handling async events correctly.
	 */
#ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
	/*
	 * Enqueue an unaffiliated async error event to indicate this
	 * IA has encountered a problem that caused the MW to freed up
	 */

	/* Create a fake event, only relevant field is the hca_guid */
	bzero(&event, sizeof (ibt_async_event_t));
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	event.ev_hca_guid = hca_attrp->hca_node_guid;

	daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
	    ia_rp);
#endif /* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */

cleanup:;
	D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
	/* release the reference taken by daplka_resource_lookup above */
	DAPLKA_RS_UNREF(ia_rp);
}
4564 4564
4565 4565 /*
4566 4566 * registers a memory region.
4567 4567 * memory locking will be done by the HCA driver.
4568 4568 */
/* ARGSUSED */
static int
daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	boolean_t		inserted = B_FALSE;
	daplka_mr_resource_t	*mr_rp;
	daplka_pd_resource_t	*pd_rp;
	dapl_mr_register_t	args;
	ibt_mr_data_in_t	mr_cb_data_in;
	uint64_t		mr_hkey = 0;
	ibt_status_t		status;
	int			retval;

	/* copy in the register-mr arguments from userland */
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
	    mode);
	if (retval != 0) {
		DERR("mr_register: copyin error %d\n", retval);
		return (EINVAL);
	}
	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
	if (mr_rp == NULL) {
		DERR("mr_register: cannot allocate mr resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
	/*
	 * initialize the resource with one reference; on failure, the
	 * DAPLKA_RS_UNREF in the cleanup path triggers daplka_mr_destroy.
	 */
	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
	mr_rp->mr_hca = ia_rp->ia_hca;
	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
	mr_rp->mr_next = NULL;
	mr_rp->mr_shared_mr = NULL;

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
	if (pd_rp == NULL) {
		DERR("mr_register: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	/* the lookup reference on pd_rp is kept for the mr's lifetime */
	mr_rp->mr_pd_res = pd_rp;

	/* fill in the ibt mr attributes from the user's request */
	mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
	mr_rp->mr_attr.mr_len = args.mr_len;
	mr_rp->mr_attr.mr_as = curproc->p_as;
	mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;

	D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
	    (longlong_t)mr_rp->mr_attr.mr_len,
	    mr_rp->mr_attr.mr_flags);

	status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
	    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
	    &mr_rp->mr_desc);

	if (status != IBT_SUCCESS) {
		/*
		 * ibt failures are reported to the caller via *rvalp
		 * with an ioctl return of 0 (convention used throughout
		 * this driver).
		 */
		DERR("mr_register: ibt_register_mr error %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
	mr_cb_data_in.mr_arg2 = NULL;

	/* Pass the service driver mr cleanup handler to the hca driver */
	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
	    &mr_cb_data_in, sizeof (mr_cb_data_in));

	if (status != IBT_SUCCESS) {
		DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
		    status, mr_cb_data_in.mr_rev);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into mr hash table */
	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
	    &mr_hkey, (void *)mr_rp);
	if (retval != 0) {
		DERR("mr_register: cannot insert mr resource into mr_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))

	/* return the keys the client needs to use this mr */
	args.mr_lkey = mr_rp->mr_desc.md_lkey;
	args.mr_rkey = mr_rp->mr_desc.md_rkey;
	args.mr_hkey = mr_hkey;

	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_mr_register_t), mode);
	if (retval != 0) {
		DERR("mr_register: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_mr_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
		    (void **)&free_rp);
		if (free_rp != mr_rp) {
			DERR("mr_register: cannot remove mr from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in mr_deregister
			 */
			return (retval);
		}
	}
	/* drops the initial reference; frees mr_rp via daplka_mr_destroy */
	DAPLKA_RS_UNREF(mr_rp);
	return (retval);
}
4695 4695
4696 4696 /*
4697 4697 * registers a shared memory region.
4698 4698 * the client calls this function with the intention to share the memory
4699 4699 * region with other clients. it is assumed that, prior to calling this
4700 4700 * function, the client(s) are already sharing parts of their address
4701 4701 * space using a mechanism such as SYSV shared memory. the first client
4702 4702 * that calls this function will create and insert a daplka_shared_mr_t
4703 4703 * object into the global daplka_shared_mr_tree. this shared mr object
4704 4704 * will be identified by a unique 40-byte key and will maintain a list
4705 4705 * of mr resources. every time this function gets called with the same
4706 4706 * 40-byte key, a new mr resource (containing a new mr handle generated
4707 4707 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
4708 4708 * into this list. similarly, every time a shared mr gets deregistered
4709 4709 * or invalidated by a callback, the mr resource gets removed from this
4710 4710 * list. the shared mr object has a reference count. when it drops to
4711 4711 * zero, the shared mr object will be removed from the global avl tree
4712 4712 * and be freed.
4713 4713 */
4714 4714 /* ARGSUSED */
4715 4715 static int
4716 4716 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4717 4717 cred_t *cred, int *rvalp)
4718 4718 {
4719 4719 dapl_mr_register_shared_t args;
4720 4720 daplka_shared_mr_t *smrp = NULL;
4721 4721 daplka_shared_mr_t tmp_smr;
4722 4722 ibt_mr_data_in_t mr_cb_data_in;
4723 4723 avl_index_t where;
4724 4724 boolean_t inserted = B_FALSE;
4725 4725 daplka_mr_resource_t *mr_rp = NULL;
4726 4726 daplka_pd_resource_t *pd_rp;
4727 4727 uint64_t mr_hkey = 0;
4728 4728 ibt_status_t status;
4729 4729 int retval;
4730 4730
4731 4731 retval = ddi_copyin((void *)arg, &args,
4732 4732 sizeof (dapl_mr_register_shared_t), mode);
4733 4733 if (retval != 0) {
4734 4734 DERR("mr_register_shared: copyin error %d\n", retval);
4735 4735 return (EINVAL);
4736 4736 }
4737 4737
4738 4738 mutex_enter(&daplka_shared_mr_lock);
4739 4739 /*
4740 4740 * find smrp from the global avl tree.
4741 4741 * the 40-byte key is used as the lookup key.
4742 4742 */
4743 4743 tmp_smr.smr_cookie = args.mrs_shm_cookie;
4744 4744 smrp = (daplka_shared_mr_t *)
4745 4745 avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
4746 4746 if (smrp != NULL) {
4747 4747 D2("mr_register_shared: smrp 0x%p, found cookie:\n"
4748 4748 "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
4749 4749 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4750 4750 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4751 4751 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4752 4752 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4753 4753 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4754 4754
4755 4755 /*
4756 4756 * if the smrp exists, other threads could still be
4757 4757 * accessing it. we wait until they are done before
4758 4758 * we continue.
4759 4759 */
4760 4760 smrp->smr_refcnt++;
4761 4761 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
4762 4762 D2("mr_register_shared: smrp 0x%p, "
4763 4763 "waiting in transitioning state, refcnt %d\n",
4764 4764 smrp, smrp->smr_refcnt);
4765 4765 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
4766 4766 }
4767 4767 ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
4768 4768 D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
4769 4769 smrp, smrp->smr_refcnt);
4770 4770
4771 4771 /*
4772 4772 * we set smr_state to TRANSITIONING to temporarily
4773 4773 * prevent other threads from trying to access smrp.
4774 4774 */
4775 4775 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4776 4776 } else {
4777 4777 D2("mr_register_shared: cannot find cookie:\n"
4778 4778 "0x%016llx%016llx%016llx%016llx%016llx\n",
4779 4779 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4780 4780 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4781 4781 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4782 4782 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4783 4783 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4784 4784
4785 4785 /*
4786 4786 * if we cannot find smrp, we need to create and
4787 4787 * insert one into daplka_shared_mr_tree
4788 4788 */
4789 4789 smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
4790 4790 daplka_km_flags);
4791 4791 if (smrp == NULL) {
4792 4792 retval = ENOMEM;
4793 4793 mutex_exit(&daplka_shared_mr_lock);
4794 4794 goto cleanup;
4795 4795 }
4796 4796 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4797 4797 smrp->smr_refcnt = 1;
4798 4798 smrp->smr_cookie = args.mrs_shm_cookie;
4799 4799 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4800 4800 smrp->smr_mr_list = NULL;
4801 4801 cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
4802 4802 avl_insert(&daplka_shared_mr_tree, smrp, where);
4803 4803 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp))
4804 4804 }
4805 4805 mutex_exit(&daplka_shared_mr_lock);
4806 4806
4807 4807 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4808 4808 if (mr_rp == NULL) {
4809 4809 DERR("mr_register_shared: cannot allocate mr resource\n");
4810 4810 goto cleanup;
4811 4811 }
4812 4812 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4813 4813 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4814 4814 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4815 4815
4816 4816 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4817 4817 mr_rp->mr_hca = ia_rp->ia_hca;
4818 4818 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4819 4819 mr_rp->mr_next = NULL;
4820 4820 mr_rp->mr_shared_mr = NULL;
4821 4821
4822 4822 /* get pd handle */
4823 4823 pd_rp = (daplka_pd_resource_t *)
4824 4824 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
4825 4825 if (pd_rp == NULL) {
4826 4826 DERR("mr_register_shared: cannot find pd resource\n");
4827 4827 retval = EINVAL;
4828 4828 goto cleanup;
4829 4829 }
4830 4830 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4831 4831 mr_rp->mr_pd_res = pd_rp;
4832 4832
4833 4833 mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
4834 4834 mr_rp->mr_attr.mr_len = args.mrs_len;
4835 4835 mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
4836 4836 mr_rp->mr_attr.mr_as = curproc->p_as;
4837 4837
4838 4838 D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
4839 4839 "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
4840 4840 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4841 4841 (longlong_t)mr_rp->mr_attr.mr_len,
4842 4842 mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
4843 4843 (int)(smrp->smr_mr_list != NULL), smrp);
4844 4844
4845 4845 /*
4846 4846 * since we are in TRANSITIONING state, we are guaranteed
4847 4847 * that we have exclusive access to smr_mr_list.
4848 4848 */
4849 4849 if (smrp->smr_mr_list != NULL) {
4850 4850 ibt_smr_attr_t mem_sattr;
4851 4851
4852 4852 /*
4853 4853 * a non-null smr_mr_list indicates that someone
4854 4854 * else has already inserted an mr_resource into
4855 4855 * smr_mr_list. we use the mr_handle from the first
4856 4856 * element as an arg to ibt_register_shared_mr.
4857 4857 */
4858 4858 mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
4859 4859 mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;
4860 4860
4861 4861 D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
4862 4862 (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
4863 4863 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
4864 4864 smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
4865 4865 &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);
4866 4866
4867 4867 if (status != IBT_SUCCESS) {
4868 4868 DERR("mr_register_shared: "
4869 4869 "ibt_register_shared_mr error %d\n", status);
4870 4870 *rvalp = (int)status;
4871 4871 retval = 0;
4872 4872 goto cleanup;
4873 4873 }
4874 4874 } else {
4875 4875 /*
4876 4876 * an mr does not exist yet. we need to create one
4877 4877 * using ibt_register_mr.
4878 4878 */
4879 4879 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4880 4880 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
4881 4881 &mr_rp->mr_hdl, &mr_rp->mr_desc);
4882 4882
4883 4883 if (status != IBT_SUCCESS) {
4884 4884 DERR("mr_register_shared: "
4885 4885 "ibt_register_mr error %d\n", status);
4886 4886 *rvalp = (int)status;
4887 4887 retval = 0;
4888 4888 goto cleanup;
4889 4889 }
4890 4890 }
4891 4891
4892 4892 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4893 4893 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4894 4894 mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4895 4895 mr_cb_data_in.mr_arg2 = NULL;
4896 4896
4897 4897 /* Pass the service driver mr cleanup handler to the hca driver */
4898 4898 status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4899 4899 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4900 4900 &mr_cb_data_in, sizeof (mr_cb_data_in));
4901 4901
4902 4902 if (status != IBT_SUCCESS) {
4903 4903 DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
4904 4904 status, mr_cb_data_in.mr_rev);
4905 4905 *rvalp = (int)status;
4906 4906 retval = 0;
4907 4907 goto cleanup;
4908 4908 }
4909 4909
4910 4910 /*
4911 4911 * we bump reference of mr_rp and enqueue it onto smrp.
4912 4912 */
4913 4913 DAPLKA_RS_REF(mr_rp);
4914 4914 mr_rp->mr_next = smrp->smr_mr_list;
4915 4915 smrp->smr_mr_list = mr_rp;
4916 4916 mr_rp->mr_shared_mr = smrp;
4917 4917
4918 4918 /* insert into mr hash table */
4919 4919 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4920 4920 &mr_hkey, (void *)mr_rp);
4921 4921 if (retval != 0) {
4922 4922 DERR("mr_register_shared: cannot insert mr resource\n");
4923 4923 goto cleanup;
4924 4924 }
4925 4925 inserted = B_TRUE;
4926 4926 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4927 4927
4928 4928 /*
4929 4929 * at this point, there are two references to our mr resource.
4930 4930 * one is kept in ia_mr_htbl. the other is kept in the list
4931 4931 * within this shared mr object (smrp). when we deregister this
4932 4932 * mr or when a callback invalidates this mr, the reference kept
4933 4933 * by this shared mr object will be removed.
4934 4934 */
4935 4935
4936 4936 args.mrs_lkey = mr_rp->mr_desc.md_lkey;
4937 4937 args.mrs_rkey = mr_rp->mr_desc.md_rkey;
4938 4938 args.mrs_hkey = mr_hkey;
4939 4939
4940 4940 retval = ddi_copyout((void *)&args, (void *)arg,
4941 4941 sizeof (dapl_mr_register_shared_t), mode);
4942 4942 if (retval != 0) {
4943 4943 DERR("mr_register_shared: copyout error %d\n", retval);
4944 4944 retval = EFAULT;
4945 4945 goto cleanup;
4946 4946 }
4947 4947
4948 4948 /*
4949 4949 * set the state to READY to allow others to continue
4950 4950 */
4951 4951 mutex_enter(&daplka_shared_mr_lock);
4952 4952 smrp->smr_state = DAPLKA_SMR_READY;
4953 4953 cv_broadcast(&smrp->smr_cv);
4954 4954 mutex_exit(&daplka_shared_mr_lock);
4955 4955 return (0);
4956 4956
4957 4957 cleanup:;
4958 4958 if (inserted) {
4959 4959 daplka_mr_resource_t *free_rp = NULL;
4960 4960
4961 4961 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4962 4962 (void **)&free_rp);
4963 4963 if (free_rp != mr_rp) {
4964 4964 DERR("mr_register_shared: "
4965 4965 "cannot remove mr from hash table\n");
4966 4966 /*
4967 4967 * we can only get here if another thread
4968 4968 * has completed the cleanup in mr_deregister
4969 4969 */
4970 4970 return (retval);
4971 4971 }
4972 4972 }
4973 4973 if (smrp != NULL) {
4974 4974 mutex_enter(&daplka_shared_mr_lock);
4975 4975 ASSERT(smrp->smr_refcnt > 0);
4976 4976 smrp->smr_refcnt--;
4977 4977
4978 4978 if (smrp->smr_refcnt == 0) {
4979 4979 DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
4980 4980 avl_remove(&daplka_shared_mr_tree, smrp);
4981 4981 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4982 4982 if (smrp->smr_mr_list != NULL) {
4983 4983 /*
4984 4984 * the refcnt is 0. if there is anything
4985 4985 * left on the list, it must be ours.
4986 4986 */
4987 4987 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4988 4988 ASSERT(smrp->smr_mr_list == mr_rp);
4989 4989 DAPLKA_RS_UNREF(mr_rp);
4990 4990 smrp->smr_mr_list = NULL;
4991 4991 ASSERT(mr_rp->mr_shared_mr == smrp);
4992 4992 mr_rp->mr_shared_mr = NULL;
4993 4993 ASSERT(mr_rp->mr_next == NULL);
4994 4994 }
4995 4995 smrp->smr_state = DAPLKA_SMR_FREED;
4996 4996 cv_destroy(&smrp->smr_cv);
4997 4997 kmem_free(smrp, sizeof (daplka_shared_mr_t));
4998 4998 } else {
4999 4999 DERR("mr_register_shared: resetting smr_state "
5000 5000 "smrp 0x%p, %d waiters remain\n", smrp,
5001 5001 smrp->smr_refcnt);
5002 5002 ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
5003 5003 if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
5004 5004 daplka_mr_resource_t **mpp;
5005 5005
5006 5006 /*
5007 5007 * search and remove mr_rp from smr_mr_list
5008 5008 */
5009 5009 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5010 5010 mpp = &smrp->smr_mr_list;
5011 5011 while (*mpp != NULL) {
5012 5012 if (*mpp == mr_rp) {
5013 5013 *mpp = (*mpp)->mr_next;
5014 5014 DAPLKA_RS_UNREF(mr_rp);
5015 5015 ASSERT(mr_rp->mr_shared_mr ==
5016 5016 smrp);
5017 5017 mr_rp->mr_shared_mr = NULL;
5018 5018 mr_rp->mr_next = NULL;
5019 5019 break;
5020 5020 }
5021 5021 mpp = &(*mpp)->mr_next;
5022 5022 }
5023 5023 }
5024 5024 /*
5025 5025 * note that smr_state == READY does not necessarily
5026 5026 * mean that smr_mr_list is non empty. for this case,
5027 5027 * we are doing cleanup because of a failure. we set
5028 5028 * the state to READY to allow other threads to
5029 5029 * continue.
5030 5030 */
5031 5031 smrp->smr_state = DAPLKA_SMR_READY;
5032 5032 cv_broadcast(&smrp->smr_cv);
5033 5033 }
5034 5034 mutex_exit(&daplka_shared_mr_lock);
5035 5035 }
5036 5036 if (mr_rp != NULL) {
5037 5037 DAPLKA_RS_UNREF(mr_rp);
5038 5038 }
5039 5039 return (retval);
5040 5040 }
5041 5041
5042 5042 /*
5043 5043 * registers a memory region using the attributes of an
5044 5044 * existing region.
5045 5045 */
5046 5046 /* ARGSUSED */
5047 5047 static int
5048 5048 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5049 5049 cred_t *cred, int *rvalp)
5050 5050 {
5051 5051 boolean_t inserted = B_FALSE;
5052 5052 dapl_mr_register_lmr_t args;
5053 5053 ibt_mr_data_in_t mr_cb_data_in;
5054 5054 daplka_mr_resource_t *orig_mr_rp = NULL;
5055 5055 daplka_mr_resource_t *mr_rp;
5056 5056 ibt_smr_attr_t mem_sattr;
5057 5057 uint64_t mr_hkey = 0;
5058 5058 ibt_status_t status;
5059 5059 int retval;
5060 5060
5061 5061 retval = ddi_copyin((void *)arg, &args,
5062 5062 sizeof (dapl_mr_register_lmr_t), mode);
5063 5063 if (retval != 0) {
5064 5064 DERR("mr_register_lmr: copyin error %d\n", retval);
5065 5065 return (EINVAL);
5066 5066 }
5067 5067 orig_mr_rp = (daplka_mr_resource_t *)
5068 5068 daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
5069 5069 if (orig_mr_rp == NULL) {
5070 5070 DERR("mr_register_lmr: cannot find mr resource\n");
5071 5071 return (EINVAL);
5072 5072 }
5073 5073 ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);
5074 5074
5075 5075 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
5076 5076 if (mr_rp == NULL) {
5077 5077 DERR("mr_register_lmr: cannot allocate mr resource\n");
5078 5078 retval = ENOMEM;
5079 5079 goto cleanup;
5080 5080 }
5081 5081 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5082 5082 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
5083 5083 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
5084 5084
5085 5085 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
5086 5086 mr_rp->mr_hca = ia_rp->ia_hca;
5087 5087 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
5088 5088 mr_rp->mr_next = NULL;
5089 5089 mr_rp->mr_shared_mr = NULL;
5090 5090
5091 5091 DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
5092 5092 mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
5093 5093 mr_rp->mr_attr = orig_mr_rp->mr_attr;
5094 5094
5095 5095 /* Pass the IO addr that was returned while allocating the orig MR */
5096 5096 mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
5097 5097 mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;
5098 5098
5099 5099 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
5100 5100 orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
5101 5101 &mr_rp->mr_hdl, &mr_rp->mr_desc);
5102 5102
5103 5103 if (status != IBT_SUCCESS) {
5104 5104 DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
5105 5105 status);
5106 5106 *rvalp = (int)status;
5107 5107 retval = 0;
5108 5108 goto cleanup;
5109 5109 }
5110 5110
5111 5111 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
5112 5112 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
5113 5113 mr_cb_data_in.mr_arg1 = (void *)mr_rp;
5114 5114 mr_cb_data_in.mr_arg2 = NULL;
5115 5115
5116 5116 /* Pass the service driver mr cleanup handler to the hca driver */
5117 5117 status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
5118 5118 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
5119 5119 &mr_cb_data_in, sizeof (mr_cb_data_in));
5120 5120
5121 5121 if (status != IBT_SUCCESS) {
5122 5122 DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
5123 5123 status, mr_cb_data_in.mr_rev);
5124 5124 *rvalp = (int)status;
5125 5125 retval = 0;
5126 5126 goto cleanup;
5127 5127 }
5128 5128 mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
5129 5129 mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;
5130 5130
5131 5131 /* insert into mr hash table */
5132 5132 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
5133 5133 (void *)mr_rp);
5134 5134 if (retval != 0) {
5135 5135 DERR("mr_register: cannot insert mr resource into mr_htbl\n");
5136 5136 goto cleanup;
5137 5137 }
5138 5138 inserted = B_TRUE;
5139 5139 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
5140 5140
5141 5141 args.mrl_lkey = mr_rp->mr_desc.md_lkey;
5142 5142 args.mrl_rkey = mr_rp->mr_desc.md_rkey;
5143 5143 args.mrl_hkey = mr_hkey;
5144 5144
5145 5145 retval = ddi_copyout((void *)&args, (void *)arg,
5146 5146 sizeof (dapl_mr_register_lmr_t), mode);
5147 5147 if (retval != 0) {
5148 5148 DERR("mr_register_lmr: copyout error %d\n", retval);
5149 5149 retval = EFAULT;
5150 5150 goto cleanup;
5151 5151 }
5152 5152 if (orig_mr_rp != NULL) {
5153 5153 DAPLKA_RS_UNREF(orig_mr_rp);
5154 5154 }
5155 5155 return (0);
5156 5156
5157 5157 cleanup:;
5158 5158 if (inserted) {
5159 5159 daplka_mr_resource_t *free_rp = NULL;
5160 5160
5161 5161 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
5162 5162 (void **)&free_rp);
5163 5163 if (free_rp != mr_rp) {
5164 5164 DERR("mr_register: cannot remove mr from hash table\n");
5165 5165 /*
5166 5166 * we can only get here if another thread
5167 5167 * has completed the cleanup in mr_deregister
5168 5168 */
5169 5169 return (retval);
5170 5170 }
5171 5171 }
5172 5172 if (orig_mr_rp != NULL) {
5173 5173 DAPLKA_RS_UNREF(orig_mr_rp);
5174 5174 }
5175 5175 if (mr_rp != NULL) {
5176 5176 DAPLKA_RS_UNREF(mr_rp);
5177 5177 }
5178 5178 return (retval);
5179 5179 }
5180 5180
5181 5181 /*
5182 5182 * this function is called by mr_deregister and mr_cleanup_callback to
5183 5183 * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
5184 5184 * if mr_shared_mr is already NULL, that means the region being
5185 5185 * deregistered or invalidated is not a shared mr region and we can
5186 5186 * return immediately.
5187 5187 */
static void
daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
{
	daplka_shared_mr_t	*smrp;

	/*
	 * we need a lock because mr_callback also checks this field.
	 * for the rare case that mr_deregister and mr_cleanup_callback
	 * gets called simultaneously, we are guaranteed that smrp won't
	 * be dereferenced twice because either function will find
	 * mr_shared_mr to be NULL.
	 */
	mutex_enter(&mr_rp->mr_lock);
	smrp = mr_rp->mr_shared_mr;
	mr_rp->mr_shared_mr = NULL;
	mutex_exit(&mr_rp->mr_lock);

	/* a NULL smrp means this mr is not (or no longer) shared */
	if (smrp != NULL) {
		daplka_mr_resource_t	**mpp;
		boolean_t		mr_found = B_FALSE;

		mutex_enter(&daplka_shared_mr_lock);
		ASSERT(smrp->smr_refcnt > 0);
		/* wait for any in-flight transition to finish */
		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
		}
		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
		/* claim exclusive access to smr_mr_list */
		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
		smrp->smr_refcnt--;

		/*
		 * search and remove mr_rp from smr_mr_list.
		 * also UNREF mr_rp because it is no longer
		 * on the list.
		 */
		mpp = &smrp->smr_mr_list;
		while (*mpp != NULL) {
			if (*mpp == mr_rp) {
				*mpp = (*mpp)->mr_next;
				DAPLKA_RS_UNREF(mr_rp);
				mr_rp->mr_next = NULL;
				mr_found = B_TRUE;
				break;
			}
			mpp = &(*mpp)->mr_next;
		}
		/*
		 * since mr_clean_callback may not touch smr_mr_list
		 * at this time (due to smr_state), we can be sure
		 * that we can find and remove mr_rp from smr_mr_list
		 */
		ASSERT(mr_found);
		if (smrp->smr_refcnt == 0) {
			/* last user: tear down the shared mr object */
			D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
			avl_remove(&daplka_shared_mr_tree, smrp);
			ASSERT(smrp->smr_mr_list == NULL);
			smrp->smr_state = DAPLKA_SMR_FREED;
			cv_destroy(&smrp->smr_cv);
			kmem_free(smrp, sizeof (daplka_shared_mr_t));
		} else {
			/* others remain: end the transition and wake them */
			D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
			    smrp, smrp->smr_refcnt);
			smrp->smr_state = DAPLKA_SMR_READY;
			cv_broadcast(&smrp->smr_cv);
		}
		mutex_exit(&daplka_shared_mr_lock);
	}
}
5256 5256
5257 5257 /*
5258 5258 * deregisters a memory region.
5259 5259 * if mr is shared, remove reference from global shared mr object.
5260 5260 * release the initial reference to the mr. if the mr's refcnt is
5261 5261 * zero, call mr_destroy to free mr.
5262 5262 */
5263 5263 /* ARGSUSED */
5264 5264 static int
5265 5265 daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5266 5266 cred_t *cred, int *rvalp)
5267 5267 {
5268 5268 daplka_mr_resource_t *mr_rp;
5269 5269 dapl_mr_deregister_t args;
5270 5270 int retval;
5271 5271
5272 5272 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
5273 5273 mode);
5274 5274 if (retval != 0) {
5275 5275 DERR("mr_deregister: copyin error %d\n", retval);
5276 5276 return (EINVAL);
5277 5277 }
5278 5278 retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
5279 5279 args.mrd_hkey, (void **)&mr_rp);
5280 5280 if (retval != 0 || mr_rp == NULL) {
5281 5281 DERR("mr_deregister: cannot find mr resource\n");
5282 5282 return (EINVAL);
5283 5283 }
5284 5284 ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);
5285 5285
5286 5286 daplka_shared_mr_free(mr_rp);
5287 5287 DAPLKA_RS_UNREF(mr_rp);
5288 5288 return (0);
5289 5289 }
5290 5290
5291 5291 /*
5292 5292 * sync local memory regions on RDMA read or write.
5293 5293 */
5294 5294 /* ARGSUSED */
5295 5295 static int
5296 5296 daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5297 5297 cred_t *cred, int *rvalp)
5298 5298 {
5299 5299 dapl_mr_sync_t args;
5300 5300 daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
5301 5301 ibt_mr_sync_t mrs[DAPL_MR_PER_SYNC];
5302 5302 uint32_t sync_direction_flags;
5303 5303 ibt_status_t status;
5304 5304 int i, j;
5305 5305 int retval;
5306 5306
5307 5307 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
5308 5308 if (retval != 0) {
5309 5309 DERR("mr_sync: copyin error %d\n", retval);
5310 5310 return (EFAULT);
5311 5311 }
5312 5312
5313 5313 /* number of segments bound check */
5314 5314 if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
5315 5315 DERR("mr_sync: number of segments too large\n");
5316 5316 return (EINVAL);
5317 5317 }
5318 5318
5319 5319 /* translate MR sync direction flag */
5320 5320 if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
5321 5321 sync_direction_flags = IBT_SYNC_READ;
5322 5322 } else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
5323 5323 sync_direction_flags = IBT_SYNC_WRITE;
5324 5324 } else {
5325 5325 DERR("mr_sync: unknown flags\n");
5326 5326 return (EINVAL);
5327 5327 }
5328 5328
5329 5329 /*
5330 5330 * all the segments are going to be sync'd by ibtl together
5331 5331 */
5332 5332 for (i = 0; i < args.mrs_numseg; i++) {
5333 5333 mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
5334 5334 &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
5335 5335 if (mr_rp[i] == NULL) {
5336 5336 for (j = 0; j < i; j++) {
5337 5337 DAPLKA_RS_UNREF(mr_rp[j]);
5338 5338 }
5339 5339 DERR("mr_sync: lookup error\n");
5340 5340 return (EINVAL);
5341 5341 }
5342 5342 ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
5343 5343 mrs[i].ms_handle = mr_rp[i]->mr_hdl;
5344 5344 mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
5345 5345 mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
5346 5346 mrs[i].ms_flags = sync_direction_flags;
5347 5347 }
5348 5348
5349 5349 status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
5350 5350 if (status != IBT_SUCCESS) {
5351 5351 DERR("mr_sync: ibt_sync_mr error %d\n", status);
5352 5352 *rvalp = (int)status;
5353 5353 }
5354 5354 for (i = 0; i < args.mrs_numseg; i++) {
5355 5355 DAPLKA_RS_UNREF(mr_rp[i]);
5356 5356 }
5357 5357 return (0);
5358 5358 }
5359 5359
5360 5360 /*
5361 5361 * destroys a memory region.
5362 5362 * called when refcnt drops to zero.
5363 5363 */
5364 5364 static int
5365 5365 daplka_mr_destroy(daplka_resource_t *gen_rp)
5366 5366 {
5367 5367 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)gen_rp;
5368 5368 ibt_status_t status;
5369 5369
5370 5370 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5371 5371 ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
5372 5372 ASSERT(mr_rp->mr_shared_mr == NULL);
5373 5373 D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
5374 5374 mr_rp, DAPLKA_RS_RNUM(mr_rp));
5375 5375
5376 5376 /*
5377 5377 * deregister mr
5378 5378 */
5379 5379 if (mr_rp->mr_hdl) {
5380 5380 status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
5381 5381 mr_rp->mr_hdl);
5382 5382 if (status != IBT_SUCCESS) {
5383 5383 DERR("mr_destroy: ibt_deregister_mr returned %d\n",
5384 5384 status);
5385 5385 }
5386 5386 mr_rp->mr_hdl = NULL;
5387 5387 D3("mr_destroy: mr deregistered\n");
5388 5388 }
5389 5389 mr_rp->mr_attr.mr_vaddr = NULL;
5390 5390
5391 5391 /*
5392 5392 * release reference on PD
5393 5393 */
5394 5394 if (mr_rp->mr_pd_res != NULL) {
5395 5395 DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
5396 5396 mr_rp->mr_pd_res = NULL;
5397 5397 }
5398 5398 mutex_destroy(&mr_rp->mr_lock);
5399 5399 DAPLKA_RS_FINI(mr_rp);
5400 5400 kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
5401 5401 D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
5402 5402 return (0);
5403 5403 }
5404 5404
5405 5405 /*
5406 5406 * this function is called by daplka_hash_destroy for
5407 5407 * freeing MR resource objects
5408 5408 */
5409 5409 static void
5410 5410 daplka_hash_mr_free(void *obj)
5411 5411 {
5412 5412 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)obj;
5413 5413
5414 5414 daplka_shared_mr_free(mr_rp);
5415 5415 DAPLKA_RS_UNREF(mr_rp);
5416 5416 }
5417 5417
5418 5418 /*
5419 5419 * comparison function used for finding a shared mr object
5420 5420 * from the global shared mr avl tree.
5421 5421 */
5422 5422 static int
5423 5423 daplka_shared_mr_cmp(const void *smr1, const void *smr2)
5424 5424 {
5425 5425 daplka_shared_mr_t *s1 = (daplka_shared_mr_t *)smr1;
5426 5426 daplka_shared_mr_t *s2 = (daplka_shared_mr_t *)smr2;
5427 5427 int i;
5428 5428
5429 5429 for (i = 4; i >= 0; i--) {
5430 5430 if (s1->smr_cookie.mc_uint_arr[i] <
5431 5431 s2->smr_cookie.mc_uint_arr[i]) {
5432 5432 return (-1);
5433 5433 }
5434 5434 if (s1->smr_cookie.mc_uint_arr[i] >
5435 5435 s2->smr_cookie.mc_uint_arr[i]) {
5436 5436 return (1);
5437 5437 }
5438 5438 }
5439 5439 return (0);
5440 5440 }
5441 5441
5442 5442 /*
5443 5443 * allocates a protection domain.
5444 5444 */
5445 5445 /* ARGSUSED */
5446 5446 static int
5447 5447 daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5448 5448 cred_t *cred, int *rvalp)
5449 5449 {
5450 5450 dapl_pd_alloc_t args;
5451 5451 daplka_pd_resource_t *pd_rp;
5452 5452 ibt_status_t status;
5453 5453 uint64_t pd_hkey = 0;
5454 5454 boolean_t inserted = B_FALSE;
5455 5455 int retval;
5456 5456
5457 5457 pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
5458 5458 if (pd_rp == NULL) {
5459 5459 DERR("pd_alloc: cannot allocate pd resource\n");
5460 5460 return (ENOMEM);
5461 5461 }
5462 5462 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5463 5463 DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
5464 5464 DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);
5465 5465
5466 5466 pd_rp->pd_hca = ia_rp->ia_hca;
5467 5467 pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
5468 5468 status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
5469 5469 IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
5470 5470 if (status != IBT_SUCCESS) {
5471 5471 DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
5472 5472 *rvalp = (int)status;
5473 5473 retval = 0;
5474 5474 goto cleanup;
5475 5475 }
5476 5476
5477 5477 /* insert into pd hash table */
5478 5478 retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
5479 5479 &pd_hkey, (void *)pd_rp);
5480 5480 if (retval != 0) {
5481 5481 DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
5482 5482 goto cleanup;
5483 5483 }
5484 5484 inserted = B_TRUE;
5485 5485 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pd_rp))
5486 5486
5487 5487 /* return hkey to library */
5488 5488 args.pda_hkey = pd_hkey;
5489 5489
5490 5490 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
5491 5491 mode);
5492 5492 if (retval != 0) {
5493 5493 DERR("pd_alloc: copyout error %d\n", retval);
5494 5494 retval = EFAULT;
5495 5495 goto cleanup;
5496 5496 }
5497 5497 return (0);
5498 5498
5499 5499 cleanup:;
5500 5500 if (inserted) {
5501 5501 daplka_pd_resource_t *free_rp = NULL;
5502 5502
5503 5503 (void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
5504 5504 (void **)&free_rp);
5505 5505 if (free_rp != pd_rp) {
5506 5506 DERR("pd_alloc: cannot remove pd from hash table\n");
5507 5507 /*
5508 5508 * we can only get here if another thread
5509 5509 * has completed the cleanup in pd_free
5510 5510 */
5511 5511 return (retval);
5512 5512 }
5513 5513 }
5514 5514 DAPLKA_RS_UNREF(pd_rp);
5515 5515 return (retval);
5516 5516 }
5517 5517
5518 5518 /*
5519 5519 * destroys a protection domain.
5520 5520 * called when refcnt drops to zero.
5521 5521 */
5522 5522 static int
5523 5523 daplka_pd_destroy(daplka_resource_t *gen_rp)
5524 5524 {
5525 5525 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
5526 5526 ibt_status_t status;
5527 5527
5528 5528 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5529 5529 ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
5530 5530 D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
5531 5531 pd_rp, DAPLKA_RS_RNUM(pd_rp));
5532 5532
5533 5533 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5534 5534 if (pd_rp->pd_hdl != NULL) {
5535 5535 status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
5536 5536 pd_rp->pd_hdl);
5537 5537 if (status != IBT_SUCCESS) {
5538 5538 DERR("pd_destroy: ibt_free_pd returned %d\n", status);
5539 5539 }
5540 5540 }
5541 5541 DAPLKA_RS_FINI(pd_rp);
5542 5542 kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
5543 5543 D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
5544 5544 return (0);
5545 5545 }
5546 5546
5547 5547 static void
5548 5548 daplka_hash_pd_free(void *obj)
5549 5549 {
5550 5550 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;
5551 5551
5552 5552 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5553 5553 DAPLKA_RS_UNREF(pd_rp);
5554 5554 }
5555 5555
5556 5556 /*
5557 5557 * removes the pd reference from ia_pd_htbl and releases the
5558 5558 * initial reference to the pd. also destroys the pd if the refcnt
5559 5559 * is zero.
5560 5560 */
5561 5561 /* ARGSUSED */
5562 5562 static int
5563 5563 daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5564 5564 cred_t *cred, int *rvalp)
5565 5565 {
5566 5566 daplka_pd_resource_t *pd_rp;
5567 5567 dapl_pd_free_t args;
5568 5568 int retval;
5569 5569
5570 5570 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
5571 5571 if (retval != 0) {
5572 5572 DERR("pd_free: copyin error %d\n", retval);
5573 5573 return (EINVAL);
5574 5574 }
5575 5575
5576 5576 retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
5577 5577 args.pdf_hkey, (void **)&pd_rp);
5578 5578 if (retval != 0 || pd_rp == NULL) {
5579 5579 DERR("pd_free: cannot find pd resource\n");
5580 5580 return (EINVAL);
5581 5581 }
5582 5582 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5583 5583
5584 5584 /* UNREF calls the actual free function when refcnt is zero */
5585 5585 DAPLKA_RS_UNREF(pd_rp);
5586 5586 return (0);
5587 5587 }
5588 5588
5589 5589 /*
5590 5590 * allocates a memory window
5591 5591 */
5592 5592 /* ARGSUSED */
5593 5593 static int
5594 5594 daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5595 5595 cred_t *cred, int *rvalp)
5596 5596 {
5597 5597 daplka_pd_resource_t *pd_rp;
5598 5598 daplka_mw_resource_t *mw_rp;
5599 5599 dapl_mw_alloc_t args;
5600 5600 ibt_status_t status;
5601 5601 boolean_t inserted = B_FALSE;
5602 5602 uint64_t mw_hkey;
5603 5603 ibt_rkey_t mw_rkey;
5604 5604 int retval;
5605 5605
5606 5606 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
5607 5607 if (retval != 0) {
5608 5608 DERR("mw_alloc: copyin error %d\n", retval);
5609 5609 return (EFAULT);
5610 5610 }
5611 5611
5612 5612 /*
5613 5613 * Allocate and initialize a MW resource
5614 5614 */
5615 5615 mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
5616 5616 if (mw_rp == NULL) {
5617 5617 DERR("mw_alloc: cannot allocate mw resource\n");
5618 5618 return (ENOMEM);
5619 5619 }
5620 5620 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5621 5621 DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
5622 5622 DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);
5623 5623
5624 5624 mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
5625 5625 mw_rp->mw_hca = ia_rp->ia_hca;
5626 5626 mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;
5627 5627
5628 5628 /* get pd handle */
5629 5629 pd_rp = (daplka_pd_resource_t *)
5630 5630 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
5631 5631 if (pd_rp == NULL) {
5632 5632 DERR("mw_alloc: cannot find pd resource\n");
5633 5633 goto cleanup;
5634 5634 }
5635 5635 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5636 5636
5637 5637 mw_rp->mw_pd_res = pd_rp;
5638 5638
5639 5639 status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
5640 5640 pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);
5641 5641
5642 5642 if (status != IBT_SUCCESS) {
5643 5643 DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
5644 5644 *rvalp = (int)status;
5645 5645 retval = 0;
5646 5646 goto cleanup;
5647 5647 }
5648 5648
5649 5649 mutex_enter(&ia_rp->ia_lock);
5650 5650 switch (ia_rp->ia_state) {
5651 5651 case DAPLKA_IA_INIT:
5652 5652 ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
5653 5653 ia_rp->ia_mw_alloccnt++;
5654 5654 retval = 0;
5655 5655 break;
5656 5656 case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
5657 5657 /* another mw_alloc is already in progress increase cnt */
5658 5658 ia_rp->ia_mw_alloccnt++;
5659 5659 retval = 0;
5660 5660 break;
5661 5661 case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
5662 5662 /* FALLTHRU */
5663 5663 case DAPLKA_IA_MW_FROZEN:
5664 5664 /*
5665 5665 * IA is being or already frozen don't allow more MWs to be
5666 5666 * allocated.
5667 5667 */
5668 5668 DERR("mw_alloc: IA is freezing MWs (state=%d)\n",
5669 5669 ia_rp->ia_state);
5670 5670 retval = EINVAL;
5671 5671 break;
5672 5672 default:
5673 5673 ASSERT(!"Invalid IA state in mw_alloc");
5674 5674 DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state);
5675 5675 retval = EINVAL;
5676 5676 break;
5677 5677 }
5678 5678 mutex_exit(&ia_rp->ia_lock);
5679 5679 /* retval is 0 when ia_mw_alloccnt is incremented */
5680 5680 if (retval != 0) {
5681 5681 goto cleanup;
5682 5682 }
5683 5683
5684 5684 /* insert into mw hash table */
5685 5685 mw_hkey = 0;
5686 5686 retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
5687 5687 (void *)mw_rp);
5688 5688 if (retval != 0) {
5689 5689 DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
5690 5690 mutex_enter(&ia_rp->ia_lock);
5691 5691 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5692 5692 ia_rp->ia_mw_alloccnt--;
5693 5693 if (ia_rp->ia_mw_alloccnt == 0) {
5694 5694 ia_rp->ia_state = DAPLKA_IA_INIT;
5695 5695 cv_broadcast(&ia_rp->ia_cv);
5696 5696 }
5697 5697 mutex_exit(&ia_rp->ia_lock);
5698 5698 goto cleanup;
5699 5699 }
5700 5700 inserted = B_TRUE;
5701 5701 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp))
5702 5702
5703 5703 D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
5704 5704 mw_rp->mw_hdl, (longlong_t)mw_rkey);
5705 5705
5706 5706 mutex_enter(&ia_rp->ia_lock);
5707 5707 /*
5708 5708 * We are done with mw_alloc if this was the last mw_alloc
5709 5709 * change state back to DAPLKA_IA_INIT and wake up waiters
5710 5710 * specifically the unlock callback.
5711 5711 */
5712 5712 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5713 5713 ia_rp->ia_mw_alloccnt--;
5714 5714 if (ia_rp->ia_mw_alloccnt == 0) {
5715 5715 ia_rp->ia_state = DAPLKA_IA_INIT;
5716 5716 cv_broadcast(&ia_rp->ia_cv);
5717 5717 }
5718 5718 mutex_exit(&ia_rp->ia_lock);
5719 5719
5720 5720 args.mw_hkey = mw_hkey;
5721 5721 args.mw_rkey = mw_rkey;
5722 5722
5723 5723 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
5724 5724 mode);
5725 5725 if (retval != 0) {
5726 5726 DERR("mw_alloc: copyout error %d\n", retval);
5727 5727 retval = EFAULT;
5728 5728 goto cleanup;
5729 5729 }
5730 5730 return (0);
5731 5731
5732 5732 cleanup:;
5733 5733 if (inserted) {
5734 5734 daplka_mw_resource_t *free_rp = NULL;
5735 5735
5736 5736 (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
5737 5737 (void **)&free_rp);
5738 5738 if (free_rp != mw_rp) {
5739 5739 DERR("mw_alloc: cannot remove mw from hash table\n");
5740 5740 /*
5741 5741 * we can only get here if another thread
5742 5742 * has completed the cleanup in mw_free
5743 5743 */
5744 5744 return (retval);
5745 5745 }
5746 5746 }
5747 5747 DAPLKA_RS_UNREF(mw_rp);
5748 5748 return (retval);
5749 5749 }
5750 5750
5751 5751 /*
5752 5752 * removes the mw reference from ia_mw_htbl and releases the
5753 5753 * initial reference to the mw. also destroys the mw if the refcnt
5754 5754 * is zero.
5755 5755 */
5756 5756 /* ARGSUSED */
5757 5757 static int
5758 5758 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5759 5759 cred_t *cred, int *rvalp)
5760 5760 {
5761 5761 daplka_mw_resource_t *mw_rp = NULL;
5762 5762 dapl_mw_free_t args;
5763 5763 int retval = 0;
5764 5764
5765 5765 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
5766 5766 if (retval != 0) {
5767 5767 DERR("mw_free: copyin error %d\n", retval);
5768 5768 return (EFAULT);
5769 5769 }
5770 5770
5771 5771 retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
5772 5772 (void **)&mw_rp);
5773 5773 if (retval != 0 || mw_rp == NULL) {
5774 5774 DERR("mw_free: cannot find mw resrc (0x%llx)\n",
5775 5775 (longlong_t)args.mw_hkey);
5776 5776 return (EINVAL);
5777 5777 }
5778 5778
5779 5779 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5780 5780
5781 5781 /* UNREF calls the actual free function when refcnt is zero */
5782 5782 DAPLKA_RS_UNREF(mw_rp);
5783 5783 return (retval);
5784 5784 }
5785 5785
5786 5786 /*
5787 5787 * destroys the memory window.
5788 5788 * called when refcnt drops to zero.
5789 5789 */
5790 5790 static int
5791 5791 daplka_mw_destroy(daplka_resource_t *gen_rp)
5792 5792 {
5793 5793 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)gen_rp;
5794 5794 ibt_status_t status;
5795 5795
5796 5796 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5797 5797 ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
5798 5798 D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
5799 5799 mw_rp, DAPLKA_RS_RNUM(mw_rp));
5800 5800
5801 5801 /*
5802 5802 * free memory window
5803 5803 */
5804 5804 if (mw_rp->mw_hdl) {
5805 5805 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
5806 5806 mw_rp->mw_hdl);
5807 5807 if (status != IBT_SUCCESS) {
5808 5808 DERR("mw_destroy: ibt_free_mw returned %d\n", status);
5809 5809 }
5810 5810 mw_rp->mw_hdl = NULL;
5811 5811 D3("mw_destroy: mw freed\n");
5812 5812 }
5813 5813
5814 5814 /*
5815 5815 * release reference on PD
5816 5816 */
5817 5817 if (mw_rp->mw_pd_res != NULL) {
5818 5818 DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
5819 5819 mw_rp->mw_pd_res = NULL;
5820 5820 }
5821 5821 mutex_destroy(&mw_rp->mw_lock);
5822 5822 DAPLKA_RS_FINI(mw_rp);
5823 5823 kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
5824 5824 D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
5825 5825 return (0);
5826 5826 }
5827 5827
5828 5828 static void
5829 5829 daplka_hash_mw_free(void *obj)
5830 5830 {
5831 5831 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;
5832 5832
5833 5833 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5834 5834 DAPLKA_RS_UNREF(mw_rp);
5835 5835 }
5836 5836
5837 5837 /*
5838 5838 * SRQ ioctls and supporting functions
5839 5839 */
5840 5840 /* ARGSUSED */
5841 5841 static int
5842 5842 daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5843 5843 cred_t *cred, int *rvalp)
5844 5844 {
5845 5845 daplka_srq_resource_t *srq_rp;
5846 5846 daplka_pd_resource_t *pd_rp;
5847 5847 dapl_srq_create_t args;
5848 5848 ibt_srq_sizes_t srq_sizes;
5849 5849 ibt_srq_sizes_t srq_real_sizes;
5850 5850 ibt_hca_attr_t *hca_attrp;
5851 5851 uint64_t srq_hkey = 0;
5852 5852 boolean_t inserted = B_FALSE;
5853 5853 int retval;
5854 5854 ibt_status_t status;
5855 5855
5856 5856 D3("srq_create: enter\n");
5857 5857 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
5858 5858 mode);
5859 5859 if (retval != 0) {
5860 5860 DERR("srq_create: copyin error %d\n", retval);
5861 5861 return (EFAULT);
5862 5862 }
5863 5863 srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
5864 5864 if (srq_rp == NULL) {
5865 5865 DERR("srq_create: cannot allocate ep_rp\n");
5866 5866 return (ENOMEM);
5867 5867 }
5868 5868 DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
5869 5869 DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);
5870 5870
5871 5871 srq_rp->srq_hca = ia_rp->ia_hca;
5872 5872 srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
5873 5873 mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);
5874 5874
5875 5875 /* get pd handle */
5876 5876 pd_rp = (daplka_pd_resource_t *)
5877 5877 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
5878 5878 if (pd_rp == NULL) {
5879 5879 DERR("srq_create: cannot find pd resource\n");
5880 5880 retval = EINVAL;
5881 5881 goto cleanup;
5882 5882 }
5883 5883 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5884 5884 srq_rp->srq_pd_res = pd_rp;
5885 5885
5886 5886 /*
5887 5887 * these checks ensure that the requested SRQ sizes
5888 5888 * are within the limits supported by the chosen HCA.
5889 5889 */
5890 5890 hca_attrp = &ia_rp->ia_hca->hca_attr;
5891 5891 if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
5892 5892 DERR("srq_create: invalid srqs_sz %d\n",
5893 5893 args.srqc_sizes.srqs_sz);
5894 5894 retval = EINVAL;
5895 5895 goto cleanup;
5896 5896 }
5897 5897 if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
5898 5898 DERR("srq_create: invalid srqs_sgl %d\n",
5899 5899 args.srqc_sizes.srqs_sgl);
5900 5900 retval = EINVAL;
5901 5901 goto cleanup;
5902 5902 }
5903 5903
5904 5904 D3("srq_create: srq_sgl %d, srq_sz %d\n",
5905 5905 args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);
5906 5906
5907 5907 srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
5908 5908 srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;
5909 5909
5910 5910 /* create srq */
5911 5911 status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
5912 5912 IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
5913 5913 &srq_real_sizes);
5914 5914 if (status != IBT_SUCCESS) {
5915 5915 DERR("srq_create: alloc_srq returned %d\n", status);
5916 5916 *rvalp = (int)status;
5917 5917 retval = 0;
5918 5918 goto cleanup;
5919 5919 }
5920 5920
5921 5921 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5922 5922 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5923 5923
5924 5924 /* Get HCA-specific data_out info */
5925 5925 status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
5926 5926 IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
5927 5927 &args.srqc_data_out, sizeof (args.srqc_data_out));
5928 5928
5929 5929 if (status != IBT_SUCCESS) {
5930 5930 DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
5931 5931 *rvalp = (int)status;
5932 5932 retval = 0;
5933 5933 goto cleanup;
5934 5934 }
5935 5935
5936 5936 srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;
5937 5937
5938 5938 /* preparing to copyout map_data back to the library */
5939 5939 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5940 5940 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5941 5941
5942 5942 /* insert into srq hash table */
5943 5943 retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
5944 5944 &srq_hkey, (void *)srq_rp);
5945 5945 if (retval != 0) {
5946 5946 DERR("srq_create: cannot insert srq resource into srq_htbl\n");
5947 5947 goto cleanup;
5948 5948 }
5949 5949 inserted = B_TRUE;
5950 5950
5951 5951 /* return hkey to library */
5952 5952 args.srqc_hkey = srq_hkey;
5953 5953
5954 5954 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
5955 5955 mode);
5956 5956 if (retval != 0) {
5957 5957 DERR("srq_create: copyout error %d\n", retval);
5958 5958 retval = EFAULT;
5959 5959 goto cleanup;
5960 5960 }
5961 5961
5962 5962 D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
5963 5963 D3(" sz(%d) sgl(%d)\n",
5964 5964 args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
5965 5965 D3("srq_create: exit\n");
5966 5966 return (0);
5967 5967
5968 5968 cleanup:
5969 5969 if (inserted) {
5970 5970 daplka_srq_resource_t *free_rp = NULL;
5971 5971
5972 5972 (void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
5973 5973 (void **)&free_rp);
5974 5974 if (free_rp != srq_rp) {
5975 5975 /*
5976 5976 * this case is impossible because ep_free will
5977 5977 * wait until our state transition is complete.
5978 5978 */
5979 5979 DERR("srq_create: cannot remove srq from hash table\n");
5980 5980 ASSERT(B_FALSE);
5981 5981 return (retval);
5982 5982 }
5983 5983 }
5984 5984 DAPLKA_RS_UNREF(srq_rp);
5985 5985 return (retval);
5986 5986 }
5987 5987
5988 5988 /*
5989 5989 * Resize an existing SRQ
5990 5990 */
5991 5991 /* ARGSUSED */
5992 5992 static int
5993 5993 daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5994 5994 cred_t *cred, int *rvalp)
5995 5995 {
5996 5996 daplka_srq_resource_t *srq_rp = NULL;
5997 5997 ibt_hca_attr_t *hca_attrp;
5998 5998 dapl_srq_resize_t args;
5999 5999 ibt_status_t status;
6000 6000 int retval = 0;
6001 6001
6002 6002 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
6003 6003 mode);
6004 6004 if (retval != 0) {
6005 6005 DERR("srq_resize: copyin error %d\n", retval);
6006 6006 return (EFAULT);
6007 6007 }
6008 6008
6009 6009 /* get srq resource */
6010 6010 srq_rp = (daplka_srq_resource_t *)
6011 6011 daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
6012 6012 if (srq_rp == NULL) {
6013 6013 DERR("srq_resize: cannot find srq resource\n");
6014 6014 return (EINVAL);
6015 6015 }
6016 6016 ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6017 6017
6018 6018 hca_attrp = &ia_rp->ia_hca->hca_attr;
6019 6019 if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
6020 6020 DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
6021 6021 retval = EINVAL;
6022 6022 goto cleanup;
6023 6023 }
6024 6024
6025 6025 mutex_enter(&srq_rp->srq_lock);
6026 6026 /*
6027 6027 * If ibt_resize_srq fails that it is primarily due to resource
6028 6028 * shortage. Per IB spec resize will never loose events and
6029 6029 * a resize error leaves the SRQ intact. Therefore even if the
6030 6030 * resize request fails we proceed and get the mapping data
6031 6031 * from the SRQ so that the library can mmap it.
6032 6032 */
6033 6033 status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
6034 6034 args.srqr_new_size, 0, &args.srqr_real_size);
6035 6035 if (status != IBT_SUCCESS) {
6036 6036 /* we return the size of the old CQ if resize fails */
6037 6037 args.srqr_real_size = srq_rp->srq_real_size;
6038 6038 ASSERT(status != IBT_SRQ_HDL_INVALID);
6039 6039 DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
6040 6040 } else {
6041 6041 srq_rp->srq_real_size = args.srqr_real_size;
6042 6042 }
6043 6043 mutex_exit(&srq_rp->srq_lock);
6044 6044
6045 6045
6046 6046 D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
6047 6047 DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);
6048 6048
6049 6049 /* Get HCA-specific data_out info */
6050 6050 status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
6051 6051 IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
6052 6052 &args.srqr_data_out, sizeof (args.srqr_data_out));
6053 6053 if (status != IBT_SUCCESS) {
6054 6054 DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
6055 6055 /* return ibt_ci_data_out status */
6056 6056 *rvalp = (int)status;
6057 6057 retval = 0;
6058 6058 goto cleanup;
6059 6059 }
6060 6060
6061 6061 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
6062 6062 mode);
6063 6063 if (retval != 0) {
6064 6064 DERR("srq_resize: copyout error %d\n", retval);
6065 6065 retval = EFAULT;
6066 6066 goto cleanup;
6067 6067 }
6068 6068
6069 6069 cleanup:;
6070 6070 if (srq_rp != NULL) {
6071 6071 DAPLKA_RS_UNREF(srq_rp);
6072 6072 }
6073 6073 return (retval);
6074 6074 }
6075 6075
6076 6076 /*
6077 6077 * Frees an SRQ resource.
6078 6078 */
6079 6079 /* ARGSUSED */
6080 6080 static int
6081 6081 daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6082 6082 cred_t *cred, int *rvalp)
6083 6083 {
6084 6084 daplka_srq_resource_t *srq_rp = NULL;
6085 6085 dapl_srq_free_t args;
6086 6086 int retval;
6087 6087
6088 6088 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
6089 6089 if (retval != 0) {
6090 6090 DERR("srq_free: copyin error %d\n", retval);
6091 6091 return (EFAULT);
6092 6092 }
6093 6093
6094 6094 retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
6095 6095 args.srqf_hkey, (void **)&srq_rp);
6096 6096 if (retval != 0 || srq_rp == NULL) {
6097 6097 /*
6098 6098 * this is only possible if we have two threads
6099 6099 * calling ep_free in parallel.
6100 6100 */
6101 6101 DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
6102 6102 retval, args.srqf_hkey);
6103 6103 return (EINVAL);
6104 6104 }
6105 6105
6106 6106 /* UNREF calls the actual free function when refcnt is zero */
6107 6107 DAPLKA_RS_UNREF(srq_rp);
6108 6108 return (0);
6109 6109 }
6110 6110
6111 6111 /*
6112 6112 * destroys a SRQ resource.
6113 6113 * called when refcnt drops to zero.
6114 6114 */
6115 6115 static int
6116 6116 daplka_srq_destroy(daplka_resource_t *gen_rp)
6117 6117 {
6118 6118 daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)gen_rp;
6119 6119 ibt_status_t status;
6120 6120
6121 6121 ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);
6122 6122
6123 6123 D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
6124 6124 srq_rp, DAPLKA_RS_RNUM(srq_rp));
6125 6125 /*
6126 6126 * destroy the srq
6127 6127 */
6128 6128 if (srq_rp->srq_hdl != NULL) {
6129 6129 status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
6130 6130 if (status != IBT_SUCCESS) {
6131 6131 DERR("srq_destroy: ibt_free_srq returned %d\n",
6132 6132 status);
6133 6133 }
6134 6134 srq_rp->srq_hdl = NULL;
6135 6135 D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
6136 6136 }
6137 6137 /*
6138 6138 * release all references
6139 6139 */
6140 6140 if (srq_rp->srq_pd_res != NULL) {
6141 6141 DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
6142 6142 srq_rp->srq_pd_res = NULL;
6143 6143 }
6144 6144
6145 6145 mutex_destroy(&srq_rp->srq_lock);
6146 6146 DAPLKA_RS_FINI(srq_rp);
6147 6147 kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
6148 6148 D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
6149 6149 return (0);
6150 6150 }
6151 6151
6152 6152 static void
6153 6153 daplka_hash_srq_free(void *obj)
6154 6154 {
6155 6155 daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;
6156 6156
6157 6157 ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6158 6158 DAPLKA_RS_UNREF(srq_rp);
6159 6159 }
6160 6160
6161 6161 /*
6162 6162 * This function tells the CM to start listening on a service id.
6163 6163 * It must be called by the passive side client before the client
6164 6164 * can receive connection requests from remote endpoints. If the
6165 6165 * client specifies a non-zero service id (connection qualifier in
6166 6166 * dapl terms), this function will attempt to bind to this service
6167 6167 * id and return an error if the id is already in use. If the client
6168 6168 * specifies zero as the service id, this function will try to find
6169 6169 * the next available service id and return it back to the client.
6170 6170 * To support the cr_handoff function, this function will, in addition
6171 6171 * to creating and inserting an SP resource into the per-IA SP hash
6172 6172 * table, insert the SP resource into a global SP table. This table
6173 6173 * maintains all active service points created by all dapl clients.
6174 6174 * CR handoff locates the target SP by iterating through this global
6175 6175 * table.
6176 6176 */
/* ARGSUSED */
static int
daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	daplka_sp_resource_t	*sp_rp = NULL;
	dapl_service_register_t	args;
	ibt_srv_desc_t		sd_args;
	ibt_srv_bind_t		sb_args;
	ibt_status_t		status;
	ib_svc_id_t		retsid = 0;
	uint64_t		sp_hkey = 0;
	boolean_t		bumped = B_FALSE;
	int			backlog_size;
	int			retval = 0;

	/* pull the register request in from userland */
	retval = ddi_copyin((void *)arg, &args,
	    sizeof (dapl_service_register_t), mode);
	if (retval != 0) {
		DERR("service_register: copyin error %d\n", retval);
		return (EINVAL);
	}

	/*
	 * allocate and initialize the SP resource. DAPLKA_RS_INIT
	 * gives the SP its initial reference and registers
	 * daplka_sp_destroy as the zero-refcnt destructor.
	 */
	sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
	if (sp_rp == NULL) {
		DERR("service_register: cannot allocate sp resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
	DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);

	/* check if evd exists */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
	if (evd_rp == NULL) {
		DERR("service_register: evd resource not found\n");
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * initialize backlog size
	 */
	if (evd_rp && evd_rp->evd_cq_real_size > 0) {
		backlog_size = evd_rp->evd_cq_real_size + 1;
	} else {
		backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
	}
	D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);

	/* save the userland sp ptr */
	sp_rp->sp_cookie = args.sr_sp_cookie;
	sp_rp->sp_backlog_size = backlog_size;
	D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
	/*
	 * NOTE(review): this allocation result is not checked;
	 * presumably daplka_km_flags is KM_SLEEP on this path so the
	 * call cannot fail -- confirm.
	 */
	sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
	    sizeof (daplka_sp_conn_pend_t), daplka_km_flags);

	/* save evd resource pointer */
	sp_rp->sp_evd_res = evd_rp;

	/*
	 * save ruid here so that we can do a comparison later
	 * when someone does cr_handoff. the check will prevent
	 * a malicious app from passing a CR to us.
	 */
	sp_rp->sp_ruid = crgetruid(cred);

	/* fill in args for register_service */
	sd_args.sd_ud_handler = NULL;
	sd_args.sd_handler = daplka_cm_service_handler;
	sd_args.sd_flags = IBT_SRV_NO_FLAGS;

	/*
	 * a zero args.sr_sid asks IBTF to allocate the next available
	 * service id, which is returned through retsid.
	 */
	status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
	    &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);

	if (status != IBT_SUCCESS) {
		DERR("service_register: ibt_register_service returned %d\n",
		    status);
		/*
		 * IBTF errors are passed back to userland via rvalp;
		 * the ioctl itself reports success (retval == 0).
		 */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/* save returned sid */
	sp_rp->sp_conn_qual = retsid;
	args.sr_retsid = retsid;

	/* fill in args for bind_service */
	sb_args.sb_pkey = ia_rp->ia_port_pkey;
	sb_args.sb_lease = 0xffffffff;
	sb_args.sb_key[0] = 0x1234;
	sb_args.sb_key[1] = 0x5678;
	sb_args.sb_name = DAPLKA_DRV_NAME;

	D2("service_register: bind(0x%llx:0x%llx)\n",
	    (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
	    (longlong_t)ia_rp->ia_hca_sgid.gid_guid);

	/* sp_rp becomes the cm_private passed to the CM handler */
	status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
	    &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
	if (status != IBT_SUCCESS) {
		DERR("service_register: ibt_bind_service returned %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * need to bump refcnt because the global hash table will
	 * have a reference to sp_rp
	 */
	DAPLKA_RS_REF(sp_rp);
	bumped = B_TRUE;

	/* insert into global sp hash table */
	sp_rp->sp_global_hkey = 0;
	retval = daplka_hash_insert(&daplka_global_sp_htbl,
	    &sp_rp->sp_global_hkey, (void *)sp_rp);
	if (retval != 0) {
		DERR("service_register: cannot insert sp resource\n");
		goto cleanup;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp))

	/* insert into per-IA sp hash table */
	retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
	    &sp_hkey, (void *)sp_rp);
	if (retval != 0) {
		DERR("service_register: cannot insert sp resource\n");
		goto cleanup;
	}

	/* pass index to application */
	args.sr_sp_hkey = sp_hkey;
	retval = ddi_copyout(&args, (void *)arg,
	    sizeof (dapl_service_register_t), mode);
	if (retval != 0) {
		DERR("service_register: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	ASSERT(sp_rp != NULL);
	/* remove from ia table */
	if (sp_hkey != 0) {
		daplka_sp_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
		    sp_hkey, (void **)&free_rp);
		if (free_rp != sp_rp) {
			DERR("service_register: cannot remove sp\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in svc_deregister
			 */
			return (retval);
		}
	}

	/* remove from global table */
	if (sp_rp->sp_global_hkey != 0) {
		daplka_sp_resource_t *free_rp = NULL;

		/*
		 * we get here if either the hash_insert into
		 * ia_sp_htbl failed or the ddi_copyout failed.
		 * hash_insert failure implies that we are the
		 * only thread with a reference to sp. ddi_copyout
		 * failure implies that svc_deregister could have
		 * picked up the sp and destroyed it. but since
		 * we got to this point, we must have removed
		 * the sp ourselves in hash_remove above and
		 * that the sp can be destroyed by us.
		 */
		(void) daplka_hash_remove(&daplka_global_sp_htbl,
		    sp_rp->sp_global_hkey, (void **)&free_rp);
		if (free_rp != sp_rp) {
			DERR("service_register: cannot remove sp\n");
			/*
			 * this case is impossible. see explanation above.
			 */
			ASSERT(B_FALSE);
			return (retval);
		}
		sp_rp->sp_global_hkey = 0;
	}
	/* unreference sp */
	if (bumped) {
		DAPLKA_RS_UNREF(sp_rp);
	}

	/* destroy sp resource */
	DAPLKA_RS_UNREF(sp_rp);
	return (retval);
}
6375 6375
6376 6376 /*
6377 6377 * deregisters the service and removes SP from the global table.
6378 6378 */
6379 6379 /* ARGSUSED */
6380 6380 static int
6381 6381 daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6382 6382 cred_t *cred, int *rvalp)
6383 6383 {
6384 6384 dapl_service_deregister_t args;
6385 6385 daplka_sp_resource_t *sp_rp = NULL, *g_sp_rp = NULL;
6386 6386 int retval;
6387 6387
6388 6388 retval = ddi_copyin((void *)arg, &args,
6389 6389 sizeof (dapl_service_deregister_t), mode);
6390 6390
6391 6391 if (retval != 0) {
6392 6392 DERR("service_deregister: copyin error %d\n", retval);
6393 6393 return (EINVAL);
6394 6394 }
6395 6395
6396 6396 retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
6397 6397 args.sdr_sp_hkey, (void **)&sp_rp);
6398 6398 if (retval != 0 || sp_rp == NULL) {
6399 6399 DERR("service_deregister: cannot find sp resource\n");
6400 6400 return (EINVAL);
6401 6401 }
6402 6402
6403 6403 retval = daplka_hash_remove(&daplka_global_sp_htbl,
6404 6404 sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6405 6405 if (retval != 0 || g_sp_rp == NULL) {
6406 6406 DERR("service_deregister: cannot find sp resource\n");
6407 6407 }
6408 6408
6409 6409 /* remove the global reference */
6410 6410 if (g_sp_rp == sp_rp) {
6411 6411 DAPLKA_RS_UNREF(g_sp_rp);
6412 6412 }
6413 6413
6414 6414 DAPLKA_RS_UNREF(sp_rp);
6415 6415 return (0);
6416 6416 }
6417 6417
/*
 * destroys a service point.
 * called when the refcnt drops to zero.
 */
static int
daplka_sp_destroy(daplka_resource_t *gen_rp)
{
	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
	ibt_status_t status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
	ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
	D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
	    sp_rp, DAPLKA_RS_RNUM(sp_rp));

	/*
	 * it is possible for pending connections to remain
	 * on an SP. We need to clean them up here.
	 */
	if (sp_rp->sp_backlog != NULL) {
		ibt_cm_proceed_reply_t proc_reply;
		int i, cnt = 0;
		void *spcp_sidp;

		/*
		 * walk the backlog; every PENDING slot holds a deferred
		 * CM session that must be answered before the SP goes
		 * away. reset the slot under sp_lock, then reject the
		 * session with IBT_CM_NO_RESOURCE outside the lock.
		 */
		for (i = 0; i < sp_rp->sp_backlog_size; i++) {
			if (sp_rp->sp_backlog[i].spcp_state ==
			    DAPLKA_SPCP_PENDING) {
				cnt++;
				if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
					DERR("sp_destroy: "
					    "spcp_sid == NULL!\n");
					continue;
				}
				mutex_enter(&sp_rp->sp_lock);
				spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
				sp_rp->sp_backlog[i].spcp_state =
				    DAPLKA_SPCP_INIT;
				sp_rp->sp_backlog[i].spcp_sid = NULL;
				sp_rp->sp_backlog[i].spcp_req_len = 0;
				mutex_exit(&sp_rp->sp_lock);
				status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
				    spcp_sidp,
				    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
				if (status != IBT_SUCCESS) {
					DERR("sp_destroy: proceed failed %d\n",
					    status);
				}
			}
		}
		if (cnt > 0) {
			DERR("sp_destroy: found %d pending "
			    "connections\n", cnt);
		}
	}

	/* unbind before deregistering, mirroring the setup order */
	if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
		status = ibt_unbind_service(sp_rp->sp_srv_hdl,
		    sp_rp->sp_bind_hdl);
		if (status != IBT_SUCCESS) {
			DERR("sp_destroy: ibt_unbind_service "
			    "failed: %d\n", status);
		}
	}

	if (sp_rp->sp_srv_hdl != NULL) {
		status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
		    sp_rp->sp_srv_hdl);
		if (status != IBT_SUCCESS) {
			DERR("sp_destroy: ibt_deregister_service "
			    "failed: %d\n", status);
		}
	}
	/* free the backlog array allocated in service_register */
	if (sp_rp->sp_backlog != NULL) {
		kmem_free(sp_rp->sp_backlog,
		    sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
		sp_rp->sp_backlog = NULL;
		sp_rp->sp_backlog_size = 0;
	}

	/*
	 * release reference to evd
	 */
	if (sp_rp->sp_evd_res != NULL) {
		DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
	}
	sp_rp->sp_bind_hdl = NULL;
	sp_rp->sp_srv_hdl = NULL;
	DAPLKA_RS_FINI(sp_rp);
	kmem_free(sp_rp, sizeof (*sp_rp));
	/* prints only the (now stale) pointer value for debugging */
	D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
	return (0);
}
6510 6510
6511 6511 /*
6512 6512 * this function is called by daplka_hash_destroy for
6513 6513 * freeing SP resource objects
6514 6514 */
6515 6515 static void
6516 6516 daplka_hash_sp_free(void *obj)
6517 6517 {
6518 6518 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6519 6519 daplka_sp_resource_t *g_sp_rp;
6520 6520 int retval;
6521 6521
6522 6522 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6523 6523
6524 6524 retval = daplka_hash_remove(&daplka_global_sp_htbl,
6525 6525 sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6526 6526 if (retval != 0 || g_sp_rp == NULL) {
6527 6527 DERR("sp_free: cannot find sp resource\n");
6528 6528 }
6529 6529 if (g_sp_rp == sp_rp) {
6530 6530 DAPLKA_RS_UNREF(g_sp_rp);
6531 6531 }
6532 6532
6533 6533 DAPLKA_RS_UNREF(sp_rp);
6534 6534 }
6535 6535
6536 6536 static void
6537 6537 daplka_hash_sp_unref(void *obj)
6538 6538 {
6539 6539 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6540 6540
6541 6541 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6542 6542 DAPLKA_RS_UNREF(sp_rp);
6543 6543 }
6544 6544
6545 6545 /*
6546 6546 * Passive side CM handlers
6547 6547 */
6548 6548
/*
 * processes the REQ_RCV event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
{
	daplka_sp_conn_pend_t	*conn = NULL;
	daplka_evd_event_t	*cr_ev = NULL;
	ibt_cm_status_t		cm_status = IBT_CM_DEFAULT;
	uint16_t		bkl_index;
	ibt_status_t		status;

	/*
	 * acquire a slot in the connection backlog of this service point
	 */
	mutex_enter(&spp->sp_lock);
	for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
		if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
			conn = &spp->sp_backlog[bkl_index];
			ASSERT(conn->spcp_sid == NULL);
			/* mark the slot taken while still under the lock */
			conn->spcp_state = DAPLKA_SPCP_PENDING;
			conn->spcp_sid = event->cm_session_id;
			break;
		}
	}
	mutex_exit(&spp->sp_lock);

	/*
	 * too many pending connections
	 * (loop ran off the end without finding a free slot, so
	 * conn is still NULL here)
	 */
	if (bkl_index == spp->sp_backlog_size) {
		DERR("service_req: connection pending exceeded %d limit\n",
		    spp->sp_backlog_size);
		return (IBT_CM_NO_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn))

	/*
	 * save data for cr_handoff
	 */
	if (pr_data != NULL && pr_len > 0) {
		int trunc_len = pr_len;

		/* clamp to the fixed-size spcp_req_data buffer */
		if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
			DERR("service_req: private data truncated\n");
			trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
		}
		conn->spcp_req_len = trunc_len;
		bcopy(pr_data, conn->spcp_req_data, trunc_len);
	} else {
		conn->spcp_req_len = 0;
	}
	/* record the requested RDMA read/atomic resource counts */
	conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
	conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;

	/*
	 * create a CR event
	 */
	cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (cr_ev == NULL) {
		DERR("service_req: could not alloc cr_ev\n");
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	cr_ev->ee_next = NULL;
	cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
	cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
	/*
	 * save the requestor gid
	 * daplka_event_poll needs this if this is a third party REQ_RCV
	 */
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_guid;

	/*
	 * set event type
	 */
	if (pr_len == 0) {
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
	} else {
		/* the event carries its own full-length copy of pr_data */
		cr_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_req: could not alloc priv\n");
			cm_status = IBT_CM_NO_RESOURCE;
			goto cleanup;
		}
		bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
	}
	cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * tell the active side to expect the processing time to be
	 * at most equal to daplka_cm_delay
	 */
	status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
	    daplka_cm_delay, NULL, 0);
	if (status != IBT_SUCCESS) {
		DERR("service_req: ibt_cm_delay failed %d\n", status);
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	/*
	 * enqueue cr_ev onto the cr_events list of the EVD
	 * corresponding to the SP
	 */
	D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
	    "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
	    cr_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
	    (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);

	daplka_evd_wakeup(spp->sp_evd_res,
	    &spp->sp_evd_res->evd_cr_events, cr_ev);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn))
	/* defer the CM reply until the client accepts/rejects the CR */
	return (IBT_CM_DEFER);

cleanup:;
	/*
	 * free the cr event
	 */
	if (cr_ev != NULL) {
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
			kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
			cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
			cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(cr_ev, sizeof (daplka_evd_event_t));
	}
	/*
	 * release our slot in the backlog array
	 */
	if (conn != NULL) {
		mutex_enter(&spp->sp_lock);
		ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
		ASSERT(conn->spcp_sid == event->cm_session_id);
		conn->spcp_state = DAPLKA_SPCP_INIT;
		conn->spcp_req_len = 0;
		conn->spcp_sid = NULL;
		mutex_exit(&spp->sp_lock);
	}
	return (cm_status);
}
6703 6703
6704 6704 /*
6705 6705 * processes the CONN_CLOSED event
6706 6706 */
6707 6707 /* ARGSUSED */
6708 6708 static ibt_cm_status_t
6709 6709 daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
6710 6710 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
6711 6711 void *priv_data, ibt_priv_data_len_t len)
6712 6712 {
6713 6713 daplka_ep_resource_t *ep_rp;
6714 6714 daplka_evd_event_t *disc_ev;
6715 6715 uint32_t old_state, new_state;
6716 6716
6717 6717 ep_rp = (daplka_ep_resource_t *)
6718 6718 ibt_get_chan_private(event->cm_channel);
6719 6719 if (ep_rp == NULL) {
6720 6720 DERR("service_conn_closed: ep_rp == NULL\n");
6721 6721 return (IBT_CM_ACCEPT);
6722 6722 }
6723 6723
6724 6724 /*
6725 6725 * verify that the ep_state is either CONNECTED or
6726 6726 * DISCONNECTING. if it is not in either states return
6727 6727 * without generating an event.
6728 6728 */
6729 6729 new_state = old_state = daplka_ep_get_state(ep_rp);
6730 6730 if (old_state != DAPLKA_EP_STATE_CONNECTED &&
6731 6731 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6732 6732 /*
6733 6733 * we can get here if the connection is being aborted
6734 6734 */
6735 6735 D2("service_conn_closed: conn aborted, state = %d, "
6736 6736 "closed = %d\n", old_state, (int)event->cm_event.closed);
6737 6737 daplka_ep_set_state(ep_rp, old_state, new_state);
6738 6738 return (IBT_CM_ACCEPT);
6739 6739 }
6740 6740
6741 6741 /*
6742 6742 * create a DAPL_IB_CME_DISCONNECTED event
6743 6743 */
6744 6744 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6745 6745 if (disc_ev == NULL) {
6746 6746 DERR("service_conn_closed: cannot alloc disc_ev\n");
6747 6747 daplka_ep_set_state(ep_rp, old_state, new_state);
6748 6748 return (IBT_CM_ACCEPT);
6749 6749 }
6750 6750
6751 6751 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
6752 6752 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6753 6753 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6754 6754 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6755 6755 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6756 6756 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6757 6757
6758 6758 D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
6759 6759 disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);
6760 6760
6761 6761 /*
6762 6762 * transition ep_state to DISCONNECTED
6763 6763 */
6764 6764 new_state = DAPLKA_EP_STATE_DISCONNECTED;
6765 6765 daplka_ep_set_state(ep_rp, old_state, new_state);
6766 6766
6767 6767 /*
6768 6768 * enqueue event onto the conn_evd owned by ep_rp
6769 6769 */
6770 6770 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6771 6771 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6772 6772
6773 6773 return (IBT_CM_ACCEPT);
6774 6774 }
6775 6775
/*
 * processes the CONN_EST event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*conn_ev;
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_est: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING. if it is not in this
	 * state, return without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("service_conn_est: conn aborted, state = %d\n",
		    old_state);
		/* restore state unchanged (new_state == old_state) */
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("service_conn_est: conn_ev alloc failed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_conn_est: pr_data alloc failed\n");
			/* abandon the event but leave the ep state alone */
			daplka_ep_set_state(ep_rp, old_state, new_state);
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
	    conn_ev, ep_rp->ep_conn_evd);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}
6860 6860
6861 6861 /*
6862 6862 * processes the FAILURE event
6863 6863 */
6864 6864 /* ARGSUSED */
6865 6865 static ibt_cm_status_t
6866 6866 daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
6867 6867 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
6868 6868 ibt_priv_data_len_t len)
6869 6869 {
6870 6870 daplka_evd_event_t *disc_ev;
6871 6871 daplka_ep_resource_t *ep_rp;
6872 6872 uint32_t old_state, new_state;
6873 6873 ibt_rc_chan_query_attr_t chan_attrs;
6874 6874 ibt_status_t status;
6875 6875
6876 6876 /*
6877 6877 * check that we still have a valid cm_channel before continuing
6878 6878 */
6879 6879 if (event->cm_channel == NULL) {
6880 6880 DERR("serice_event_failure: event->cm_channel == NULL\n");
6881 6881 return (IBT_CM_ACCEPT);
6882 6882 }
6883 6883 ep_rp = (daplka_ep_resource_t *)
6884 6884 ibt_get_chan_private(event->cm_channel);
6885 6885 if (ep_rp == NULL) {
6886 6886 DERR("service_event_failure: ep_rp == NULL\n");
6887 6887 return (IBT_CM_ACCEPT);
6888 6888 }
6889 6889
6890 6890 /*
6891 6891 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
6892 6892 * is not in either state, return without generating an event.
6893 6893 */
6894 6894 new_state = old_state = daplka_ep_get_state(ep_rp);
6895 6895 if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
6896 6896 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6897 6897 /*
6898 6898 * we can get here if the connection is being aborted
6899 6899 */
6900 6900 DERR("service_event_failure: conn aborted, state = %d, "
6901 6901 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
6902 6902 (int)event->cm_event.failed.cf_code,
6903 6903 (int)event->cm_event.failed.cf_msg,
6904 6904 (int)event->cm_event.failed.cf_reason);
6905 6905
6906 6906 daplka_ep_set_state(ep_rp, old_state, new_state);
6907 6907 return (IBT_CM_ACCEPT);
6908 6908 }
6909 6909
6910 6910 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
6911 6911 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
6912 6912
6913 6913 if ((status == IBT_SUCCESS) &&
6914 6914 (chan_attrs.rc_state != IBT_STATE_ERROR)) {
6915 6915 DERR("service_event_failure: conn abort qpn %d state %d\n",
6916 6916 chan_attrs.rc_qpn, chan_attrs.rc_state);
6917 6917
6918 6918 /* explicit transition the QP to ERROR state */
6919 6919 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
6920 6920 }
6921 6921
6922 6922 /*
6923 6923 * create an event
6924 6924 */
6925 6925 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6926 6926 if (disc_ev == NULL) {
6927 6927 DERR("service_event_failure: cannot alloc disc_ev\n");
6928 6928 daplka_ep_set_state(ep_rp, old_state, new_state);
6929 6929 return (IBT_CM_ACCEPT);
6930 6930 }
6931 6931
6932 6932 /*
6933 6933 * fill in the appropriate event type
6934 6934 */
6935 6935 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
6936 6936 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
6937 6937 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
6938 6938 switch (event->cm_event.failed.cf_reason) {
6939 6939 case IBT_CM_INVALID_CID:
6940 6940 disc_ev->ee_cmev.ec_cm_ev_type =
6941 6941 DAPL_IB_CME_DESTINATION_REJECT;
6942 6942 break;
6943 6943 default:
6944 6944 disc_ev->ee_cmev.ec_cm_ev_type =
6945 6945 DAPL_IB_CME_LOCAL_FAILURE;
6946 6946 break;
6947 6947 }
6948 6948 } else {
6949 6949 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
6950 6950 }
6951 6951 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6952 6952 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6953 6953 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6954 6954 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6955 6955 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6956 6956
6957 6957 D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
6958 6958 "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
6959 6959 ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
6960 6960 (int)event->cm_event.failed.cf_msg,
6961 6961 (int)event->cm_event.failed.cf_reason,
6962 6962 (longlong_t)ep_rp->ep_psep_cookie);
6963 6963
6964 6964 /*
6965 6965 * transition ep_state to DISCONNECTED
6966 6966 */
6967 6967 new_state = DAPLKA_EP_STATE_DISCONNECTED;
6968 6968 daplka_ep_set_state(ep_rp, old_state, new_state);
6969 6969
6970 6970 /*
6971 6971 * enqueue event onto the conn_evd owned by ep_rp
6972 6972 */
6973 6973 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6974 6974 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6975 6975
6976 6976 return (IBT_CM_ACCEPT);
6977 6977 }
6978 6978
6979 6979 /*
6980 6980 * this is the passive side CM handler. it gets registered
6981 6981 * when an SP resource is created in daplka_service_register.
6982 6982 */
6983 6983 static ibt_cm_status_t
6984 6984 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
6985 6985 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6986 6986 {
6987 6987 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)cm_private;
6988 6988
6989 6989 if (sp_rp == NULL) {
6990 6990 DERR("service_handler: sp_rp == NULL\n");
6991 6991 return (IBT_CM_NO_RESOURCE);
6992 6992 }
6993 6993 /*
6994 6994 * default is not to return priv data
6995 6995 */
6996 6996 if (ret_args != NULL) {
6997 6997 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
6998 6998 ret_args->cm_ret_len = 0;
6999 6999 }
7000 7000
7001 7001 switch (event->cm_type) {
7002 7002 case IBT_CM_EVENT_REQ_RCV:
7003 7003 D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
7004 7004 return (daplka_cm_service_req(sp_rp, event, ret_args,
7005 7005 event->cm_priv_data, event->cm_priv_data_len));
7006 7006
7007 7007 case IBT_CM_EVENT_REP_RCV:
7008 7008 /* passive side should not receive this event */
7009 7009 D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
7010 7010 return (IBT_CM_DEFAULT);
7011 7011
7012 7012 case IBT_CM_EVENT_CONN_CLOSED:
7013 7013 D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7014 7014 event->cm_event.closed);
7015 7015 return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
7016 7016 priv_data, len));
7017 7017
7018 7018 case IBT_CM_EVENT_MRA_RCV:
7019 7019 /* passive side does default processing MRA event */
7020 7020 D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
7021 7021 return (IBT_CM_DEFAULT);
7022 7022
7023 7023 case IBT_CM_EVENT_CONN_EST:
7024 7024 D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
7025 7025 return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
7026 7026 priv_data, len));
7027 7027
7028 7028 case IBT_CM_EVENT_FAILURE:
7029 7029 D2("service_handler: IBT_CM_EVENT_FAILURE\n");
7030 7030 return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
7031 7031 priv_data, len));
7032 7032 case IBT_CM_EVENT_LAP_RCV:
7033 7033 /* active side had initiated a path migration operation */
7034 7034 D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
7035 7035 return (IBT_CM_ACCEPT);
7036 7036 default:
7037 7037 DERR("service_handler: invalid event %d\n", event->cm_type);
7038 7038 break;
7039 7039 }
7040 7040 return (IBT_CM_DEFAULT);
7041 7041 }
7042 7042
7043 7043 /*
7044 7044 * Active side CM handlers
7045 7045 */
7046 7046
/*
 * Processes the REP_RCV event. When the passive side accepts the
 * connection, this handler is called. We make a copy of the private
 * data into the ep so that it can be passed back to userland in when
 * the CONN_EST event occurs.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
	    (int)pr_len);

	ASSERT(ep_rp != NULL);
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_NO_CHANNEL);
	}

	/*
	 * we do not cancel the timer here because the connection
	 * handshake is still in progress.
	 */

	/*
	 * save the private data. it will be passed up when
	 * the connection is established.
	 *
	 * NOTE(review): pr_len is copied without being clamped to the
	 * size of ep_priv_data, unlike daplka_cm_service_req which
	 * truncates to DAPL_MAX_PRIVATE_DATA_SIZE -- confirm that
	 * ep_priv_data can hold any ibt_priv_data_len_t value.
	 */
	if (pr_len > 0) {
		ep_rp->ep_priv_len = pr_len;
		bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
	}

	/*
	 * we do not actually transition to a different state.
	 * the state will change when we get a conn_est, failure,
	 * closed, or timeout event.
	 */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	return (IBT_CM_ACCEPT);
}
7098 7098
7099 7099 /*
7100 7100 * Processes the CONN_CLOSED event. This gets called when either
7101 7101 * the active or passive side closes the rc channel.
7102 7102 */
7103 7103 /* ARGSUSED */
7104 7104 static ibt_cm_status_t
7105 7105 daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7106 7106 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7107 7107 {
7108 7108 daplka_evd_event_t *disc_ev;
7109 7109 uint32_t old_state, new_state;
7110 7110
7111 7111 ASSERT(ep_rp != NULL);
7112 7112 old_state = new_state = daplka_ep_get_state(ep_rp);
7113 7113 if (old_state != DAPLKA_EP_STATE_CONNECTED &&
7114 7114 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7115 7115 /*
7116 7116 * we can get here if the connection is being aborted
7117 7117 */
7118 7118 D2("rc_conn_closed: conn aborted, state = %d, "
7119 7119 "closed = %d\n", old_state, (int)event->cm_event.closed);
7120 7120 daplka_ep_set_state(ep_rp, old_state, new_state);
7121 7121 return (IBT_CM_ACCEPT);
7122 7122 }
7123 7123
7124 7124 /*
7125 7125 * it's ok for the timer to fire at this point. the
7126 7126 * taskq thread that processes the timer will just wait
7127 7127 * until we are done with our state transition.
7128 7128 */
7129 7129 if (daplka_cancel_timer(ep_rp) != 0) {
7130 7130 /*
7131 7131 * daplka_cancel_timer returns -1 if the timer is
7132 7132 * being processed and 0 for all other cases.
7133 7133 * we need to reset ep_state to allow timer processing
7134 7134 * to continue.
7135 7135 */
7136 7136 DERR("rc_conn_closed: timer is being processed\n");
7137 7137 daplka_ep_set_state(ep_rp, old_state, new_state);
7138 7138 return (IBT_CM_ACCEPT);
7139 7139 }
7140 7140
7141 7141 /*
7142 7142 * create a DAPL_IB_CME_DISCONNECTED event
7143 7143 */
7144 7144 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7145 7145 if (disc_ev == NULL) {
7146 7146 DERR("rc_conn_closed: could not alloc ev\n");
7147 7147 daplka_ep_set_state(ep_rp, old_state, new_state);
7148 7148 return (IBT_CM_ACCEPT);
7149 7149 }
7150 7150
7151 7151 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
7152 7152 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7153 7153 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7154 7154 disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7155 7155 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
7156 7156 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
7157 7157
7158 7158 D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
7159 7159 disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);
7160 7160
7161 7161 /*
7162 7162 * transition ep_state to DISCONNECTED
7163 7163 */
7164 7164 new_state = DAPLKA_EP_STATE_DISCONNECTED;
7165 7165 daplka_ep_set_state(ep_rp, old_state, new_state);
7166 7166
7167 7167 /*
7168 7168 * enqueue event onto the conn_evd owned by ep_rp
7169 7169 */
7170 7170 daplka_evd_wakeup(ep_rp->ep_conn_evd,
7171 7171 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7172 7172
7173 7173 return (IBT_CM_ACCEPT);
7174 7174 }
7175 7175
7176 7176 /*
7177 7177 * processes the CONN_EST event
7178 7178 */
7179 7179 /* ARGSUSED */
7180 7180 static ibt_cm_status_t
7181 7181 daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7182 7182 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7183 7183 {
7184 7184 daplka_evd_event_t *conn_ev;
7185 7185 uint32_t old_state, new_state;
7186 7186
7187 7187 ASSERT(ep_rp != NULL);
7188 7188 old_state = new_state = daplka_ep_get_state(ep_rp);
7189 7189 if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7190 7190 /*
7191 7191 * we can get here if the connection is being aborted
7192 7192 */
7193 7193 DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
7194 7194 daplka_ep_set_state(ep_rp, old_state, new_state);
7195 7195 return (IBT_CM_ACCEPT);
7196 7196 }
7197 7197
7198 7198 /*
7199 7199 * it's ok for the timer to fire at this point. the
7200 7200 * taskq thread that processes the timer will just wait
7201 7201 * until we are done with our state transition.
7202 7202 */
7203 7203 if (daplka_cancel_timer(ep_rp) != 0) {
7204 7204 /*
7205 7205 * daplka_cancel_timer returns -1 if the timer is
7206 7206 * being processed and 0 for all other cases.
7207 7207 * we need to reset ep_state to allow timer processing
7208 7208 * to continue.
7209 7209 */
7210 7210 DERR("rc_conn_est: timer is being processed\n");
7211 7211 daplka_ep_set_state(ep_rp, old_state, new_state);
7212 7212 return (IBT_CM_ACCEPT);
7213 7213 }
7214 7214
7215 7215 /*
7216 7216 * create a DAPL_IB_CME_CONNECTED event
7217 7217 */
7218 7218 conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7219 7219 if (conn_ev == NULL) {
7220 7220 DERR("rc_conn_est: could not alloc ev\n");
7221 7221 daplka_ep_set_state(ep_rp, old_state, new_state);
7222 7222 return (IBT_CM_ACCEPT);
7223 7223 }
7224 7224
7225 7225 conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
7226 7226 conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7227 7227 conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7228 7228 conn_ev->ee_cmev.ec_cm_psep_cookie = 0;
7229 7229
7230 7230 /*
7231 7231 * The private data passed back in the connection established
7232 7232 * event is what was recvd in the daplka_cm_rc_rep_rcv handler and
7233 7233 * saved in ep resource structure.
7234 7234 */
7235 7235 if (ep_rp->ep_priv_len > 0) {
7236 7236 conn_ev->ee_cmev.ec_cm_ev_priv_data =
7237 7237 kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);
7238 7238
7239 7239 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7240 7240 DERR("rc_conn_est: could not alloc pr_data\n");
7241 7241 kmem_free(conn_ev, sizeof (daplka_evd_event_t));
7242 7242 daplka_ep_set_state(ep_rp, old_state, new_state);
7243 7243 return (IBT_CM_ACCEPT);
7244 7244 }
7245 7245 bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
7246 7246 ep_rp->ep_priv_len);
7247 7247 }
7248 7248 conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;
7249 7249
7250 7250 D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
7251 7251 "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
7252 7252 conn_ev->ee_cmev.ec_cm_ev_priv_data,
7253 7253 (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);
7254 7254
7255 7255 /*
7256 7256 * transition ep_state to CONNECTED
7257 7257 */
7258 7258 new_state = DAPLKA_EP_STATE_CONNECTED;
7259 7259 daplka_ep_set_state(ep_rp, old_state, new_state);
7260 7260
7261 7261 /*
7262 7262 * enqueue event onto the conn_evd owned by ep_rp
7263 7263 */
7264 7264 daplka_evd_wakeup(ep_rp->ep_conn_evd,
7265 7265 &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
7266 7266
7267 7267 return (IBT_CM_ACCEPT);
7268 7268 }
7269 7269
7270 7270 /*
7271 7271 * processes the FAILURE event
7272 7272 */
7273 7273 /* ARGSUSED */
7274 7274 static ibt_cm_status_t
7275 7275 daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7276 7276 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7277 7277 {
7278 7278 daplka_evd_event_t *disc_ev;
7279 7279 ibt_priv_data_len_t pr_len = event->cm_priv_data_len;
7280 7280 void *pr_data = event->cm_priv_data;
7281 7281 uint32_t old_state, new_state;
7282 7282 ibt_rc_chan_query_attr_t chan_attrs;
7283 7283 ibt_status_t status;
7284 7284
7285 7285 ASSERT(ep_rp != NULL);
7286 7286 old_state = new_state = daplka_ep_get_state(ep_rp);
7287 7287 if (old_state != DAPLKA_EP_STATE_CONNECTING &&
7288 7288 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7289 7289 /*
7290 7290 * we can get here if the connection is being aborted
7291 7291 */
7292 7292 DERR("rc_event_failure: conn aborted, state = %d, "
7293 7293 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
7294 7294 (int)event->cm_event.failed.cf_code,
7295 7295 (int)event->cm_event.failed.cf_msg,
7296 7296 (int)event->cm_event.failed.cf_reason);
7297 7297
7298 7298 daplka_ep_set_state(ep_rp, old_state, new_state);
7299 7299 return (IBT_CM_ACCEPT);
7300 7300 }
7301 7301
7302 7302 /*
7303 7303 * it's ok for the timer to fire at this point. the
7304 7304 * taskq thread that processes the timer will just wait
7305 7305 * until we are done with our state transition.
7306 7306 */
7307 7307 if (daplka_cancel_timer(ep_rp) != 0) {
7308 7308 /*
7309 7309 * daplka_cancel_timer returns -1 if the timer is
7310 7310 * being processed and 0 for all other cases.
7311 7311 * we need to reset ep_state to allow timer processing
7312 7312 * to continue.
7313 7313 */
7314 7314 DERR("rc_event_failure: timer is being processed\n");
7315 7315 daplka_ep_set_state(ep_rp, old_state, new_state);
7316 7316 return (IBT_CM_ACCEPT);
7317 7317 }
7318 7318
7319 7319 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
7320 7320 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
7321 7321
7322 7322 if ((status == IBT_SUCCESS) &&
7323 7323 (chan_attrs.rc_state != IBT_STATE_ERROR)) {
7324 7324 DERR("rc_event_failure: conn abort qpn %d state %d\n",
7325 7325 chan_attrs.rc_qpn, chan_attrs.rc_state);
7326 7326
7327 7327 /* explicit transition the QP to ERROR state */
7328 7328 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
7329 7329 }
7330 7330
7331 7331 /*
7332 7332 * create an event
7333 7333 */
7334 7334 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7335 7335 if (disc_ev == NULL) {
7336 7336 DERR("rc_event_failure: cannot alloc disc_ev\n");
7337 7337 daplka_ep_set_state(ep_rp, old_state, new_state);
7338 7338 return (IBT_CM_ACCEPT);
7339 7339 }
7340 7340
7341 7341 /*
7342 7342 * copy private data into event
7343 7343 */
7344 7344 if (pr_len > 0) {
7345 7345 disc_ev->ee_cmev.ec_cm_ev_priv_data =
7346 7346 kmem_zalloc(pr_len, KM_NOSLEEP);
7347 7347
7348 7348 if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7349 7349 DERR("rc_event_failure: cannot alloc pr data\n");
7350 7350 kmem_free(disc_ev, sizeof (daplka_evd_event_t));
7351 7351 daplka_ep_set_state(ep_rp, old_state, new_state);
7352 7352 return (IBT_CM_ACCEPT);
7353 7353 }
7354 7354 bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
7355 7355 }
7356 7356 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
7357 7357
7358 7358 /*
7359 7359 * fill in the appropriate event type
7360 7360 */
7361 7361 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
7362 7362 switch (event->cm_event.failed.cf_reason) {
7363 7363 case IBT_CM_CONSUMER:
7364 7364 disc_ev->ee_cmev.ec_cm_ev_type =
7365 7365 DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
7366 7366 break;
7367 7367 case IBT_CM_NO_CHAN:
7368 7368 case IBT_CM_NO_RESC:
7369 7369 disc_ev->ee_cmev.ec_cm_ev_type =
7370 7370 DAPL_IB_CME_DESTINATION_REJECT;
7371 7371 break;
7372 7372 default:
7373 7373 disc_ev->ee_cmev.ec_cm_ev_type =
7374 7374 DAPL_IB_CME_DESTINATION_REJECT;
7375 7375 break;
7376 7376 }
7377 7377 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
7378 7378 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
7379 7379 } else {
7380 7380 /* others we'll mark as local failure */
7381 7381 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
7382 7382 }
7383 7383 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7384 7384 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7385 7385 disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7386 7386
7387 7387 D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
7388 7388 "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
7389 7389 (int)event->cm_event.failed.cf_code,
7390 7390 (int)event->cm_event.failed.cf_msg,
7391 7391 (int)event->cm_event.failed.cf_reason);
7392 7392
7393 7393 /*
7394 7394 * transition ep_state to DISCONNECTED
7395 7395 */
7396 7396 new_state = DAPLKA_EP_STATE_DISCONNECTED;
7397 7397 daplka_ep_set_state(ep_rp, old_state, new_state);
7398 7398
7399 7399 /*
7400 7400 * enqueue event onto the conn_evd owned by ep_rp
7401 7401 */
7402 7402 daplka_evd_wakeup(ep_rp->ep_conn_evd,
7403 7403 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7404 7404
7405 7405 return (IBT_CM_ACCEPT);
7406 7406 }
7407 7407
7408 7408 /*
7409 7409 * This is the active side CM handler. It gets registered when
7410 7410 * ibt_open_rc_channel is called.
7411 7411 */
7412 7412 static ibt_cm_status_t
7413 7413 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
7414 7414 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7415 7415 {
7416 7416 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;
7417 7417
7418 7418 if (ep_rp == NULL) {
7419 7419 DERR("rc_handler: ep_rp == NULL\n");
7420 7420 return (IBT_CM_NO_CHANNEL);
7421 7421 }
7422 7422 /*
7423 7423 * default is not to return priv data
7424 7424 */
7425 7425 if (ret_args != NULL) {
7426 7426 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
7427 7427 ret_args->cm_ret_len = 0;
7428 7428 }
7429 7429
7430 7430 switch (event->cm_type) {
7431 7431 case IBT_CM_EVENT_REQ_RCV:
7432 7432 /* active side should not receive this event */
7433 7433 D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
7434 7434 break;
7435 7435
7436 7436 case IBT_CM_EVENT_REP_RCV:
7437 7437 /* connection accepted by passive side */
7438 7438 D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
7439 7439 return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
7440 7440 priv_data, len));
7441 7441
7442 7442 case IBT_CM_EVENT_CONN_CLOSED:
7443 7443 D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7444 7444 event->cm_event.closed);
7445 7445 return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
7446 7446 priv_data, len));
7447 7447
7448 7448 case IBT_CM_EVENT_MRA_RCV:
7449 7449 /* passive side does default processing MRA event */
7450 7450 D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
7451 7451 return (IBT_CM_DEFAULT);
7452 7452
7453 7453 case IBT_CM_EVENT_CONN_EST:
7454 7454 D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
7455 7455 return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
7456 7456 priv_data, len));
7457 7457
7458 7458 case IBT_CM_EVENT_FAILURE:
7459 7459 D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
7460 7460 return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
7461 7461 priv_data, len));
7462 7462
7463 7463 default:
7464 7464 D2("rc_handler: invalid event %d\n", event->cm_type);
7465 7465 break;
7466 7466 }
7467 7467 return (IBT_CM_DEFAULT);
7468 7468 }
7469 7469
7470 7470 /*
7471 7471 * creates an IA resource and inserts it into the global resource table.
7472 7472 */
7473 7473 /* ARGSUSED */
7474 7474 static int
7475 7475 daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
7476 7476 cred_t *cred, int *rvalp)
7477 7477 {
7478 7478 daplka_ia_resource_t *ia_rp, *tmp_rp;
7479 7479 boolean_t inserted = B_FALSE;
7480 7480 dapl_ia_create_t args;
7481 7481 ibt_hca_hdl_t hca_hdl;
7482 7482 ibt_status_t status;
7483 7483 ib_gid_t sgid;
7484 7484 int retval;
7485 7485 ibt_hca_portinfo_t *pinfop;
7486 7486 uint_t pinfon;
7487 7487 uint_t size;
7488 7488 ibt_ar_t ar_s;
7489 7489 daplka_hca_t *hca;
7490 7490
7491 7491 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
7492 7492 mode);
7493 7493 if (retval != 0) {
7494 7494 DERR("ia_create: copyin error %d\n", retval);
7495 7495 return (EFAULT);
7496 7496 }
7497 7497 if (args.ia_version != DAPL_IF_VERSION) {
7498 7498 DERR("ia_create: invalid version %d, expected version %d\n",
7499 7499 args.ia_version, DAPL_IF_VERSION);
7500 7500 return (EINVAL);
7501 7501 }
7502 7502
7503 7503 /*
7504 7504 * find the hca with the matching guid
7505 7505 */
7506 7506 mutex_enter(&daplka_dev->daplka_mutex);
7507 7507 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7508 7508 hca = hca->hca_next) {
7509 7509 if (hca->hca_guid == args.ia_guid) {
7510 7510 DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
7511 7511 break;
7512 7512 }
7513 7513 }
7514 7514 mutex_exit(&daplka_dev->daplka_mutex);
7515 7515
7516 7516 if (hca == NULL) {
7517 7517 DERR("ia_create: guid 0x%016llx not found\n",
7518 7518 (longlong_t)args.ia_guid);
7519 7519 return (EINVAL);
7520 7520 }
7521 7521
7522 7522 /*
7523 7523 * check whether port number is valid and whether it is up
7524 7524 */
7525 7525 if (args.ia_port > hca->hca_nports) {
7526 7526 DERR("ia_create: invalid hca_port %d\n", args.ia_port);
7527 7527 DAPLKA_RELE_HCA(daplka_dev, hca);
7528 7528 return (EINVAL);
7529 7529 }
7530 7530 hca_hdl = hca->hca_hdl;
7531 7531 if (hca_hdl == NULL) {
7532 7532 DERR("ia_create: hca_hdl == NULL\n");
7533 7533 DAPLKA_RELE_HCA(daplka_dev, hca);
7534 7534 return (EINVAL);
7535 7535 }
7536 7536 status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
7537 7537 &pinfop, &pinfon, &size);
7538 7538 if (status != IBT_SUCCESS) {
7539 7539 DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
7540 7540 *rvalp = (int)status;
7541 7541 DAPLKA_RELE_HCA(daplka_dev, hca);
7542 7542 return (0);
7543 7543 }
7544 7544 sgid = pinfop->p_sgid_tbl[0];
7545 7545 ibt_free_portinfo(pinfop, size);
7546 7546
7547 7547 ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
7548 7548 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7549 7549 DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);
7550 7550
7551 7551 mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
7552 7552 cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
7553 7553 ia_rp->ia_hca_hdl = hca_hdl;
7554 7554 ia_rp->ia_hca_sgid = sgid;
7555 7555 ia_rp->ia_hca = hca;
7556 7556 ia_rp->ia_port_num = args.ia_port;
7557 7557 ia_rp->ia_port_pkey = args.ia_pkey;
7558 7558 ia_rp->ia_pid = ddi_get_pid();
7559 7559 ia_rp->ia_async_evd_hkeys = NULL;
7560 7560 ia_rp->ia_ar_registered = B_FALSE;
7561 7561 bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);
7562 7562
7563 7563 /* register Address Record */
7564 7564 ar_s.ar_gid = ia_rp->ia_hca_sgid;
7565 7565 ar_s.ar_pkey = ia_rp->ia_port_pkey;
7566 7566 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7567 7567 #define UC(b) ar_s.ar_data[(b)]
7568 7568 D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
7569 7569 UC(8), UC(9), UC(10), UC(11));
7570 7570 D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
7571 7571 UC(12), UC(13), UC(14), UC(15));
7572 7572 retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7573 7573 if (retval != IBT_SUCCESS) {
7574 7574 DERR("ia_create: failed to register Address Record.\n");
7575 7575 retval = EINVAL;
7576 7576 goto cleanup;
7577 7577 }
7578 7578 ia_rp->ia_ar_registered = B_TRUE;
7579 7579
7580 7580 /*
7581 7581 * create hash tables for all object types
7582 7582 */
7583 7583 retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
7584 7584 daplka_hash_ep_free, daplka_hash_generic_lookup);
7585 7585 if (retval != 0) {
7586 7586 DERR("ia_create: cannot create ep hash table\n");
7587 7587 goto cleanup;
7588 7588 }
7589 7589 retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
7590 7590 daplka_hash_mr_free, daplka_hash_generic_lookup);
7591 7591 if (retval != 0) {
7592 7592 DERR("ia_create: cannot create mr hash table\n");
7593 7593 goto cleanup;
7594 7594 }
7595 7595 retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
7596 7596 daplka_hash_mw_free, daplka_hash_generic_lookup);
7597 7597 if (retval != 0) {
7598 7598 DERR("ia_create: cannot create mw hash table\n");
7599 7599 goto cleanup;
7600 7600 }
7601 7601 retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
7602 7602 daplka_hash_pd_free, daplka_hash_generic_lookup);
7603 7603 if (retval != 0) {
7604 7604 DERR("ia_create: cannot create pd hash table\n");
7605 7605 goto cleanup;
7606 7606 }
7607 7607 retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
7608 7608 daplka_hash_evd_free, daplka_hash_generic_lookup);
7609 7609 if (retval != 0) {
7610 7610 DERR("ia_create: cannot create evd hash table\n");
7611 7611 goto cleanup;
7612 7612 }
7613 7613 retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
7614 7614 daplka_hash_cno_free, daplka_hash_generic_lookup);
7615 7615 if (retval != 0) {
7616 7616 DERR("ia_create: cannot create cno hash table\n");
7617 7617 goto cleanup;
7618 7618 }
7619 7619 retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
7620 7620 daplka_hash_sp_free, daplka_hash_generic_lookup);
7621 7621 if (retval != 0) {
7622 7622 DERR("ia_create: cannot create sp hash table\n");
7623 7623 goto cleanup;
7624 7624 }
7625 7625 retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
7626 7626 daplka_hash_srq_free, daplka_hash_generic_lookup);
7627 7627 if (retval != 0) {
7628 7628 DERR("ia_create: cannot create srq hash table\n");
7629 7629 goto cleanup;
7630 7630 }
7631 7631 /*
7632 7632 * insert ia_rp into the global resource table
7633 7633 */
7634 7634 retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
7635 7635 if (retval != 0) {
7636 7636 DERR("ia_create: cannot insert resource\n");
7637 7637 goto cleanup;
7638 7638 }
7639 7639 inserted = B_TRUE;
7640 7640 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ia_rp))
7641 7641
7642 7642 args.ia_resnum = rnum;
7643 7643 retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
7644 7644 if (retval != 0) {
7645 7645 DERR("ia_create: copyout error %d\n", retval);
7646 7646 retval = EFAULT;
7647 7647 goto cleanup;
7648 7648 }
7649 7649 return (0);
7650 7650
7651 7651 cleanup:;
7652 7652 if (inserted) {
7653 7653 tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
7654 7654 if (tmp_rp != ia_rp) {
7655 7655 /*
7656 7656 * we can return here because another thread must
7657 7657 * have freed up the resource
7658 7658 */
7659 7659 DERR("ia_create: cannot remove resource\n");
7660 7660 return (retval);
7661 7661 }
7662 7662 }
7663 7663 DAPLKA_RS_UNREF(ia_rp);
7664 7664 return (retval);
7665 7665 }
7666 7666
7667 7667 /*
7668 7668 * destroys an IA resource
7669 7669 */
7670 7670 static int
7671 7671 daplka_ia_destroy(daplka_resource_t *gen_rp)
7672 7672 {
7673 7673 daplka_ia_resource_t *ia_rp = (daplka_ia_resource_t *)gen_rp;
7674 7674 daplka_async_evd_hkey_t *hkp;
7675 7675 int cnt;
7676 7676 ibt_ar_t ar_s;
7677 7677
7678 7678 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7679 7679 D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);
7680 7680
7681 7681 /* deregister Address Record */
7682 7682 if (ia_rp->ia_ar_registered) {
7683 7683 ar_s.ar_gid = ia_rp->ia_hca_sgid;
7684 7684 ar_s.ar_pkey = ia_rp->ia_port_pkey;
7685 7685 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7686 7686 (void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7687 7687 ia_rp->ia_ar_registered = B_FALSE;
7688 7688 }
7689 7689
7690 7690 /*
7691 7691 * destroy hash tables. make sure resources are
7692 7692 * destroyed in the correct order.
7693 7693 */
7694 7694 daplka_hash_destroy(&ia_rp->ia_mw_htbl);
7695 7695 daplka_hash_destroy(&ia_rp->ia_mr_htbl);
7696 7696 daplka_hash_destroy(&ia_rp->ia_ep_htbl);
7697 7697 daplka_hash_destroy(&ia_rp->ia_srq_htbl);
7698 7698 daplka_hash_destroy(&ia_rp->ia_evd_htbl);
7699 7699 daplka_hash_destroy(&ia_rp->ia_cno_htbl);
7700 7700 daplka_hash_destroy(&ia_rp->ia_pd_htbl);
7701 7701 daplka_hash_destroy(&ia_rp->ia_sp_htbl);
7702 7702
7703 7703 /*
7704 7704 * free the async evd list
7705 7705 */
7706 7706 cnt = 0;
7707 7707 hkp = ia_rp->ia_async_evd_hkeys;
7708 7708 while (hkp != NULL) {
7709 7709 daplka_async_evd_hkey_t *free_hkp;
7710 7710
7711 7711 cnt++;
7712 7712 free_hkp = hkp;
7713 7713 hkp = hkp->aeh_next;
7714 7714 kmem_free(free_hkp, sizeof (*free_hkp));
7715 7715 }
7716 7716 if (cnt > 0) {
7717 7717 D3("ia_destroy: freed %d hkeys\n", cnt);
7718 7718 }
7719 7719 mutex_destroy(&ia_rp->ia_lock);
7720 7720 cv_destroy(&ia_rp->ia_cv);
7721 7721 ia_rp->ia_hca_hdl = NULL;
7722 7722
7723 7723 DAPLKA_RS_FINI(ia_rp);
7724 7724
7725 7725 if (ia_rp->ia_hca)
7726 7726 DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);
7727 7727
7728 7728 kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
7729 7729 D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
7730 7730 return (0);
7731 7731 }
7732 7732
7733 7733 static void
7734 7734 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
7735 7735 uint64_t cookie, daplka_ia_resource_t *ia_rp)
7736 7736 {
7737 7737 daplka_evd_event_t *evp;
7738 7738 daplka_evd_resource_t *async_evd;
7739 7739 daplka_async_evd_hkey_t *curr;
7740 7740
7741 7741 mutex_enter(&ia_rp->ia_lock);
7742 7742 curr = ia_rp->ia_async_evd_hkeys;
7743 7743 while (curr != NULL) {
7744 7744 /*
7745 7745 * Note: this allocation does not zero out the buffer
7746 7746 * since we init all the fields.
7747 7747 */
7748 7748 evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7749 7749 if (evp == NULL) {
7750 7750 DERR("async_event_enqueue: event alloc failed"
7751 7751 "!found\n", ia_rp, curr->aeh_evd_hkey);
7752 7752 curr = curr->aeh_next;
7753 7753 continue;
7754 7754 }
7755 7755 evp->ee_next = NULL;
7756 7756 evp->ee_aev.ibae_type = code;
7757 7757 evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
7758 7758 evp->ee_aev.ibae_cookie = cookie;
7759 7759 evp->ee_aev.ibae_port = event->ev_port;
7760 7760
7761 7761 /*
7762 7762 * Lookup the async evd corresponding to this ia and enqueue
7763 7763 * evp and wakeup any waiter.
7764 7764 */
7765 7765 async_evd = (daplka_evd_resource_t *)
7766 7766 daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
7767 7767 if (async_evd == NULL) { /* async evd is being freed */
7768 7768 DERR("async_event_enqueue: ia_rp(%p) asycn_evd %llx "
7769 7769 "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7770 7770 kmem_free(evp, sizeof (daplka_evd_event_t));
7771 7771 curr = curr->aeh_next;
7772 7772 continue;
7773 7773 }
7774 7774 daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);
7775 7775
7776 7776 /* decrement refcnt on async_evd */
7777 7777 DAPLKA_RS_UNREF(async_evd);
7778 7778 curr = curr->aeh_next;
7779 7779 }
7780 7780 mutex_exit(&ia_rp->ia_lock);
7781 7781 }
7782 7782 /*
7783 7783 * This routine is called in kernel context
7784 7784 */
7785 7785
7786 7786 /* ARGSUSED */
7787 7787 static void
7788 7788 daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7789 7789 ibt_async_code_t code, ibt_async_event_t *event)
7790 7790 {
7791 7791 daplka_ep_resource_t *epp;
7792 7792 daplka_ia_resource_t *ia_rp;
7793 7793 minor_t ia_rnum;
7794 7794
7795 7795 if (event->ev_chan_hdl == NULL) {
7796 7796 DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
7797 7797 return;
7798 7798 }
7799 7799
7800 7800 mutex_enter(&daplka_dev->daplka_mutex);
7801 7801 epp = ibt_get_chan_private(event->ev_chan_hdl);
7802 7802 if (epp == NULL) {
7803 7803 mutex_exit(&daplka_dev->daplka_mutex);
7804 7804 DERR("daplka_rc_async_handler: chan_private is NULL\n");
7805 7805 return;
7806 7806 }
7807 7807
7808 7808 /* grab a reference to this ep */
7809 7809 DAPLKA_RS_REF(epp);
7810 7810 mutex_exit(&daplka_dev->daplka_mutex);
7811 7811
7812 7812 /*
7813 7813 * The endpoint resource has the resource number corresponding to
7814 7814 * the IA resource. Use that to lookup the ia resource entry
7815 7815 */
7816 7816 ia_rnum = DAPLKA_RS_RNUM(epp);
7817 7817 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7818 7818 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7819 7819 D2("daplka_rc_async_handler: resource (%d) not found\n",
7820 7820 ia_rnum);
7821 7821 DAPLKA_RS_UNREF(epp);
7822 7822 return;
7823 7823 }
7824 7824
7825 7825 /*
7826 7826 * Create an async event and chain it to the async evd
7827 7827 */
7828 7828 daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);
7829 7829
7830 7830 DAPLKA_RS_UNREF(ia_rp);
7831 7831 DAPLKA_RS_UNREF(epp);
7832 7832 }
7833 7833
7834 7834 /*
7835 7835 * This routine is called in kernel context
7836 7836 */
7837 7837
7838 7838 /* ARGSUSED */
7839 7839 static void
7840 7840 daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7841 7841 ibt_async_code_t code, ibt_async_event_t *event)
7842 7842 {
7843 7843 daplka_evd_resource_t *evdp;
7844 7844 daplka_ia_resource_t *ia_rp;
7845 7845 minor_t ia_rnum;
7846 7846
7847 7847 if (event->ev_cq_hdl == NULL)
7848 7848 return;
7849 7849
7850 7850 mutex_enter(&daplka_dev->daplka_mutex);
7851 7851 evdp = ibt_get_cq_private(event->ev_cq_hdl);
7852 7852 if (evdp == NULL) {
7853 7853 mutex_exit(&daplka_dev->daplka_mutex);
7854 7854 DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
7855 7855 event->ev_cq_hdl);
7856 7856 return;
7857 7857 }
7858 7858 /* grab a reference to this evd resource */
7859 7859 DAPLKA_RS_REF(evdp);
7860 7860 mutex_exit(&daplka_dev->daplka_mutex);
7861 7861
7862 7862 /*
7863 7863 * The endpoint resource has the resource number corresponding to
7864 7864 * the IA resource. Use that to lookup the ia resource entry
7865 7865 */
7866 7866 ia_rnum = DAPLKA_RS_RNUM(evdp);
7867 7867 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7868 7868 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7869 7869 DERR("daplka_cq_async_handler: resource (%d) not found\n",
7870 7870 ia_rnum);
7871 7871 DAPLKA_RS_UNREF(evdp);
7872 7872 return;
7873 7873 }
7874 7874
7875 7875 /*
7876 7876 * Create an async event and chain it to the async evd
7877 7877 */
7878 7878 daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);
7879 7879
7880 7880 /* release all the refcount that were acquired */
7881 7881 DAPLKA_RS_UNREF(ia_rp);
7882 7882 DAPLKA_RS_UNREF(evdp);
7883 7883 }
7884 7884
7885 7885 /*
7886 7886 * This routine is called in kernel context, handles unaffiliated async errors
7887 7887 */
7888 7888
7889 7889 /* ARGSUSED */
7890 7890 static void
7891 7891 daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7892 7892 ibt_async_code_t code, ibt_async_event_t *event)
7893 7893 {
7894 7894 int i, j;
7895 7895 daplka_resource_blk_t *blk;
7896 7896 daplka_resource_t *rp;
7897 7897 daplka_ia_resource_t *ia_rp;
7898 7898
7899 7899 /*
7900 7900 * Walk the resource table looking for an ia that matches the
7901 7901 * hca_hdl.
7902 7902 */
7903 7903 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
7904 7904 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
7905 7905 blk = daplka_resource.daplka_rc_root[i];
7906 7906 if (blk == NULL)
7907 7907 continue;
7908 7908 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
7909 7909 rp = blk->daplka_rcblk_blks[j];
7910 7910 if ((rp == NULL) ||
7911 7911 ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
7912 7912 (rp->rs_type != DAPL_TYPE_IA)) {
7913 7913 continue;
7914 7914 }
7915 7915 /*
7916 7916 * rp is an IA resource check if it belongs
7917 7917 * to the hca/port for which we got the event
7918 7918 */
7919 7919 ia_rp = (daplka_ia_resource_t *)rp;
7920 7920 DAPLKA_RS_REF(ia_rp);
7921 7921 if ((hca_hdl == ia_rp->ia_hca_hdl) &&
7922 7922 (event->ev_port == ia_rp->ia_port_num)) {
7923 7923 /*
7924 7924 * walk the ep hash table. Acquire a
7925 7925 * reader lock. NULL dgid indicates
7926 7926 * local port up event.
7927 7927 */
7928 7928 daplka_hash_walk(&ia_rp->ia_ep_htbl,
7929 7929 daplka_ep_failback, NULL, RW_READER);
7930 7930 }
7931 7931 DAPLKA_RS_UNREF(ia_rp);
7932 7932 }
7933 7933 }
7934 7934 rw_exit(&daplka_resource.daplka_rct_lock);
7935 7935 }
7936 7936
7937 7937 static int
7938 7938 daplka_handle_hca_detach_event(ibt_async_event_t *event)
7939 7939 {
7940 7940 daplka_hca_t *hca;
7941 7941
7942 7942 /*
7943 7943 * find the hca with the matching guid
7944 7944 */
7945 7945 mutex_enter(&daplka_dev->daplka_mutex);
7946 7946 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7947 7947 hca = hca->hca_next) {
7948 7948 if (hca->hca_guid == event->ev_hca_guid) {
7949 7949 if (DAPLKA_HCA_BUSY(hca)) {
7950 7950 mutex_exit(&daplka_dev->daplka_mutex);
7951 7951 return (IBT_HCA_RESOURCES_NOT_FREED);
7952 7952 }
7953 7953 daplka_dequeue_hca(daplka_dev, hca);
7954 7954 break;
7955 7955 }
7956 7956 }
7957 7957 mutex_exit(&daplka_dev->daplka_mutex);
7958 7958
7959 7959 if (hca == NULL)
7960 7960 return (IBT_FAILURE);
7961 7961
7962 7962 return (daplka_fini_hca(daplka_dev, hca));
7963 7963 }
7964 7964
7965 7965 /*
7966 7966 * This routine is called in kernel context
7967 7967 */
7968 7968 static void
7969 7969 daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7970 7970 ibt_async_code_t code, ibt_async_event_t *event)
7971 7971 {
7972 7972 switch (code) {
7973 7973 case IBT_ERROR_CATASTROPHIC_CHAN:
7974 7974 case IBT_ERROR_INVALID_REQUEST_CHAN:
7975 7975 case IBT_ERROR_ACCESS_VIOLATION_CHAN:
7976 7976 case IBT_ERROR_PATH_MIGRATE_REQ:
7977 7977 D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
7978 7978 /* These events are affiliated with a the RC channel */
7979 7979 daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
7980 7980 break;
7981 7981 case IBT_ERROR_CQ:
7982 7982 /* This event is affiliated with a the CQ */
7983 7983 D2("daplka_async_handler(): IBT_ERROR_CQ\n");
7984 7984 daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
7985 7985 break;
7986 7986 case IBT_ERROR_PORT_DOWN:
7987 7987 D2("daplka_async_handler(): IBT_PORT_DOWN\n");
7988 7988 break;
7989 7989 case IBT_EVENT_PORT_UP:
7990 7990 D2("daplka_async_handler(): IBT_PORT_UP\n");
7991 7991 if (daplka_apm) {
7992 7992 daplka_un_async_handler(clnt_private, hca_hdl, code,
7993 7993 event);
7994 7994 }
7995 7995 break;
7996 7996 case IBT_HCA_ATTACH_EVENT:
7997 7997 /*
7998 7998 * NOTE: In some error recovery paths, it is possible to
7999 7999 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
8000 8000 */
8001 8001 D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
8002 8002 (void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
8003 8003 break;
8004 8004 case IBT_HCA_DETACH_EVENT:
8005 8005 D2("daplka_async_handler(): IBT_HCA_DETACH\n");
8006 8006 /* Free all hca resources and close the HCA. */
8007 8007 (void) daplka_handle_hca_detach_event(event);
8008 8008 break;
8009 8009 case IBT_EVENT_PATH_MIGRATED:
8010 8010 /* This event is affiliated with APM */
8011 8011 D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
8012 8012 break;
8013 8013 default:
8014 8014 D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
8015 8015 break;
8016 8016 }
8017 8017 }
8018 8018
8019 8019 /*
8020 8020 * This routine is called in kernel context related to Subnet events
8021 8021 */
8022 8022 /*ARGSUSED*/
8023 8023 static void
8024 8024 daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
8025 8025 ibt_subnet_event_t *event)
8026 8026 {
8027 8027 ib_gid_t *sgid = &gid;
8028 8028 ib_gid_t *dgid;
8029 8029
8030 8030 dgid = &event->sm_notice_gid;
8031 8031 switch (code) {
8032 8032 case IBT_SM_EVENT_GID_AVAIL:
8033 8033 /* This event is affiliated with remote port up */
8034 8034 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
8035 8035 if (daplka_apm)
8036 8036 daplka_sm_gid_avail(sgid, dgid);
8037 8037 return;
8038 8038 case IBT_SM_EVENT_GID_UNAVAIL:
8039 8039 /* This event is affiliated with remote port down */
8040 8040 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
8041 8041 return;
8042 8042 default:
8043 8043 D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
8044 8044 code);
8045 8045 return;
8046 8046 }
8047 8047 }
8048 8048
8049 8049 /*
8050 8050 * This routine is called in kernel context, handles Subnet GID avail events
8051 8051 * which correspond to remote port up. Setting up alternate path or path
8052 8052 * migration (failback) has to be initiated from the active side of the
8053 8053 * original connect.
8054 8054 */
8055 8055 static void
8056 8056 daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
8057 8057 {
8058 8058 int i, j;
8059 8059 daplka_resource_blk_t *blk;
8060 8060 daplka_resource_t *rp;
8061 8061 daplka_ia_resource_t *ia_rp;
8062 8062
8063 8063 D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
8064 8064 (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
8065 8065 (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
8066 8066
8067 8067 /*
8068 8068 * Walk the resource table looking for an ia that matches the sgid
8069 8069 */
8070 8070 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
8071 8071 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
8072 8072 blk = daplka_resource.daplka_rc_root[i];
8073 8073 if (blk == NULL)
8074 8074 continue;
8075 8075 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
8076 8076 rp = blk->daplka_rcblk_blks[j];
8077 8077 if ((rp == NULL) ||
8078 8078 ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
8079 8079 (rp->rs_type != DAPL_TYPE_IA)) {
8080 8080 continue;
8081 8081 }
8082 8082 /*
8083 8083 * rp is an IA resource check if its gid
8084 8084 * matches with the calling sgid
8085 8085 */
8086 8086 ia_rp = (daplka_ia_resource_t *)rp;
8087 8087 DAPLKA_RS_REF(ia_rp);
8088 8088 if ((sgid->gid_prefix ==
8089 8089 ia_rp->ia_hca_sgid.gid_prefix) &&
8090 8090 (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
8091 8091 /*
8092 8092 * walk the ep hash table. Acquire a
8093 8093 * reader lock.
8094 8094 */
8095 8095 daplka_hash_walk(&ia_rp->ia_ep_htbl,
8096 8096 daplka_ep_failback,
8097 8097 (void *)dgid, RW_READER);
8098 8098 }
8099 8099 DAPLKA_RS_UNREF(ia_rp);
8100 8100 }
8101 8101 }
8102 8102 rw_exit(&daplka_resource.daplka_rct_lock);
8103 8103 }
8104 8104
8105 8105 /*
8106 8106 * This routine is called in kernel context to get and set an alternate path
8107 8107 */
8108 8108 static int
8109 8109 daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
8110 8110 {
8111 8111 ibt_alt_path_info_t path_info;
8112 8112 ibt_alt_path_attr_t path_attr;
8113 8113 ibt_ap_returns_t ap_rets;
8114 8114 ibt_status_t status;
8115 8115
8116 8116 D2("daplka_ep_altpath : ibt_get_alt_path()\n");
8117 8117 bzero(&path_info, sizeof (ibt_alt_path_info_t));
8118 8118 bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
8119 8119 if (dgid != NULL) {
8120 8120 path_attr.apa_sgid = ep_rp->ep_sgid;
8121 8121 path_attr.apa_dgid = *dgid;
8122 8122 }
8123 8123 status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
8124 8124 &path_attr, &path_info);
8125 8125 if (status != IBT_SUCCESS) {
8126 8126 DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
8127 8127 status);
8128 8128 return (1);
8129 8129 }
8130 8130
8131 8131 D2("daplka_ep_altpath : ibt_set_alt_path()\n");
8132 8132 bzero(&ap_rets, sizeof (ibt_ap_returns_t));
8133 8133 status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
8134 8134 &path_info, NULL, 0, &ap_rets);
8135 8135 if ((status != IBT_SUCCESS) ||
8136 8136 (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
8137 8137 DERR("daplka_ep_altpath : ibt_set_alt_path failed "
8138 8138 "status %d ap_status %d\n", status, ap_rets.ap_status);
8139 8139 return (1);
8140 8140 }
8141 8141 return (0);
8142 8142 }
8143 8143
/*
 * This routine is called in kernel context to failback to the original path.
 * It is invoked via daplka_hash_walk() on each EP of an IA's EP hash table:
 * objp is the EP resource, arg is the remote dgid to match (NULL means the
 * walk was triggered by a local port-up event, which matches every
 * non-loopback endpoint).  Always returns 0 so the walk continues.
 */
static int
daplka_ep_failback(void *objp, void *arg)
{
	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
	ib_gid_t *dgid;
	ibt_status_t status;
	ibt_rc_chan_query_attr_t chan_attrs;
	int i;

	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)ep_rp->ep_sgid.gid_prefix,
	    (longlong_t)ep_rp->ep_sgid.gid_guid,
	    (longlong_t)ep_rp->ep_dgid.gid_prefix,
	    (longlong_t)ep_rp->ep_dgid.gid_guid);

	/*
	 * daplka_ep_failback is called from daplka_hash_walk
	 * which holds the read lock on hash table to protect
	 * the endpoint resource from removal
	 */
	/*
	 * NOTE(review): ep_lock is held for the remainder of this function,
	 * including across the blocking IBT calls below (ibt_set_alt_path
	 * with IBT_BLOCKING via daplka_ep_altpath, ibt_query_rc_channel,
	 * ibt_migrate_path) — confirm this is acceptable for this mutex.
	 */
	mutex_enter(&ep_rp->ep_lock);
	/* check for unconnected endpoints */
	/* first check for ep state */
	if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : endpoints not connected\n");
		return (0);
	}

	/* second check for gids: an all-zero sgid or dgid means no path */
	if (((ep_rp->ep_sgid.gid_prefix == 0) &&
	    (ep_rp->ep_sgid.gid_guid == 0)) ||
	    ((ep_rp->ep_dgid.gid_prefix == 0) &&
	    (ep_rp->ep_dgid.gid_guid == 0))) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : skip unconnected endpoints\n");
		return (0);
	}

	/*
	 * matching destination ep
	 * when dgid is NULL, the async event is a local port up.
	 * dgid becomes wild card, i.e. all endpoints match
	 */
	dgid = (ib_gid_t *)arg;
	if (dgid == NULL) {
		/* ignore loopback ep */
		if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
		    (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : skip loopback endpoints\n");
			return (0);
		}
	} else {
		/* matching remote ep */
		if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
		    (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : unrelated endpoints\n");
			return (0);
		}
	}

	/* call get and set altpath with original dgid used in ep_connect */
	if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
		mutex_exit(&ep_rp->ep_lock);
		return (0);
	}

	/*
	 * wait for migration state to be ARMed
	 * e.g. a post_send msg will transit mig_state from REARM to ARM
	 */
	/*
	 * NOTE(review): if the tunable daplka_query_aft_setaltpath is 0,
	 * this loop never runs and chan_attrs.rc_mig_state is read
	 * uninitialized by the trace below — confirm the tunable is
	 * always positive.
	 */
	for (i = 0; i < daplka_query_aft_setaltpath; i++) {
		bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
		status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
		if (status != IBT_SUCCESS) {
			mutex_exit(&ep_rp->ep_lock);
			DERR("daplka_ep_altpath : ibt_query_rc_channel err\n");
			return (0);
		}
		if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
			break;
	}

	/* trace the primary (P) and alternate (A) paths just queried */
	D2("daplka_ep_altpath : query[%d] mig_st=%d\n",
	    i, chan_attrs.rc_mig_state);
	D2("daplka_ep_altpath : P sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
	D2("daplka_ep_altpath : A sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);

	/* skip failback on ARMed state not reached or env override */
	if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_altpath : ARMed state not reached\n");
		return (0);
	}

	/* migrate the connection back onto the (restored) original path */
	D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
	status = ibt_migrate_path(ep_rp->ep_chan_hdl);
	if (status != IBT_SUCCESS) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_failback : migration failed "
		    "status %d\n", status);
		return (0);
	}

	/* call get and altpath with NULL dgid to indicate unspecified dgid */
	(void) daplka_ep_altpath(ep_rp, NULL);
	mutex_exit(&ep_rp->ep_lock);
	return (0);
}
8269 8269
8270 8270 /*
8271 8271 * IBTF wrappers used for resource accounting
8272 8272 */
8273 8273 static ibt_status_t
8274 8274 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
8275 8275 ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
8276 8276 ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
8277 8277 {
8278 8278 daplka_hca_t *hca_p;
8279 8279 uint32_t max_qps;
8280 8280 boolean_t acct_enabled;
8281 8281 ibt_status_t status;
8282 8282
8283 8283 acct_enabled = daplka_accounting_enabled;
8284 8284 hca_p = ep_rp->ep_hca;
8285 8285 max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;
↓ open down ↓ |
8285 lines elided |
↑ open up ↑ |
8286 8286
8287 8287 if (acct_enabled) {
8288 8288 if (daplka_max_qp_percent != 0 &&
8289 8289 max_qps <= hca_p->hca_qp_count) {
8290 8290 DERR("ibt_alloc_rc_channel: resource limit exceeded "
8291 8291 "(limit %d, count %d)\n", max_qps,
8292 8292 hca_p->hca_qp_count);
8293 8293 return (IBT_INSUFF_RESOURCE);
8294 8294 }
8295 8295 DAPLKA_RS_ACCT_INC(ep_rp, 1);
8296 - atomic_add_32(&hca_p->hca_qp_count, 1);
8296 + atomic_inc_32(&hca_p->hca_qp_count);
8297 8297 }
8298 8298 status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);
8299 8299
8300 8300 if (status != IBT_SUCCESS && acct_enabled) {
8301 8301 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8302 - atomic_add_32(&hca_p->hca_qp_count, -1);
8302 + atomic_dec_32(&hca_p->hca_qp_count);
8303 8303 }
8304 8304 return (status);
8305 8305 }
8306 8306
8307 8307 static ibt_status_t
8308 8308 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
8309 8309 {
8310 8310 daplka_hca_t *hca_p;
8311 8311 ibt_status_t status;
8312 8312
8313 8313 hca_p = ep_rp->ep_hca;
8314 8314
8315 8315 status = ibt_free_channel(chan_hdl);
8316 8316 if (status != IBT_SUCCESS) {
8317 8317 return (status);
8318 8318 }
8319 8319 if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
8320 8320 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8321 - atomic_add_32(&hca_p->hca_qp_count, -1);
8321 + atomic_dec_32(&hca_p->hca_qp_count);
8322 8322 }
8323 8323 return (status);
8324 8324 }
8325 8325
8326 8326 static ibt_status_t
8327 8327 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
8328 8328 ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
8329 8329 {
8330 8330 daplka_hca_t *hca_p;
8331 8331 uint32_t max_cqs;
8332 8332 boolean_t acct_enabled;
8333 8333 ibt_status_t status;
8334 8334
8335 8335 acct_enabled = daplka_accounting_enabled;
8336 8336 hca_p = evd_rp->evd_hca;
8337 8337 max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
8338 8338
8339 8339 if (acct_enabled) {
8340 8340 if (daplka_max_cq_percent != 0 &&
8341 8341 max_cqs <= hca_p->hca_cq_count) {
8342 8342 DERR("ibt_alloc_cq: resource limit exceeded "
8343 8343 "(limit %d, count %d)\n", max_cqs,
8344 8344 hca_p->hca_cq_count);
8345 8345 return (IBT_INSUFF_RESOURCE);
8346 8346 }
8347 8347 DAPLKA_RS_ACCT_INC(evd_rp, 1);
8348 - atomic_add_32(&hca_p->hca_cq_count, 1);
8348 + atomic_inc_32(&hca_p->hca_cq_count);
8349 8349 }
8350 8350 status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);
8351 8351
8352 8352 if (status != IBT_SUCCESS && acct_enabled) {
8353 8353 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8354 - atomic_add_32(&hca_p->hca_cq_count, -1);
8354 + atomic_dec_32(&hca_p->hca_cq_count);
8355 8355 }
8356 8356 return (status);
8357 8357 }
8358 8358
8359 8359 static ibt_status_t
8360 8360 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
8361 8361 {
8362 8362 daplka_hca_t *hca_p;
8363 8363 ibt_status_t status;
8364 8364
8365 8365 hca_p = evd_rp->evd_hca;
8366 8366
8367 8367 status = ibt_free_cq(cq_hdl);
8368 8368 if (status != IBT_SUCCESS) {
8369 8369 return (status);
8370 8370 }
8371 8371 if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
8372 8372 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8373 - atomic_add_32(&hca_p->hca_cq_count, -1);
8373 + atomic_dec_32(&hca_p->hca_cq_count);
8374 8374 }
8375 8375 return (status);
8376 8376 }
8377 8377
8378 8378 static ibt_status_t
8379 8379 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8380 8380 ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
8381 8381 {
8382 8382 daplka_hca_t *hca_p;
8383 8383 uint32_t max_pds;
8384 8384 boolean_t acct_enabled;
8385 8385 ibt_status_t status;
8386 8386
8387 8387 acct_enabled = daplka_accounting_enabled;
8388 8388 hca_p = pd_rp->pd_hca;
8389 8389 max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
8390 8390
8391 8391 if (acct_enabled) {
8392 8392 if (daplka_max_pd_percent != 0 &&
8393 8393 max_pds <= hca_p->hca_pd_count) {
8394 8394 DERR("ibt_alloc_pd: resource limit exceeded "
8395 8395 "(limit %d, count %d)\n", max_pds,
8396 8396 hca_p->hca_pd_count);
8397 8397 return (IBT_INSUFF_RESOURCE);
8398 8398 }
8399 8399 DAPLKA_RS_ACCT_INC(pd_rp, 1);
8400 - atomic_add_32(&hca_p->hca_pd_count, 1);
8400 + atomic_inc_32(&hca_p->hca_pd_count);
8401 8401 }
8402 8402 status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);
8403 8403
8404 8404 if (status != IBT_SUCCESS && acct_enabled) {
8405 8405 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8406 - atomic_add_32(&hca_p->hca_pd_count, -1);
8406 + atomic_dec_32(&hca_p->hca_pd_count);
8407 8407 }
8408 8408 return (status);
8409 8409 }
8410 8410
8411 8411 static ibt_status_t
8412 8412 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8413 8413 ibt_pd_hdl_t pd_hdl)
8414 8414 {
8415 8415 daplka_hca_t *hca_p;
8416 8416 ibt_status_t status;
8417 8417
8418 8418 hca_p = pd_rp->pd_hca;
8419 8419
8420 8420 status = ibt_free_pd(hca_hdl, pd_hdl);
8421 8421 if (status != IBT_SUCCESS) {
8422 8422 return (status);
8423 8423 }
8424 8424 if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
8425 8425 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8426 - atomic_add_32(&hca_p->hca_pd_count, -1);
8426 + atomic_dec_32(&hca_p->hca_pd_count);
8427 8427 }
8428 8428 return (status);
8429 8429 }
8430 8430
8431 8431 static ibt_status_t
8432 8432 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8433 8433 ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
8434 8434 ibt_rkey_t *rkey_p)
8435 8435 {
8436 8436 daplka_hca_t *hca_p;
8437 8437 uint32_t max_mws;
8438 8438 boolean_t acct_enabled;
8439 8439 ibt_status_t status;
8440 8440
8441 8441 acct_enabled = daplka_accounting_enabled;
8442 8442 hca_p = mw_rp->mw_hca;
8443 8443 max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
8444 8444
8445 8445 if (acct_enabled) {
8446 8446 if (daplka_max_mw_percent != 0 &&
8447 8447 max_mws <= hca_p->hca_mw_count) {
8448 8448 DERR("ibt_alloc_mw: resource limit exceeded "
8449 8449 "(limit %d, count %d)\n", max_mws,
8450 8450 hca_p->hca_mw_count);
8451 8451 return (IBT_INSUFF_RESOURCE);
8452 8452 }
8453 8453 DAPLKA_RS_ACCT_INC(mw_rp, 1);
8454 - atomic_add_32(&hca_p->hca_mw_count, 1);
8454 + atomic_inc_32(&hca_p->hca_mw_count);
8455 8455 }
8456 8456 status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);
8457 8457
8458 8458 if (status != IBT_SUCCESS && acct_enabled) {
8459 8459 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8460 - atomic_add_32(&hca_p->hca_mw_count, -1);
8460 + atomic_dec_32(&hca_p->hca_mw_count);
8461 8461 }
8462 8462 return (status);
8463 8463 }
8464 8464
8465 8465 static ibt_status_t
8466 8466 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8467 8467 ibt_mw_hdl_t mw_hdl)
8468 8468 {
8469 8469 daplka_hca_t *hca_p;
8470 8470 ibt_status_t status;
8471 8471
8472 8472 hca_p = mw_rp->mw_hca;
8473 8473
8474 8474 status = ibt_free_mw(hca_hdl, mw_hdl);
8475 8475 if (status != IBT_SUCCESS) {
8476 8476 return (status);
8477 8477 }
8478 8478 if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
8479 8479 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8480 - atomic_add_32(&hca_p->hca_mw_count, -1);
8480 + atomic_dec_32(&hca_p->hca_mw_count);
8481 8481 }
8482 8482 return (status);
8483 8483 }
8484 8484
8485 8485 static ibt_status_t
8486 8486 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8487 8487 ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
8488 8488 ibt_mr_desc_t *mr_desc_p)
8489 8489 {
8490 8490 daplka_hca_t *hca_p;
8491 8491 uint32_t max_mrs;
8492 8492 boolean_t acct_enabled;
8493 8493 ibt_status_t status;
8494 8494
8495 8495 acct_enabled = daplka_accounting_enabled;
8496 8496 hca_p = mr_rp->mr_hca;
8497 8497 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
8498 8498
8499 8499 if (acct_enabled) {
8500 8500 if (daplka_max_mr_percent != 0 &&
8501 8501 max_mrs <= hca_p->hca_mr_count) {
8502 8502 DERR("ibt_register_mr: resource limit exceeded "
8503 8503 "(limit %d, count %d)\n", max_mrs,
8504 8504 hca_p->hca_mr_count);
8505 8505 return (IBT_INSUFF_RESOURCE);
8506 8506 }
8507 8507 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8508 - atomic_add_32(&hca_p->hca_mr_count, 1);
8508 + atomic_inc_32(&hca_p->hca_mr_count);
8509 8509 }
8510 8510 status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);
8511 8511
8512 8512 if (status != IBT_SUCCESS && acct_enabled) {
8513 8513 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8514 - atomic_add_32(&hca_p->hca_mr_count, -1);
8514 + atomic_dec_32(&hca_p->hca_mr_count);
8515 8515 }
8516 8516 return (status);
8517 8517 }
8518 8518
8519 8519 static ibt_status_t
8520 8520 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
8521 8521 ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
8522 8522 ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
8523 8523 ibt_mr_desc_t *mr_desc_p)
8524 8524 {
8525 8525 daplka_hca_t *hca_p;
8526 8526 uint32_t max_mrs;
8527 8527 boolean_t acct_enabled;
8528 8528 ibt_status_t status;
8529 8529
8530 8530 acct_enabled = daplka_accounting_enabled;
8531 8531 hca_p = mr_rp->mr_hca;
8532 8532 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
8533 8533
8534 8534 if (acct_enabled) {
8535 8535 if (daplka_max_mr_percent != 0 &&
8536 8536 max_mrs <= hca_p->hca_mr_count) {
8537 8537 DERR("ibt_register_shared_mr: resource limit exceeded "
8538 8538 "(limit %d, count %d)\n", max_mrs,
8539 8539 hca_p->hca_mr_count);
8540 8540 return (IBT_INSUFF_RESOURCE);
8541 8541 }
8542 8542 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8543 - atomic_add_32(&hca_p->hca_mr_count, 1);
8543 + atomic_inc_32(&hca_p->hca_mr_count);
8544 8544 }
8545 8545 status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
8546 8546 smr_attr_p, mr_hdl_p, mr_desc_p);
8547 8547
8548 8548 if (status != IBT_SUCCESS && acct_enabled) {
8549 8549 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8550 - atomic_add_32(&hca_p->hca_mr_count, -1);
8550 + atomic_dec_32(&hca_p->hca_mr_count);
8551 8551 }
8552 8552 return (status);
8553 8553 }
8554 8554
8555 8555 static ibt_status_t
8556 8556 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8557 8557 ibt_mr_hdl_t mr_hdl)
8558 8558 {
8559 8559 daplka_hca_t *hca_p;
8560 8560 ibt_status_t status;
8561 8561
8562 8562 hca_p = mr_rp->mr_hca;
8563 8563
8564 8564 status = ibt_deregister_mr(hca_hdl, mr_hdl);
8565 8565 if (status != IBT_SUCCESS) {
8566 8566 return (status);
8567 8567 }
8568 8568 if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
8569 8569 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8570 - atomic_add_32(&hca_p->hca_mr_count, -1);
8570 + atomic_dec_32(&hca_p->hca_mr_count);
8571 8571 }
8572 8572 return (status);
8573 8573 }
8574 8574
8575 8575 static ibt_status_t
8576 8576 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
8577 8577 ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
8578 8578 ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
8579 8579 {
8580 8580 daplka_hca_t *hca_p;
8581 8581 uint32_t max_srqs;
8582 8582 boolean_t acct_enabled;
8583 8583 ibt_status_t status;
8584 8584
8585 8585 acct_enabled = daplka_accounting_enabled;
8586 8586 hca_p = srq_rp->srq_hca;
8587 8587 max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
8588 8588
8589 8589 if (acct_enabled) {
8590 8590 if (daplka_max_srq_percent != 0 &&
8591 8591 max_srqs <= hca_p->hca_srq_count) {
8592 8592 DERR("ibt_alloc_srq: resource limit exceeded "
8593 8593 "(limit %d, count %d)\n", max_srqs,
8594 8594 hca_p->hca_srq_count);
8595 8595 return (IBT_INSUFF_RESOURCE);
8596 8596 }
8597 8597 DAPLKA_RS_ACCT_INC(srq_rp, 1);
8598 - atomic_add_32(&hca_p->hca_srq_count, 1);
8598 + atomic_inc_32(&hca_p->hca_srq_count);
8599 8599 }
8600 8600 status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);
8601 8601
8602 8602 if (status != IBT_SUCCESS && acct_enabled) {
8603 8603 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8604 - atomic_add_32(&hca_p->hca_srq_count, -1);
8604 + atomic_dec_32(&hca_p->hca_srq_count);
8605 8605 }
8606 8606 return (status);
8607 8607 }
8608 8608
8609 8609 static ibt_status_t
8610 8610 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
8611 8611 {
8612 8612 daplka_hca_t *hca_p;
8613 8613 ibt_status_t status;
8614 8614
8615 8615 hca_p = srq_rp->srq_hca;
8616 8616
8617 8617 D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);
8618 8618
8619 8619 status = ibt_free_srq(srq_hdl);
8620 8620 if (status != IBT_SUCCESS) {
8621 8621 return (status);
8622 8622 }
8623 8623 if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
8624 8624 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8625 - atomic_add_32(&hca_p->hca_srq_count, -1);
8625 + atomic_dec_32(&hca_p->hca_srq_count);
8626 8626 }
8627 8627 return (status);
8628 8628 }
8629 8629
8630 8630
8631 8631 static int
8632 8632 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
8633 8633 cred_t *cred, int *rvalp)
8634 8634 {
8635 8635 int error;
8636 8636
8637 8637 switch (cmd) {
8638 8638 case DAPL_IA_CREATE:
8639 8639 error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
8640 8640 break;
8641 8641
8642 8642 /* can potentially add other commands here */
8643 8643
8644 8644 default:
8645 8645 DERR("daplka_common_ioctl: cmd not supported\n");
8646 8646 error = DDI_FAILURE;
8647 8647 }
8648 8648 return (error);
8649 8649 }
8650 8650
8651 8651 static int
8652 8652 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8653 8653 cred_t *cred, int *rvalp)
8654 8654 {
8655 8655 int error;
8656 8656
8657 8657 switch (cmd) {
8658 8658 case DAPL_EVD_CREATE:
8659 8659 error = daplka_evd_create(rp, arg, mode, cred, rvalp);
8660 8660 break;
8661 8661
8662 8662 case DAPL_CQ_RESIZE:
8663 8663 error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
8664 8664 break;
8665 8665
8666 8666 case DAPL_EVENT_POLL:
8667 8667 error = daplka_event_poll(rp, arg, mode, cred, rvalp);
8668 8668 break;
8669 8669
8670 8670 case DAPL_EVENT_WAKEUP:
8671 8671 error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
8672 8672 break;
8673 8673
8674 8674 case DAPL_EVD_MODIFY_CNO:
8675 8675 error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
8676 8676 break;
8677 8677
8678 8678 case DAPL_EVD_FREE:
8679 8679 error = daplka_evd_free(rp, arg, mode, cred, rvalp);
8680 8680 break;
8681 8681
8682 8682 default:
8683 8683 DERR("daplka_evd_ioctl: cmd not supported\n");
8684 8684 error = DDI_FAILURE;
8685 8685 }
8686 8686 return (error);
8687 8687 }
8688 8688
8689 8689 static int
8690 8690 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8691 8691 cred_t *cred, int *rvalp)
8692 8692 {
8693 8693 int error;
8694 8694
8695 8695 switch (cmd) {
8696 8696 case DAPL_EP_MODIFY:
8697 8697 error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
8698 8698 break;
8699 8699
8700 8700 case DAPL_EP_FREE:
8701 8701 error = daplka_ep_free(rp, arg, mode, cred, rvalp);
8702 8702 break;
8703 8703
8704 8704 case DAPL_EP_CONNECT:
8705 8705 error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
8706 8706 break;
8707 8707
8708 8708 case DAPL_EP_DISCONNECT:
8709 8709 error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
8710 8710 break;
8711 8711
8712 8712 case DAPL_EP_REINIT:
8713 8713 error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
8714 8714 break;
8715 8715
8716 8716 case DAPL_EP_CREATE:
8717 8717 error = daplka_ep_create(rp, arg, mode, cred, rvalp);
8718 8718 break;
8719 8719
8720 8720 default:
8721 8721 DERR("daplka_ep_ioctl: cmd not supported\n");
8722 8722 error = DDI_FAILURE;
8723 8723 }
8724 8724 return (error);
8725 8725 }
8726 8726
8727 8727 static int
8728 8728 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8729 8729 cred_t *cred, int *rvalp)
8730 8730 {
8731 8731 int error;
8732 8732
8733 8733 switch (cmd) {
8734 8734 case DAPL_MR_REGISTER:
8735 8735 error = daplka_mr_register(rp, arg, mode, cred, rvalp);
8736 8736 break;
8737 8737
8738 8738 case DAPL_MR_REGISTER_LMR:
8739 8739 error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
8740 8740 break;
8741 8741
8742 8742 case DAPL_MR_REGISTER_SHARED:
8743 8743 error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
8744 8744 break;
8745 8745
8746 8746 case DAPL_MR_DEREGISTER:
8747 8747 error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
8748 8748 break;
8749 8749
8750 8750 case DAPL_MR_SYNC:
8751 8751 error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
8752 8752 break;
8753 8753
8754 8754 default:
8755 8755 DERR("daplka_mr_ioctl: cmd not supported\n");
8756 8756 error = DDI_FAILURE;
8757 8757 }
8758 8758 return (error);
8759 8759 }
8760 8760
8761 8761 static int
8762 8762 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8763 8763 cred_t *cred, int *rvalp)
8764 8764 {
8765 8765 int error;
8766 8766
8767 8767 switch (cmd) {
8768 8768 case DAPL_MW_ALLOC:
8769 8769 error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
8770 8770 break;
8771 8771
8772 8772 case DAPL_MW_FREE:
8773 8773 error = daplka_mw_free(rp, arg, mode, cred, rvalp);
8774 8774 break;
8775 8775
8776 8776 default:
8777 8777 DERR("daplka_mw_ioctl: cmd not supported\n");
8778 8778 error = DDI_FAILURE;
8779 8779 }
8780 8780 return (error);
8781 8781 }
8782 8782
8783 8783 static int
8784 8784 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8785 8785 cred_t *cred, int *rvalp)
8786 8786 {
8787 8787 int error;
8788 8788
8789 8789 switch (cmd) {
8790 8790 case DAPL_CNO_ALLOC:
8791 8791 error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
8792 8792 break;
8793 8793
8794 8794 case DAPL_CNO_FREE:
8795 8795 error = daplka_cno_free(rp, arg, mode, cred, rvalp);
8796 8796 break;
8797 8797
8798 8798 case DAPL_CNO_WAIT:
8799 8799 error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
8800 8800 break;
8801 8801
8802 8802 default:
8803 8803 DERR("daplka_cno_ioctl: cmd not supported\n");
8804 8804 error = DDI_FAILURE;
8805 8805 }
8806 8806 return (error);
8807 8807 }
8808 8808
8809 8809 static int
8810 8810 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8811 8811 cred_t *cred, int *rvalp)
8812 8812 {
8813 8813 int error;
8814 8814
8815 8815 switch (cmd) {
8816 8816 case DAPL_PD_ALLOC:
8817 8817 error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
8818 8818 break;
8819 8819
8820 8820 case DAPL_PD_FREE:
8821 8821 error = daplka_pd_free(rp, arg, mode, cred, rvalp);
8822 8822 break;
8823 8823
8824 8824 default:
8825 8825 DERR("daplka_pd_ioctl: cmd not supported\n");
8826 8826 error = DDI_FAILURE;
8827 8827 }
8828 8828 return (error);
8829 8829 }
8830 8830
8831 8831 static int
8832 8832 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8833 8833 cred_t *cred, int *rvalp)
8834 8834 {
8835 8835 int error;
8836 8836
8837 8837 switch (cmd) {
8838 8838 case DAPL_SERVICE_REGISTER:
8839 8839 error = daplka_service_register(rp, arg, mode, cred, rvalp);
8840 8840 break;
8841 8841
8842 8842 case DAPL_SERVICE_DEREGISTER:
8843 8843 error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
8844 8844 break;
8845 8845
8846 8846 default:
8847 8847 DERR("daplka_sp_ioctl: cmd not supported\n");
8848 8848 error = DDI_FAILURE;
8849 8849 }
8850 8850 return (error);
8851 8851 }
8852 8852
8853 8853 static int
8854 8854 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8855 8855 cred_t *cred, int *rvalp)
8856 8856 {
8857 8857 int error;
8858 8858
8859 8859 switch (cmd) {
8860 8860 case DAPL_SRQ_CREATE:
8861 8861 error = daplka_srq_create(rp, arg, mode, cred, rvalp);
8862 8862 break;
8863 8863
8864 8864 case DAPL_SRQ_RESIZE:
8865 8865 error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
8866 8866 break;
8867 8867
8868 8868 case DAPL_SRQ_FREE:
8869 8869 error = daplka_srq_free(rp, arg, mode, cred, rvalp);
8870 8870 break;
8871 8871
8872 8872 default:
8873 8873 DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
8874 8874 error = DDI_FAILURE;
8875 8875 break;
8876 8876 }
8877 8877 return (error);
8878 8878 }
8879 8879
8880 8880 static int
8881 8881 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8882 8882 cred_t *cred, int *rvalp)
8883 8883 {
8884 8884 int error;
8885 8885
8886 8886 switch (cmd) {
8887 8887 case DAPL_CR_ACCEPT:
8888 8888 error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
8889 8889 break;
8890 8890
8891 8891 case DAPL_CR_REJECT:
8892 8892 error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
8893 8893 break;
8894 8894
8895 8895 case DAPL_IA_QUERY:
8896 8896 error = daplka_ia_query(rp, arg, mode, cred, rvalp);
8897 8897 break;
8898 8898
8899 8899 case DAPL_CR_HANDOFF:
8900 8900 error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
8901 8901 break;
8902 8902
8903 8903 default:
8904 8904 DERR("daplka_misc_ioctl: cmd not supported\n");
8905 8905 error = DDI_FAILURE;
8906 8906 }
8907 8907 return (error);
8908 8908 }
8909 8909
/*ARGSUSED*/
/*
 * Top-level ioctl entry point. Maps the minor number back to an IA
 * resource, validates ownership, then dispatches by the type bits
 * encoded in the command.
 */
static int
daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rvalp)
{
	daplka_ia_resource_t *ia_rp;
	minor_t rnum;
	int error = 0;

	/* look up the resource bound to this clone minor */
	rnum = getminor(dev);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
	if (ia_rp == NULL) {
		DERR("ioctl: resource not found, rnum %d\n", rnum);
		return (ENXIO);
	}

	D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
	if (DAPLKA_RS_RESERVED(ia_rp)) {
		/*
		 * slot is reserved but has no IA attached yet; lookup takes
		 * no reference on reserved entries, so return without UNREF.
		 */
		error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
		return (error);
	}
	if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
		DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = EINVAL;
		goto cleanup;
	}
	/* only the process that created the IA may issue ioctls on it */
	if (ia_rp->ia_pid != ddi_get_pid()) {
		DERR("ioctl: ia_pid %d != pid %d\n",
		    ia_rp->ia_pid, ddi_get_pid());
		error = EINVAL;
		goto cleanup;
	}

	/* dispatch on the resource-type bits of the command */
	switch (cmd & DAPL_TYPE_MASK) {
	case DAPL_TYPE_EVD:
		error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_EP:
		error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MR:
		error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MW:
		error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_PD:
		error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SP:
		error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_CNO:
		error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MISC:
		error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SRQ:
		error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = DDI_FAILURE;
	}

cleanup:;
	/* drop the reference taken by daplka_resource_lookup() */
	DAPLKA_RS_UNREF(ia_rp);
	return (error);
}
8989 8989
8990 8990 /* ARGSUSED */
8991 8991 static int
8992 8992 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
8993 8993 {
8994 8994 minor_t rnum;
8995 8995
8996 8996 /*
8997 8997 * Char only
8998 8998 */
8999 8999 if (otyp != OTYP_CHR) {
9000 9000 return (EINVAL);
9001 9001 }
9002 9002
9003 9003 /*
9004 9004 * Only zero can be opened, clones are used for resources.
9005 9005 */
9006 9006 if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
9007 9007 DERR("daplka_open: bad minor %d\n", getminor(*devp));
9008 9008 return (ENODEV);
9009 9009 }
9010 9010
9011 9011 /*
9012 9012 * - allocate new minor number
9013 9013 * - update devp argument to new device
9014 9014 */
9015 9015 if (daplka_resource_reserve(&rnum) == 0) {
9016 9016 *devp = makedevice(getmajor(*devp), rnum);
9017 9017 } else {
9018 9018 return (ENOMEM);
9019 9019 }
9020 9020
9021 9021 return (DDI_SUCCESS);
9022 9022 }
9023 9023
9024 9024 /* ARGSUSED */
9025 9025 static int
9026 9026 daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
9027 9027 {
↓ open down ↓ |
392 lines elided |
↑ open up ↑ |
9028 9028 daplka_ia_resource_t *ia_rp;
9029 9029 minor_t rnum = getminor(dev);
9030 9030
9031 9031 /*
9032 9032 * Char only
9033 9033 */
9034 9034 if (otyp != OTYP_CHR) {
9035 9035 return (EINVAL);
9036 9036 }
9037 9037 D2("daplka_close: closing rnum = %d\n", rnum);
9038 - atomic_add_32(&daplka_pending_close, 1);
9038 + atomic_inc_32(&daplka_pending_close);
9039 9039
9040 9040 /*
9041 9041 * remove from resource table.
9042 9042 */
9043 9043 ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
9044 9044
9045 9045 /*
9046 9046 * remove the initial reference
9047 9047 */
9048 9048 if (ia_rp != NULL) {
9049 9049 DAPLKA_RS_UNREF(ia_rp);
9050 9050 }
9051 - atomic_add_32(&daplka_pending_close, -1);
9051 + atomic_dec_32(&daplka_pending_close);
9052 9052 return (DDI_SUCCESS);
9053 9053 }
9054 9054
9055 9055
9056 9056 /*
9057 9057 * Resource management routines
9058 9058 *
9059 9059 * We start with no resource array. Each time we run out of slots, we
9060 9060 * reallocate a new larger array and copy the pointer to the new array and
9061 9061 * a new resource blk is allocated and added to the hash table.
9062 9062 *
9063 9063 * The resource control block contains:
9064 9064 * root - array of pointer of resource blks
9065 9065 * sz - current size of array.
9066 9066 * len - last valid entry in array.
9067 9067 *
9068 9068 * A search operation based on a resource number is as follows:
9069 9069 * index = rnum / RESOURCE_BLKSZ;
9070 9070 * ASSERT(index < resource_block.len);
9071 9071 * ASSERT(index < resource_block.sz);
9072 9072 * offset = rnum % RESOURCE_BLKSZ;
9073 9073 * ASSERT(offset >= resource_block.root[index]->base);
9074 9074 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
9075 9075 * return resource_block.root[index]->blks[offset];
9076 9076 *
9077 9077 * A resource blk is freed when its used count reaches zero.
9078 9078 */
9079 9079
9080 9080 /*
9081 9081 * initializes the global resource table
9082 9082 */
9083 9083 static void
9084 9084 daplka_resource_init(void)
9085 9085 {
9086 9086 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource))
9087 9087 rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
9088 9088 daplka_resource.daplka_rc_len = 0;
9089 9089 daplka_resource.daplka_rc_sz = 0;
9090 9090 daplka_resource.daplka_rc_cnt = 0;
9091 9091 daplka_resource.daplka_rc_flag = 0;
9092 9092 daplka_resource.daplka_rc_root = NULL;
9093 9093 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource))
9094 9094 }
9095 9095
9096 9096 /*
9097 9097 * destroys the global resource table
9098 9098 */
9099 9099 static void
9100 9100 daplka_resource_fini(void)
9101 9101 {
9102 9102 int i;
9103 9103
9104 9104 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9105 9105 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9106 9106 daplka_resource_blk_t *blk;
9107 9107 int j;
9108 9108
9109 9109 blk = daplka_resource.daplka_rc_root[i];
9110 9110 if (blk == NULL) {
9111 9111 continue;
9112 9112 }
9113 9113 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9114 9114 if (blk->daplka_rcblk_blks[j] != NULL) {
9115 9115 DERR("resource_fini: non-null slot %d, %p\n",
9116 9116 j, blk->daplka_rcblk_blks[j]);
9117 9117 }
9118 9118 }
9119 9119 kmem_free(blk, sizeof (*blk));
9120 9120 daplka_resource.daplka_rc_root[i] = NULL;
9121 9121 }
9122 9122 if (daplka_resource.daplka_rc_root != NULL) {
9123 9123 uint_t sz;
9124 9124
9125 9125 sz = daplka_resource.daplka_rc_sz *
9126 9126 sizeof (daplka_resource_blk_t *);
9127 9127 kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
9128 9128 daplka_resource.daplka_rc_root = NULL;
9129 9129 daplka_resource.daplka_rc_len = 0;
9130 9130 daplka_resource.daplka_rc_sz = 0;
9131 9131 }
9132 9132 rw_exit(&daplka_resource.daplka_rct_lock);
9133 9133 rw_destroy(&daplka_resource.daplka_rct_lock);
9134 9134 }
9135 9135
9136 9136 /*
9137 9137 * reserves a slot in the global resource table.
9138 9138 * this is called by the open() syscall. it is needed because
9139 9139 * at open() time, we do not have sufficient information to
9140 9140 * create an IA resource. the library needs to subsequently
9141 9141 * call daplka_ia_create to insert an IA resource into this
9142 9142 * reserved slot.
9143 9143 */
9144 9144 static int
9145 9145 daplka_resource_reserve(minor_t *rnum)
9146 9146 {
9147 9147 int i, j, empty = -1;
9148 9148 daplka_resource_blk_t *blk;
9149 9149
9150 9150 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9151 9151 /*
9152 9152 * Try to find an empty slot
9153 9153 */
9154 9154 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9155 9155 blk = daplka_resource.daplka_rc_root[i];
9156 9156 if (blk != NULL && blk->daplka_rcblk_avail > 0) {
9157 9157
9158 9158 D3("resource_alloc: available blks %d\n",
9159 9159 blk->daplka_rcblk_avail);
9160 9160
9161 9161 /*
9162 9162 * found an empty slot in this blk
9163 9163 */
9164 9164 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9165 9165 if (blk->daplka_rcblk_blks[j] == NULL) {
9166 9166 *rnum = (minor_t)
9167 9167 (j + (i * DAPLKA_RC_BLKSZ));
9168 9168 blk->daplka_rcblk_blks[j] =
9169 9169 (daplka_resource_t *)
9170 9170 DAPLKA_RC_RESERVED;
9171 9171 blk->daplka_rcblk_avail--;
9172 9172 daplka_resource.daplka_rc_cnt++;
9173 9173 rw_exit(&daplka_resource.
9174 9174 daplka_rct_lock);
9175 9175 return (0);
9176 9176 }
9177 9177 }
9178 9178 } else if (blk == NULL && empty < 0) {
9179 9179 /*
9180 9180 * remember first empty slot
9181 9181 */
9182 9182 empty = i;
9183 9183 }
9184 9184 }
9185 9185
9186 9186 /*
9187 9187 * Couldn't find anything, allocate a new blk
9188 9188 * Do we need to reallocate the root array
9189 9189 */
9190 9190 if (empty < 0) {
9191 9191 if (daplka_resource.daplka_rc_len ==
9192 9192 daplka_resource.daplka_rc_sz) {
9193 9193 /*
9194 9194 * Allocate new array and copy current stuff into it
9195 9195 */
9196 9196 daplka_resource_blk_t **p;
9197 9197 uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
9198 9198 DAPLKA_RC_BLKSZ;
9199 9199
9200 9200 D3("resource_alloc: increasing no. of buckets to %d\n",
9201 9201 newsz);
9202 9202
9203 9203 p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);
9204 9204
9205 9205 if (daplka_resource.daplka_rc_root) {
9206 9206 uint_t oldsz;
9207 9207
9208 9208 oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
9209 9209 (int)sizeof (*p));
9210 9210
9211 9211 /*
9212 9212 * Copy old data into new space and
9213 9213 * free old stuff
9214 9214 */
9215 9215 bcopy(daplka_resource.daplka_rc_root, p, oldsz);
9216 9216 kmem_free(daplka_resource.daplka_rc_root,
9217 9217 oldsz);
9218 9218 }
9219 9219
9220 9220 daplka_resource.daplka_rc_root = p;
9221 9221 daplka_resource.daplka_rc_sz = (int)newsz;
9222 9222 }
9223 9223
9224 9224 empty = daplka_resource.daplka_rc_len;
9225 9225 daplka_resource.daplka_rc_len++;
9226 9226
9227 9227 D3("resource_alloc: daplka_rc_len %d\n",
9228 9228 daplka_resource.daplka_rc_len);
9229 9229 }
9230 9230
9231 9231 /*
9232 9232 * Allocate a new blk
9233 9233 */
9234 9234 blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
9235 9235 ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
9236 9236 daplka_resource.daplka_rc_root[empty] = blk;
9237 9237 blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;
9238 9238
9239 9239 /*
9240 9240 * Allocate slot
9241 9241 */
9242 9242 *rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
9243 9243 blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
9244 9244 daplka_resource.daplka_rc_cnt++;
9245 9245 rw_exit(&daplka_resource.daplka_rct_lock);
9246 9246
9247 9247 return (0);
9248 9248 }
9249 9249
9250 9250 /*
9251 9251 * removes resource from global resource table
9252 9252 */
9253 9253 static daplka_resource_t *
9254 9254 daplka_resource_remove(minor_t rnum)
9255 9255 {
9256 9256 int i, j;
9257 9257 daplka_resource_blk_t *blk;
9258 9258 daplka_resource_t *p;
9259 9259
9260 9260 i = (int)(rnum / DAPLKA_RC_BLKSZ);
9261 9261 j = (int)(rnum % DAPLKA_RC_BLKSZ);
9262 9262
9263 9263 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9264 9264 if (i >= daplka_resource.daplka_rc_len) {
9265 9265 rw_exit(&daplka_resource.daplka_rct_lock);
9266 9266 DERR("resource_remove: invalid rnum %d\n", rnum);
9267 9267 return (NULL);
9268 9268 }
9269 9269
9270 9270 ASSERT(daplka_resource.daplka_rc_root);
9271 9271 ASSERT(i < daplka_resource.daplka_rc_len);
9272 9272 ASSERT(i < daplka_resource.daplka_rc_sz);
9273 9273 blk = daplka_resource.daplka_rc_root[i];
9274 9274 if (blk == NULL) {
9275 9275 rw_exit(&daplka_resource.daplka_rct_lock);
9276 9276 DERR("resource_remove: invalid rnum %d\n", rnum);
9277 9277 return (NULL);
9278 9278 }
9279 9279
9280 9280 if (blk->daplka_rcblk_blks[j] == NULL) {
9281 9281 rw_exit(&daplka_resource.daplka_rct_lock);
9282 9282 DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
9283 9283 return (NULL);
9284 9284 }
9285 9285 p = blk->daplka_rcblk_blks[j];
9286 9286 blk->daplka_rcblk_blks[j] = NULL;
9287 9287 blk->daplka_rcblk_avail++;
9288 9288 if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
9289 9289 /*
9290 9290 * free this blk
9291 9291 */
9292 9292 kmem_free(blk, sizeof (*blk));
9293 9293 daplka_resource.daplka_rc_root[i] = NULL;
9294 9294 }
9295 9295 daplka_resource.daplka_rc_cnt--;
9296 9296 rw_exit(&daplka_resource.daplka_rct_lock);
9297 9297
9298 9298 if ((intptr_t)p == DAPLKA_RC_RESERVED) {
9299 9299 return (NULL);
9300 9300 } else {
9301 9301 return (p);
9302 9302 }
9303 9303 }
9304 9304
9305 9305 /*
9306 9306 * inserts resource into the slot designated by rnum
9307 9307 */
9308 9308 static int
9309 9309 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
9310 9310 {
9311 9311 int i, j, error = -1;
9312 9312 daplka_resource_blk_t *blk;
9313 9313
9314 9314 /*
9315 9315 * Find resource and lock it in WRITER mode
9316 9316 * search for available resource slot
9317 9317 */
9318 9318
9319 9319 i = (int)(rnum / DAPLKA_RC_BLKSZ);
9320 9320 j = (int)(rnum % DAPLKA_RC_BLKSZ);
9321 9321
9322 9322 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9323 9323 if (i >= daplka_resource.daplka_rc_len) {
9324 9324 rw_exit(&daplka_resource.daplka_rct_lock);
9325 9325 DERR("resource_insert: resource %d not found\n", rnum);
9326 9326 return (-1);
9327 9327 }
9328 9328
9329 9329 blk = daplka_resource.daplka_rc_root[i];
9330 9330 if (blk != NULL) {
9331 9331 ASSERT(i < daplka_resource.daplka_rc_len);
9332 9332 ASSERT(i < daplka_resource.daplka_rc_sz);
9333 9333
9334 9334 if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
9335 9335 blk->daplka_rcblk_blks[j] = rp;
9336 9336 error = 0;
9337 9337 } else {
9338 9338 DERR("resource_insert: %d not reserved, blk = %p\n",
9339 9339 rnum, blk->daplka_rcblk_blks[j]);
9340 9340 }
9341 9341 } else {
9342 9342 DERR("resource_insert: resource %d not found\n", rnum);
9343 9343 }
9344 9344 rw_exit(&daplka_resource.daplka_rct_lock);
9345 9345 return (error);
9346 9346 }
9347 9347
9348 9348 /*
9349 9349 * finds resource using minor device number
9350 9350 */
9351 9351 static daplka_resource_t *
9352 9352 daplka_resource_lookup(minor_t rnum)
9353 9353 {
9354 9354 int i, j;
9355 9355 daplka_resource_blk_t *blk;
9356 9356 daplka_resource_t *rp;
9357 9357
9358 9358 /*
9359 9359 * Find resource and lock it in READER mode
9360 9360 * search for available resource slot
9361 9361 */
9362 9362
9363 9363 i = (int)(rnum / DAPLKA_RC_BLKSZ);
9364 9364 j = (int)(rnum % DAPLKA_RC_BLKSZ);
9365 9365
9366 9366 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
9367 9367 if (i >= daplka_resource.daplka_rc_len) {
9368 9368 rw_exit(&daplka_resource.daplka_rct_lock);
9369 9369 DERR("resource_lookup: resource %d not found\n", rnum);
9370 9370 return (NULL);
9371 9371 }
9372 9372
9373 9373 blk = daplka_resource.daplka_rc_root[i];
9374 9374 if (blk != NULL) {
9375 9375 ASSERT(i < daplka_resource.daplka_rc_len);
9376 9376 ASSERT(i < daplka_resource.daplka_rc_sz);
9377 9377
9378 9378 rp = blk->daplka_rcblk_blks[j];
9379 9379 if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
9380 9380 D3("resource_lookup: %d not found, blk = %p\n",
9381 9381 rnum, blk->daplka_rcblk_blks[j]);
9382 9382 } else {
9383 9383 DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
9384 9384 }
9385 9385 } else {
9386 9386 DERR("resource_lookup: resource %d not found\n", rnum);
9387 9387 rp = NULL;
9388 9388 }
9389 9389 rw_exit(&daplka_resource.daplka_rct_lock);
9390 9390 return (rp);
9391 9391 }
9392 9392
9393 9393 /*
9394 9394 * generic hash table implementation
9395 9395 */
9396 9396
9397 9397 /*
9398 9398 * daplka_hash_create:
9399 9399 * initializes a hash table with the specified parameters
9400 9400 *
9401 9401 * input:
9402 9402 * htblp pointer to hash table
9403 9403 *
9404 9404 * nbuckets number of buckets (must be power of 2)
9405 9405 *
9406 9406 * free_func this function is called on each hash
9407 9407 * table element when daplka_hash_destroy
9408 9408 * is called
9409 9409 *
9410 9410 * lookup_func if daplka_hash_lookup is able to find
9411 9411 * the desired object, this function is
9412 9412 * applied on the object before
9413 9413 * daplka_hash_lookup returns
9414 9414 * output:
9415 9415 * none
9416 9416 *
9417 9417 * return value(s):
9418 9418 * EINVAL nbuckets is not a power of 2
9419 9419 * ENOMEM cannot allocate buckets
9420 9420 * 0 success
9421 9421 */
9422 9422 static int
9423 9423 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
9424 9424 void (*free_func)(void *), void (*lookup_func)(void *))
9425 9425 {
9426 9426 int i;
9427 9427
9428 9428 if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
9429 9429 DERR("hash_create: nbuckets not power of 2\n");
9430 9430 return (EINVAL);
9431 9431 }
9432 9432 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp))
9433 9433
9434 9434 htblp->ht_buckets =
9435 9435 kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
9436 9436 daplka_km_flags);
9437 9437 if (htblp->ht_buckets == NULL) {
9438 9438 DERR("hash_create: cannot allocate buckets\n");
9439 9439 return (ENOMEM);
9440 9440 }
9441 9441 for (i = 0; i < nbuckets; i++) {
9442 9442 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9443 9443 htblp->ht_buckets[i].hb_count = 0;
9444 9444 htblp->ht_buckets[i].hb_entries = NULL;
9445 9445 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9446 9446 }
9447 9447 rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
9448 9448 mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);
9449 9449
9450 9450 htblp->ht_count = 0;
9451 9451 htblp->ht_next_hkey = (uint64_t)gethrtime();
9452 9452 htblp->ht_nbuckets = nbuckets;
9453 9453 htblp->ht_free_func = free_func;
9454 9454 htblp->ht_lookup_func = lookup_func;
9455 9455 htblp->ht_initialized = B_TRUE;
9456 9456 D3("hash_create: done, buckets = %d\n", nbuckets);
9457 9457 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp))
9458 9458 return (0);
9459 9459 }
9460 9460
9461 9461 /*
9462 9462 * daplka_hash_insert:
9463 9463 * inserts an object into a hash table
9464 9464 *
9465 9465 * input:
9466 9466 * htblp pointer to hash table
9467 9467 *
9468 9468 * hkeyp pointer to hash key.
9469 9469 * *hkeyp being non-zero means that the caller
9470 9470 * has generated its own hkey. if *hkeyp is zero,
9471 9471 * this function will generate an hkey for the
9472 9472 * caller. it is recommended that the caller
9473 9473 * leave the hkey generation to this function
9474 9474 * because the hkey is more likely to be evenly
9475 9475 * distributed.
9476 9476 *
9477 9477 * objp pointer to object to be inserted into
9478 9478 * hash table
9479 9479 *
9480 9480 * output:
9481 9481 * hkeyp the generated hkey is returned via this pointer
9482 9482 *
9483 9483 * return value(s):
9484 9484 * EINVAL invalid parameter
9485 9485 * ENOMEM cannot allocate hash entry
9486 9486 * 0 successful
9487 9487 */
9488 9488 static int
9489 9489 daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
9490 9490 {
9491 9491 daplka_hash_entry_t *hep, *curr_hep;
9492 9492 daplka_hash_bucket_t *hbp;
9493 9493 uint32_t bucket;
9494 9494 uint64_t hkey;
9495 9495
9496 9496 if (hkeyp == NULL) {
9497 9497 DERR("hash_insert: hkeyp == NULL\n");
9498 9498 return (EINVAL);
9499 9499 }
9500 9500 hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
9501 9501 if (hep == NULL) {
9502 9502 DERR("hash_insert: cannot alloc hash_entry\n");
9503 9503 return (ENOMEM);
9504 9504 }
9505 9505 if (*hkeyp == 0) {
9506 9506 /* generate a new key */
9507 9507 mutex_enter(&htblp->ht_key_lock);
9508 9508 hkey = ++htblp->ht_next_hkey;
9509 9509 if (hkey == 0) {
9510 9510 hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
9511 9511 }
9512 9512 mutex_exit(&htblp->ht_key_lock);
9513 9513 } else {
9514 9514 /* use user generated key */
9515 9515 hkey = *hkeyp;
9516 9516 }
9517 9517
9518 9518 /* only works if ht_nbuckets is a power of 2 */
9519 9519 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9520 9520 ASSERT(objp != NULL);
9521 9521 ASSERT(bucket < htblp->ht_nbuckets);
9522 9522
9523 9523 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9524 9524 hep->he_hkey = hkey;
9525 9525 hep->he_objp = objp;
9526 9526
9527 9527 /* look for duplicate entries */
9528 9528 hbp = &htblp->ht_buckets[bucket];
9529 9529 curr_hep = hbp->hb_entries;
9530 9530 while (curr_hep != NULL) {
9531 9531 if (curr_hep->he_hkey == hep->he_hkey) {
9532 9532 break;
9533 9533 }
9534 9534 curr_hep = curr_hep->he_next;
9535 9535 }
9536 9536 if (curr_hep != NULL) {
9537 9537 DERR("hash_insert: found duplicate hash entry: "
9538 9538 "bucket %d, hkey 0x%016llx\n",
9539 9539 bucket, (longlong_t)hep->he_hkey);
9540 9540 kmem_free(hep, sizeof (*hep));
9541 9541 rw_exit(&htblp->ht_table_lock);
9542 9542 return (EINVAL);
9543 9543 }
9544 9544 hep->he_next = hbp->hb_entries;
9545 9545 hbp->hb_entries = hep;
9546 9546 hbp->hb_count++;
9547 9547 htblp->ht_count++;
9548 9548 rw_exit(&htblp->ht_table_lock);
9549 9549
9550 9550 if (*hkeyp == 0) {
9551 9551 *hkeyp = hkey;
9552 9552 ASSERT(*hkeyp != 0);
9553 9553 }
9554 9554 D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
9555 9555 htblp, (longlong_t)*hkeyp, bucket);
9556 9556 return (0);
9557 9557 }
9558 9558
9559 9559 /*
9560 9560 * daplka_hash_remove:
9561 9561 * removes object identified by hkey from hash table
9562 9562 *
9563 9563 * input:
9564 9564 * htblp pointer to hash table
9565 9565 *
9566 9566 * hkey hkey that identifies the object to be removed
9567 9567 *
9568 9568 * output:
9569 9569 * objpp pointer to pointer to object.
9570 9570 * if remove is successful, the removed object
9571 9571 * will be returned via *objpp.
9572 9572 *
9573 9573 * return value(s):
9574 9574 * EINVAL cannot find hash entry
9575 9575 * 0 successful
9576 9576 */
9577 9577 static int
9578 9578 daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
9579 9579 {
9580 9580 daplka_hash_entry_t *free_hep, **curr_hepp;
9581 9581 daplka_hash_bucket_t *hbp;
9582 9582 uint32_t bucket;
9583 9583
9584 9584 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9585 9585
9586 9586 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9587 9587 hbp = &htblp->ht_buckets[bucket];
9588 9588
9589 9589 curr_hepp = &hbp->hb_entries;
9590 9590 while (*curr_hepp != NULL) {
9591 9591 if ((*curr_hepp)->he_hkey == hkey) {
9592 9592 break;
9593 9593 }
9594 9594 curr_hepp = &(*curr_hepp)->he_next;
9595 9595 }
9596 9596 if (*curr_hepp == NULL) {
9597 9597 DERR("hash_remove: cannot find hash entry: "
9598 9598 "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9599 9599 rw_exit(&htblp->ht_table_lock);
9600 9600 return (EINVAL);
9601 9601 } else {
9602 9602 if (objpp != NULL) {
9603 9603 *objpp = (*curr_hepp)->he_objp;
9604 9604 }
9605 9605 free_hep = *curr_hepp;
9606 9606 *curr_hepp = (*curr_hepp)->he_next;
9607 9607 kmem_free(free_hep, sizeof (*free_hep));
9608 9608 }
9609 9609 hbp->hb_count--;
9610 9610 htblp->ht_count--;
9611 9611 D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
9612 9612 "hb_count %d, hb_count %d\n",
9613 9613 (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
9614 9614 rw_exit(&htblp->ht_table_lock);
9615 9615 return (0);
9616 9616 }
9617 9617
9618 9618 /*
9619 9619 * daplka_hash_walk:
9620 9620 * walks through the entire hash table. applying func on each of
9621 9621 * the inserted objects. stops walking if func returns non-zero.
9622 9622 *
9623 9623 * input:
9624 9624 * htblp pointer to hash table
9625 9625 *
9626 9626 * func function to be applied on each object
9627 9627 *
9628 9628 * farg second argument to func
9629 9629 *
9630 9630 * lockmode can be RW_WRITER or RW_READER. this
9631 9631 * allows the caller to choose what type
9632 9632 * of lock to acquire before walking the
9633 9633 * table.
9634 9634 *
9635 9635 * output:
9636 9636 * none
9637 9637 *
9638 9638 * return value(s):
9639 9639 * none
9640 9640 */
9641 9641 static void
9642 9642 daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
9643 9643 void *farg, krw_t lockmode)
9644 9644 {
9645 9645 daplka_hash_entry_t *curr_hep;
9646 9646 daplka_hash_bucket_t *hbp;
9647 9647 uint32_t bucket, retval = 0;
9648 9648
9649 9649 ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);
9650 9650
9651 9651 /* needed for warlock */
9652 9652 if (lockmode == RW_WRITER) {
9653 9653 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9654 9654 } else {
9655 9655 rw_enter(&htblp->ht_table_lock, RW_READER);
9656 9656 }
9657 9657 for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
9658 9658 hbp = &htblp->ht_buckets[bucket];
9659 9659 curr_hep = hbp->hb_entries;
9660 9660 while (curr_hep != NULL) {
9661 9661 retval = (*func)(curr_hep->he_objp, farg);
9662 9662 if (retval != 0) {
9663 9663 break;
9664 9664 }
9665 9665 curr_hep = curr_hep->he_next;
9666 9666 }
9667 9667 }
9668 9668 rw_exit(&htblp->ht_table_lock);
9669 9669 }
9670 9670
9671 9671 /*
9672 9672 * daplka_hash_lookup:
9673 9673 * finds object from hkey
9674 9674 *
9675 9675 * input:
9676 9676 * htblp pointer to hash table
9677 9677 *
9678 9678 * hkey hkey that identifies the object to be looked up
9679 9679 *
9680 9680 * output:
9681 9681 * none
9682 9682 *
9683 9683 * return value(s):
9684 9684 * NULL if not found
9685 9685 * object pointer if found
9686 9686 */
9687 9687 static void *
9688 9688 daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
9689 9689 {
9690 9690 daplka_hash_entry_t *curr_hep;
9691 9691 uint32_t bucket;
9692 9692 void *objp;
9693 9693
9694 9694 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9695 9695
9696 9696 rw_enter(&htblp->ht_table_lock, RW_READER);
9697 9697 curr_hep = htblp->ht_buckets[bucket].hb_entries;
9698 9698 while (curr_hep != NULL) {
9699 9699 if (curr_hep->he_hkey == hkey) {
9700 9700 break;
9701 9701 }
9702 9702 curr_hep = curr_hep->he_next;
9703 9703 }
9704 9704 if (curr_hep == NULL) {
9705 9705 DERR("hash_lookup: cannot find hash entry: "
9706 9706 "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9707 9707 rw_exit(&htblp->ht_table_lock);
9708 9708 return (NULL);
9709 9709 }
9710 9710 objp = curr_hep->he_objp;
9711 9711 ASSERT(objp != NULL);
9712 9712 if (htblp->ht_lookup_func != NULL) {
9713 9713 (*htblp->ht_lookup_func)(objp);
9714 9714 }
9715 9715 rw_exit(&htblp->ht_table_lock);
9716 9716 return (objp);
9717 9717 }
9718 9718
9719 9719 /*
9720 9720 * daplka_hash_destroy:
9721 9721 * destroys hash table. applies free_func on all inserted objects.
9722 9722 *
9723 9723 * input:
9724 9724 * htblp pointer to hash table
9725 9725 *
9726 9726 * output:
9727 9727 * none
9728 9728 *
9729 9729 * return value(s):
9730 9730 * none
9731 9731 */
9732 9732 static void
9733 9733 daplka_hash_destroy(daplka_hash_table_t *htblp)
9734 9734 {
9735 9735 daplka_hash_entry_t *curr_hep, *free_hep;
9736 9736 daplka_hash_entry_t *free_list = NULL;
9737 9737 daplka_hash_bucket_t *hbp;
9738 9738 uint32_t bucket, cnt, total = 0;
9739 9739
9740 9740 if (!htblp->ht_initialized) {
9741 9741 DERR("hash_destroy: not initialized\n");
9742 9742 return;
9743 9743 }
9744 9744 /* free all elements from hash table */
9745 9745 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9746 9746 for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
9747 9747 hbp = &htblp->ht_buckets[bucket];
9748 9748
9749 9749 /* build list of elements to be freed */
9750 9750 curr_hep = hbp->hb_entries;
9751 9751 cnt = 0;
9752 9752 while (curr_hep != NULL) {
9753 9753 cnt++;
9754 9754 free_hep = curr_hep;
9755 9755 curr_hep = curr_hep->he_next;
9756 9756
9757 9757 free_hep->he_next = free_list;
9758 9758 free_list = free_hep;
9759 9759 }
9760 9760 ASSERT(cnt == hbp->hb_count);
9761 9761 total += cnt;
9762 9762 hbp->hb_count = 0;
9763 9763 hbp->hb_entries = NULL;
9764 9764 }
9765 9765 ASSERT(total == htblp->ht_count);
9766 9766 D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
9767 9767 htblp, htblp->ht_nbuckets, total);
9768 9768 rw_exit(&htblp->ht_table_lock);
9769 9769
9770 9770 /* free all objects, now without holding the hash table lock */
9771 9771 cnt = 0;
9772 9772 while (free_list != NULL) {
9773 9773 cnt++;
9774 9774 free_hep = free_list;
9775 9775 free_list = free_list->he_next;
9776 9776 if (htblp->ht_free_func != NULL) {
9777 9777 (*htblp->ht_free_func)(free_hep->he_objp);
9778 9778 }
9779 9779 kmem_free(free_hep, sizeof (*free_hep));
9780 9780 }
9781 9781 ASSERT(total == cnt);
9782 9782
9783 9783 /* free hash buckets and destroy locks */
9784 9784 kmem_free(htblp->ht_buckets,
9785 9785 sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);
9786 9786
9787 9787 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9788 9788 htblp->ht_buckets = NULL;
9789 9789 htblp->ht_count = 0;
9790 9790 htblp->ht_nbuckets = 0;
9791 9791 htblp->ht_free_func = NULL;
9792 9792 htblp->ht_lookup_func = NULL;
9793 9793 htblp->ht_initialized = B_FALSE;
9794 9794 rw_exit(&htblp->ht_table_lock);
9795 9795
9796 9796 mutex_destroy(&htblp->ht_key_lock);
9797 9797 rw_destroy(&htblp->ht_table_lock);
9798 9798 }
9799 9799
9800 9800 /*
9801 9801 * daplka_hash_getsize:
9802 9802 * return the number of objects in hash table
9803 9803 *
9804 9804 * input:
9805 9805 * htblp pointer to hash table
9806 9806 *
9807 9807 * output:
9808 9808 * none
9809 9809 *
9810 9810 * return value(s):
9811 9811 * number of objects in hash table
9812 9812 */
9813 9813 static uint32_t
9814 9814 daplka_hash_getsize(daplka_hash_table_t *htblp)
9815 9815 {
9816 9816 uint32_t sz;
9817 9817
9818 9818 rw_enter(&htblp->ht_table_lock, RW_READER);
9819 9819 sz = htblp->ht_count;
9820 9820 rw_exit(&htblp->ht_table_lock);
9821 9821
9822 9822 return (sz);
9823 9823 }
9824 9824
9825 9825 /*
9826 9826 * this function is used as ht_lookup_func above when lookup is called.
9827 9827 * other types of objs may use a more elaborate lookup_func.
9828 9828 */
9829 9829 static void
9830 9830 daplka_hash_generic_lookup(void *obj)
9831 9831 {
9832 9832 daplka_resource_t *rp = (daplka_resource_t *)obj;
9833 9833
9834 9834 mutex_enter(&rp->rs_reflock);
9835 9835 rp->rs_refcnt++;
9836 9836 ASSERT(rp->rs_refcnt != 0);
9837 9837 mutex_exit(&rp->rs_reflock);
9838 9838 }
↓ open down ↓ |
777 lines elided |
↑ open up ↑ |
9839 9839
9840 9840 /*
9841 9841 * Generates a non-zero 32 bit hash key used for the timer hash table.
9842 9842 */
9843 9843 static uint32_t
9844 9844 daplka_timer_hkey_gen()
9845 9845 {
9846 9846 uint32_t new_hkey;
9847 9847
9848 9848 do {
9849 - new_hkey = atomic_add_32_nv(&daplka_timer_hkey, 1);
9849 + new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
9850 9850 } while (new_hkey == 0);
9851 9851
9852 9852 return (new_hkey);
9853 9853 }
9854 9854
9855 9855
9856 9856 /*
9857 9857 * The DAPL KA debug logging routines
9858 9858 */
9859 9859
9860 9860 /*
9861 9861 * Add the string str to the end of the debug log, followed by a newline.
9862 9862 */
9863 9863 static void
9864 9864 daplka_dbglog(char *str)
9865 9865 {
9866 9866 size_t length;
9867 9867 size_t remlen;
9868 9868
9869 9869 /*
9870 9870 * If this is the first time we've written to the log, initialize it.
9871 9871 */
9872 9872 if (!daplka_dbginit) {
9873 9873 return;
9874 9874 }
9875 9875 mutex_enter(&daplka_dbglock);
9876 9876 /*
9877 9877 * Note the log is circular; if this string would run over the end,
9878 9878 * we copy the first piece to the end and then the last piece to
9879 9879 * the beginning of the log.
9880 9880 */
9881 9881 length = strlen(str);
9882 9882
9883 9883 remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;
9884 9884
9885 9885 if (length > remlen) {
9886 9886 if (remlen)
9887 9887 bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
9888 9888 daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = (char)NULL;
9889 9889 str += remlen;
9890 9890 length -= remlen;
9891 9891 daplka_dbgnext = 0;
9892 9892 }
9893 9893 bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
9894 9894 daplka_dbgnext += length;
9895 9895
9896 9896 if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
9897 9897 daplka_dbgnext = 0;
9898 9898 mutex_exit(&daplka_dbglock);
9899 9899 }
9900 9900
9901 9901
9902 9902 /*
9903 9903 * Add a printf-style message to whichever debug logs we're currently using.
9904 9904 */
9905 9905 static void
9906 9906 daplka_debug(const char *fmt, ...)
9907 9907 {
9908 9908 char buff[512];
9909 9909 va_list ap;
9910 9910 /*
9911 9911 * The system prepends the thread id and high resolution time
9912 9912 * (nanoseconds are dropped and so are the upper digits)
9913 9913 * to the specified string.
9914 9914 * The unit for timestamp is 10 microseconds.
9915 9915 * It wraps around every 10000 seconds.
9916 9916 * Ex: gethrtime() = X ns = X/1000 us = X/10000 10 micro sec.
9917 9917 */
9918 9918 int micro_time = (int)((gethrtime() / 10000) % 1000000000);
9919 9919 (void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);
9920 9920
9921 9921 va_start(ap, fmt);
9922 9922 (void) vsprintf(buff+strlen(buff), fmt, ap);
9923 9923 va_end(ap);
9924 9924
9925 9925 daplka_dbglog(buff);
9926 9926 }
9927 9927
9928 9928 static void
9929 9929 daplka_console(const char *fmt, ...)
9930 9930 {
9931 9931 char buff[512];
9932 9932 va_list ap;
9933 9933
9934 9934 va_start(ap, fmt);
9935 9935 (void) vsprintf(buff, fmt, ap);
9936 9936 va_end(ap);
9937 9937
9938 9938 cmn_err(CE_CONT, "%s", buff);
9939 9939 }
↓ open down ↓ |
80 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX