Print this page
5255 uts shouldn't open-code ISP2
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/adapters/hermon/hermon_srq.c
+++ new/usr/src/uts/common/io/ib/adapters/hermon/hermon_srq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
24 24 */
25 25
26 26 /*
27 27 * hermon_srq.c
28 28 * Hermon Shared Receive Queue Processing Routines
29 29 *
30 30 * Implements all the routines necessary for allocating, freeing, querying,
31 31 * modifying and posting shared receive queues.
32 32 */
33 33
34 +#include <sys/sysmacros.h>
34 35 #include <sys/types.h>
35 36 #include <sys/conf.h>
36 37 #include <sys/ddi.h>
37 38 #include <sys/sunddi.h>
38 39 #include <sys/modctl.h>
39 40 #include <sys/bitmap.h>
40 41
41 42 #include <sys/ib/adapters/hermon/hermon.h>
42 43
43 44 static void hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
44 45 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
45 46
46 47 /*
47 48 * hermon_srq_alloc()
48 49 * Context: Can be called only from user or kernel context.
49 50 */
50 51 int
51 52 hermon_srq_alloc(hermon_state_t *state, hermon_srq_info_t *srqinfo,
52 53 uint_t sleepflag)
53 54 {
54 55 ibt_srq_hdl_t ibt_srqhdl;
55 56 hermon_pdhdl_t pd;
56 57 ibt_srq_sizes_t *sizes;
57 58 ibt_srq_sizes_t *real_sizes;
58 59 hermon_srqhdl_t *srqhdl;
59 60 ibt_srq_flags_t flags;
60 61 hermon_rsrc_t *srqc, *rsrc;
61 62 hermon_hw_srqc_t srqc_entry;
62 63 uint32_t *buf;
63 64 hermon_srqhdl_t srq;
64 65 hermon_umap_db_entry_t *umapdb;
65 66 ibt_mr_attr_t mr_attr;
66 67 hermon_mr_options_t mr_op;
67 68 hermon_mrhdl_t mr;
68 69 uint64_t value, srq_desc_off;
69 70 uint32_t log_srq_size;
70 71 uint32_t uarpg;
71 72 uint_t srq_is_umap;
72 73 int flag, status;
73 74 uint_t max_sgl;
74 75 uint_t wqesz;
75 76 uint_t srq_wr_sz;
76 77 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))
77 78
78 79 /*
79 80 * options-->wq_location used to be for location, now explicitly
80 81 * LOCATION_NORMAL
81 82 */
82 83
83 84 /*
84 85 * Extract the necessary info from the hermon_srq_info_t structure
85 86 */
86 87 real_sizes = srqinfo->srqi_real_sizes;
87 88 sizes = srqinfo->srqi_sizes;
88 89 pd = srqinfo->srqi_pd;
89 90 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
90 91 flags = srqinfo->srqi_flags;
91 92 srqhdl = srqinfo->srqi_srqhdl;
92 93
93 94 /*
94 95 * Determine whether SRQ is being allocated for userland access or
95 96 * whether it is being allocated for kernel access. If the SRQ is
96 97 * being allocated for userland access, then lookup the UAR doorbell
97 98 * page number for the current process. Note: If this is not found
98 99 * (e.g. if the process has not previously open()'d the Hermon driver),
99 100 * then an error is returned.
100 101 */
101 102 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
102 103 if (srq_is_umap) {
103 104 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
104 105 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
105 106 if (status != DDI_SUCCESS) {
106 107 status = IBT_INVALID_PARAM;
107 108 goto srqalloc_fail3;
108 109 }
109 110 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
110 111 } else {
111 112 uarpg = state->hs_kernel_uar_index;
112 113 }
113 114
114 115 /* Increase PD refcnt */
115 116 hermon_pd_refcnt_inc(pd);
116 117
117 118 /* Allocate an SRQ context entry */
118 119 status = hermon_rsrc_alloc(state, HERMON_SRQC, 1, sleepflag, &srqc);
119 120 if (status != DDI_SUCCESS) {
120 121 status = IBT_INSUFF_RESOURCE;
121 122 goto srqalloc_fail1;
122 123 }
123 124
124 125 /* Allocate the SRQ Handle entry */
125 126 status = hermon_rsrc_alloc(state, HERMON_SRQHDL, 1, sleepflag, &rsrc);
126 127 if (status != DDI_SUCCESS) {
127 128 status = IBT_INSUFF_RESOURCE;
128 129 goto srqalloc_fail2;
129 130 }
130 131
131 132 srq = (hermon_srqhdl_t)rsrc->hr_addr;
132 133 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))
133 134
134 135 bzero(srq, sizeof (struct hermon_sw_srq_s));
135 136 /* Calculate the SRQ number */
136 137
137 138 /* just use the index, implicit in Hermon */
138 139 srq->srq_srqnum = srqc->hr_indx;
139 140
140 141 /*
141 142 * If this will be a user-mappable SRQ, then allocate an entry for
142 143 * the "userland resources database". This will later be added to
143 144 * the database (after all further SRQ operations are successful).
144 145 * If we fail here, we must undo the reference counts and the
145 146 * previous resource allocation.
146 147 */
147 148 if (srq_is_umap) {
148 149 umapdb = hermon_umap_db_alloc(state->hs_instance,
149 150 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
150 151 (uint64_t)(uintptr_t)rsrc);
151 152 if (umapdb == NULL) {
152 153 status = IBT_INSUFF_RESOURCE;
153 154 goto srqalloc_fail3;
154 155 }
155 156 }
156 157
157 158 /*
158 159 * Allocate the doorbell record. Hermon just needs one for the
159 160 * SRQ, and use uarpg (above) as the uar index
160 161 */
161 162
162 163 status = hermon_dbr_alloc(state, uarpg, &srq->srq_wq_dbr_acchdl,
163 164 &srq->srq_wq_vdbr, &srq->srq_wq_pdbr, &srq->srq_rdbr_mapoffset);
164 165 if (status != DDI_SUCCESS) {
165 166 status = IBT_INSUFF_RESOURCE;
166 167 goto srqalloc_fail4;
↓ open down ↓ |
123 lines elided |
↑ open up ↑ |
167 168 }
168 169
169 170 /*
170 171 * Calculate the appropriate size for the SRQ.
171 172 * Note: All Hermon SRQs must be a power-of-2 in size. Also
172 173 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step
173 174 * is to round the requested size up to the next highest power-of-2
174 175 */
175 176 srq_wr_sz = max(sizes->srq_wr_sz + 1, HERMON_SRQ_MIN_SIZE);
176 177 log_srq_size = highbit(srq_wr_sz);
177 - if ((srq_wr_sz & (srq_wr_sz - 1)) == 0) {
178 + if (ISP2(srq_wr_sz)) {
178 179 log_srq_size = log_srq_size - 1;
179 180 }
180 181
181 182 /*
182 183 * Next we verify that the rounded-up size is valid (i.e. consistent
183 184 * with the device limits and/or software-configured limits). If not,
184 185 * then obviously we have a lot of cleanup to do before returning.
185 186 */
186 187 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
187 188 status = IBT_HCA_WR_EXCEEDED;
188 189 goto srqalloc_fail4a;
189 190 }
190 191
191 192 /*
192 193 * Next we verify that the requested number of SGL is valid (i.e.
193 194 * consistent with the device limits and/or software-configured
194 195 * limits). If not, then obviously the same cleanup needs to be done.
195 196 */
196 197 max_sgl = state->hs_ibtfinfo.hca_attr->hca_max_srq_sgl;
197 198 if (sizes->srq_sgl_sz > max_sgl) {
198 199 status = IBT_HCA_SGL_EXCEEDED;
199 200 goto srqalloc_fail4a;
200 201 }
201 202
202 203 /*
203 204 * Determine the SRQ's WQE sizes. This depends on the requested
204 205 * number of SGLs. Note: This also has the side-effect of
205 206 * calculating the real number of SGLs (for the calculated WQE size)
206 207 */
207 208 hermon_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
208 209 HERMON_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
209 210 &srq->srq_wq_sgl);
210 211
211 212 /*
212 213 * Allocate the memory for SRQ work queues. Note: The location from
213 214 * which we will allocate these work queues is always
214 215 * QUEUE_LOCATION_NORMAL. Since Hermon work queues are not
215 216 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
216 217 * queue memory is very important. We used to allocate work queues
217 218 * (the combined receive and send queues) so that they would be aligned
218 219 * on their combined size. That alignment guaranteed that they would
219 220 * never cross the 4GB boundary (Hermon work queues are on the order of
220 221 * MBs at maximum). Now we are able to relax this alignment constraint
221 222 * by ensuring that the IB address assigned to the queue memory (as a
222 223 * result of the hermon_mr_register() call) is offset from zero.
223 224 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
224 225 * guarantee the alignment, but when attempting to use IOMMU bypass
225 226 * mode we found that we were not allowed to specify any alignment that
226 227 * was more restrictive than the system page size. So we avoided this
227 228 * constraint by passing two alignment values, one for the memory
228 229 * allocation itself and the other for the DMA handle (for later bind).
229 230 * This used to cause more memory than necessary to be allocated (in
230 231 * order to guarantee the more restrictive alignment contraint). But
231 232 * be guaranteeing the zero-based IB virtual address for the queue, we
232 233 * are able to conserve this memory.
233 234 *
234 235 * Note: If SRQ is not user-mappable, then it may come from either
235 236 * kernel system memory or from HCA-attached local DDR memory.
236 237 *
237 238 * Note2: We align this queue on a pagesize boundary. This is required
238 239 * to make sure that all the resulting IB addresses will start at 0, for
239 240 * a zero-based queue. By making sure we are aligned on at least a
240 241 * page, any offset we use into our queue will be the same as when we
241 242 * perform hermon_srq_modify() operations later.
242 243 */
243 244 wqesz = (1 << srq->srq_wq_log_wqesz);
244 245 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
245 246 srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
246 247 srq->srq_wqinfo.qa_bind_align = PAGESIZE;
247 248 if (srq_is_umap) {
248 249 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
249 250 } else {
250 251 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
251 252 }
252 253 status = hermon_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
253 254 if (status != DDI_SUCCESS) {
254 255 status = IBT_INSUFF_RESOURCE;
255 256 goto srqalloc_fail4a;
256 257 }
257 258 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
258 259 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
259 260
260 261 /*
261 262 * Register the memory for the SRQ work queues. The memory for the SRQ
262 263 * must be registered in the Hermon cMPT tables. This gives us the LKey
263 264 * to specify in the SRQ context later. Note: If the work queue is to
264 265 * be allocated from DDR memory, then only a "bypass" mapping is
265 266 * appropriate. And if the SRQ memory is user-mappable, then we force
266 267 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment
267 268 * restriction, we pass the "mro_bind_override_addr" flag in the call
268 269 * to hermon_mr_register(). This guarantees that the resulting IB vaddr
269 270 * will be zero-based (modulo the offset into the first page). If we
270 271 * fail here, we still have the bunch of resource and reference count
271 272 * cleanup to do.
272 273 */
273 274 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
274 275 IBT_MR_NOSLEEP;
275 276 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
276 277 mr_attr.mr_len = srq->srq_wqinfo.qa_size;
277 278 mr_attr.mr_as = NULL;
278 279 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
279 280 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
280 281 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
281 282 mr_op.mro_bind_override_addr = 1;
282 283 status = hermon_mr_register(state, pd, &mr_attr, &mr,
283 284 &mr_op, HERMON_SRQ_CMPT);
284 285 if (status != DDI_SUCCESS) {
285 286 status = IBT_INSUFF_RESOURCE;
286 287 goto srqalloc_fail5;
287 288 }
288 289 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
289 290
290 291 /*
291 292 * Calculate the offset between the kernel virtual address space
292 293 * and the IB virtual address space. This will be used when
293 294 * posting work requests to properly initialize each WQE.
294 295 */
295 296 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
296 297 (uint64_t)mr->mr_bindinfo.bi_addr;
297 298
298 299 srq->srq_wq_wqhdr = hermon_wrid_wqhdr_create(1 << log_srq_size);
299 300
300 301 /*
301 302 * Fill in all the return arguments (if necessary). This includes
302 303 * real queue size and real SGLs.
303 304 */
304 305 if (real_sizes != NULL) {
305 306 real_sizes->srq_wr_sz = (1 << log_srq_size) - 1;
306 307 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
307 308 }
308 309
309 310 /*
310 311 * Fill in the SRQC entry. This is the final step before passing
311 312 * ownership of the SRQC entry to the Hermon hardware. We use all of
312 313 * the information collected/calculated above to fill in the
313 314 * requisite portions of the SRQC. Note: If this SRQ is going to be
314 315 * used for userland access, then we need to set the UAR page number
315 316 * appropriately (otherwise it's a "don't care")
316 317 */
317 318 bzero(&srqc_entry, sizeof (hermon_hw_srqc_t));
318 319 srqc_entry.state = HERMON_SRQ_STATE_HW_OWNER;
319 320 srqc_entry.log_srq_size = log_srq_size;
320 321 srqc_entry.srqn = srq->srq_srqnum;
321 322 srqc_entry.log_rq_stride = srq->srq_wq_log_wqesz - 4;
322 323 /* 16-byte chunks */
323 324
324 325 srqc_entry.page_offs = srq->srq_wqinfo.qa_pgoffs >> 6;
325 326 srqc_entry.log2_pgsz = mr->mr_log2_pgsz;
326 327 srqc_entry.mtt_base_addrh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
327 328 srqc_entry.mtt_base_addrl = mr->mr_mttaddr >> 3;
328 329 srqc_entry.pd = pd->pd_pdnum;
329 330 srqc_entry.dbr_addrh = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 32);
330 331 srqc_entry.dbr_addrl = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 2);
331 332
332 333 /*
333 334 * all others - specifically, xrcd, cqn_xrc, lwm, wqe_cnt, and wqe_cntr
334 335 * are zero thanks to the bzero of the structure
335 336 */
336 337
337 338 /*
338 339 * Write the SRQC entry to hardware. Lastly, we pass ownership of
339 340 * the entry to the hardware (using the Hermon SW2HW_SRQ firmware
340 341 * command). Note: In general, this operation shouldn't fail. But
341 342 * if it does, we have to undo everything we've done above before
342 343 * returning error.
343 344 */
344 345 status = hermon_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
345 346 sizeof (hermon_hw_srqc_t), srq->srq_srqnum,
346 347 sleepflag);
347 348 if (status != HERMON_CMD_SUCCESS) {
348 349 cmn_err(CE_CONT, "Hermon: SW2HW_SRQ command failed: %08x\n",
349 350 status);
350 351 if (status == HERMON_CMD_INVALID_STATUS) {
351 352 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
352 353 }
353 354 status = ibc_get_ci_failure(0);
354 355 goto srqalloc_fail8;
355 356 }
356 357
357 358 /*
358 359 * Fill in the rest of the Hermon SRQ handle. We can update
359 360 * the following fields for use in further operations on the SRQ.
360 361 */
361 362 srq->srq_srqcrsrcp = srqc;
362 363 srq->srq_rsrcp = rsrc;
363 364 srq->srq_mrhdl = mr;
364 365 srq->srq_refcnt = 0;
365 366 srq->srq_is_umap = srq_is_umap;
366 367 srq->srq_uarpg = uarpg;
367 368 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
368 369 srq->srq_pdhdl = pd;
369 370 srq->srq_wq_bufsz = (1 << log_srq_size);
370 371 srq->srq_wq_buf = buf;
371 372 srq->srq_desc_off = srq_desc_off;
372 373 srq->srq_hdlrarg = (void *)ibt_srqhdl;
373 374 srq->srq_state = 0;
374 375 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
375 376 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;
376 377
377 378 /*
378 379 * Put SRQ handle in Hermon SRQNum-to-SRQhdl list. Then fill in the
379 380 * "srqhdl" and return success
380 381 */
381 382 hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, srq);
382 383
383 384 /*
384 385 * If this is a user-mappable SRQ, then we need to insert the
385 386 * previously allocated entry into the "userland resources database".
386 387 * This will allow for later lookup during devmap() (i.e. mmap())
387 388 * calls.
388 389 */
389 390 if (srq->srq_is_umap) {
390 391 hermon_umap_db_add(umapdb);
391 392 } else { /* initialize work queue for kernel SRQs */
392 393 int i, len, last;
393 394 uint16_t *desc;
394 395
395 396 desc = (uint16_t *)buf;
396 397 len = wqesz / sizeof (*desc);
397 398 last = srq->srq_wq_bufsz - 1;
398 399 for (i = 0; i < last; i++) {
399 400 desc[1] = htons(i + 1);
400 401 desc += len;
401 402 }
402 403 srq->srq_wq_wqhdr->wq_tail = last;
403 404 srq->srq_wq_wqhdr->wq_head = 0;
404 405 }
405 406
406 407 *srqhdl = srq;
407 408
408 409 return (status);
409 410
410 411 /*
411 412 * The following is cleanup for all possible failure cases in this routine
412 413 */
413 414 srqalloc_fail8:
414 415 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
415 416 srqalloc_fail7:
416 417 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
417 418 HERMON_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
418 419 HERMON_WARNING(state, "failed to deregister SRQ memory");
419 420 }
420 421 srqalloc_fail5:
421 422 hermon_queue_free(&srq->srq_wqinfo);
422 423 srqalloc_fail4a:
423 424 hermon_dbr_free(state, uarpg, srq->srq_wq_vdbr);
424 425 srqalloc_fail4:
425 426 if (srq_is_umap) {
426 427 hermon_umap_db_free(umapdb);
427 428 }
428 429 srqalloc_fail3:
429 430 hermon_rsrc_free(state, &rsrc);
430 431 srqalloc_fail2:
431 432 hermon_rsrc_free(state, &srqc);
432 433 srqalloc_fail1:
433 434 hermon_pd_refcnt_dec(pd);
434 435 srqalloc_fail:
435 436 return (status);
436 437 }
437 438
438 439
439 440 /*
440 441 * hermon_srq_free()
441 442 * Context: Can be called only from user or kernel context.
442 443 */
443 444 /* ARGSUSED */
444 445 int
445 446 hermon_srq_free(hermon_state_t *state, hermon_srqhdl_t *srqhdl,
446 447 uint_t sleepflag)
447 448 {
448 449 hermon_rsrc_t *srqc, *rsrc;
449 450 hermon_umap_db_entry_t *umapdb;
450 451 uint64_t value;
451 452 hermon_srqhdl_t srq;
452 453 hermon_mrhdl_t mr;
453 454 hermon_pdhdl_t pd;
454 455 hermon_hw_srqc_t srqc_entry;
455 456 uint32_t srqnum;
456 457 uint_t maxprot;
457 458 int status;
458 459
459 460 /*
460 461 * Pull all the necessary information from the Hermon Shared Receive
461 462 * Queue handle. This is necessary here because the resource for the
462 463 * SRQ handle is going to be freed up as part of this operation.
463 464 */
464 465 srq = *srqhdl;
465 466 mutex_enter(&srq->srq_lock);
466 467 srqc = srq->srq_srqcrsrcp;
467 468 rsrc = srq->srq_rsrcp;
468 469 pd = srq->srq_pdhdl;
469 470 mr = srq->srq_mrhdl;
470 471 srqnum = srq->srq_srqnum;
471 472
472 473 /*
473 474 * If there are work queues still associated with the SRQ, then return
474 475 * an error. Otherwise, we will be holding the SRQ lock.
475 476 */
476 477 if (srq->srq_refcnt != 0) {
477 478 mutex_exit(&srq->srq_lock);
478 479 return (IBT_SRQ_IN_USE);
479 480 }
480 481
481 482 /*
482 483 * If this was a user-mappable SRQ, then we need to remove its entry
483 484 * from the "userland resources database". If it is also currently
484 485 * mmap()'d out to a user process, then we need to call
485 486 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
486 487 * We also need to invalidate the SRQ tracking information for the
487 488 * user mapping.
488 489 */
489 490 if (srq->srq_is_umap) {
490 491 status = hermon_umap_db_find(state->hs_instance,
491 492 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, &value,
492 493 HERMON_UMAP_DB_REMOVE, &umapdb);
493 494 if (status != DDI_SUCCESS) {
494 495 mutex_exit(&srq->srq_lock);
495 496 HERMON_WARNING(state, "failed to find in database");
496 497 return (ibc_get_ci_failure(0));
497 498 }
498 499 hermon_umap_db_free(umapdb);
499 500 if (srq->srq_umap_dhp != NULL) {
500 501 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
501 502 status = devmap_devmem_remap(srq->srq_umap_dhp,
502 503 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size,
503 504 maxprot, DEVMAP_MAPPING_INVALID, NULL);
504 505 if (status != DDI_SUCCESS) {
505 506 mutex_exit(&srq->srq_lock);
506 507 HERMON_WARNING(state, "failed in SRQ memory "
507 508 "devmap_devmem_remap()");
508 509 return (ibc_get_ci_failure(0));
509 510 }
510 511 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
511 512 }
512 513 }
513 514
514 515 /*
515 516 * Put NULL into the Hermon SRQNum-to-SRQHdl list. This will allow any
516 517 * in-progress events to detect that the SRQ corresponding to this
517 518 * number has been freed.
518 519 */
519 520 hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, NULL);
520 521
521 522 mutex_exit(&srq->srq_lock);
522 523 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
523 524
524 525 /*
525 526 * Reclaim SRQC entry from hardware (using the Hermon HW2SW_SRQ
526 527 * firmware command). If the ownership transfer fails for any reason,
527 528 * then it is an indication that something (either in HW or SW) has
528 529 * gone seriously wrong.
529 530 */
530 531 status = hermon_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
531 532 sizeof (hermon_hw_srqc_t), srqnum, sleepflag);
532 533 if (status != HERMON_CMD_SUCCESS) {
533 534 HERMON_WARNING(state, "failed to reclaim SRQC ownership");
534 535 cmn_err(CE_CONT, "Hermon: HW2SW_SRQ command failed: %08x\n",
535 536 status);
536 537 if (status == HERMON_CMD_INVALID_STATUS) {
537 538 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
538 539 }
539 540 return (ibc_get_ci_failure(0));
540 541 }
541 542
542 543 /*
543 544 * Deregister the memory for the Shared Receive Queue. If this fails
544 545 * for any reason, then it is an indication that something (either
545 546 * in HW or SW) has gone seriously wrong. So we print a warning
546 547 * message and return.
547 548 */
548 549 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
549 550 sleepflag);
550 551 if (status != DDI_SUCCESS) {
551 552 HERMON_WARNING(state, "failed to deregister SRQ memory");
552 553 return (IBT_FAILURE);
553 554 }
554 555
555 556 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
556 557
557 558 /* Free the memory for the SRQ */
558 559 hermon_queue_free(&srq->srq_wqinfo);
559 560
560 561 /* Free the dbr */
561 562 hermon_dbr_free(state, srq->srq_uarpg, srq->srq_wq_vdbr);
562 563
563 564 /* Free the Hermon SRQ Handle */
564 565 hermon_rsrc_free(state, &rsrc);
565 566
566 567 /* Free the SRQC entry resource */
567 568 hermon_rsrc_free(state, &srqc);
568 569
569 570 /* Decrement the reference count on the protection domain (PD) */
570 571 hermon_pd_refcnt_dec(pd);
571 572
572 573 /* Set the srqhdl pointer to NULL and return success */
573 574 *srqhdl = NULL;
574 575
575 576 return (DDI_SUCCESS);
576 577 }
577 578
578 579
579 580 /*
580 581 * hermon_srq_modify()
581 582 * Context: Can be called only from user or kernel context.
582 583 */
583 584 int
584 585 hermon_srq_modify(hermon_state_t *state, hermon_srqhdl_t srq, uint_t size,
585 586 uint_t *real_size, uint_t sleepflag)
586 587 {
587 588 hermon_qalloc_info_t new_srqinfo, old_srqinfo;
588 589 hermon_rsrc_t *mtt, *old_mtt;
589 590 hermon_bind_info_t bind;
590 591 hermon_bind_info_t old_bind;
591 592 hermon_mrhdl_t mr;
592 593 hermon_hw_srqc_t srqc_entry;
593 594 hermon_hw_dmpt_t mpt_entry;
594 595 uint64_t *wre_new, *wre_old;
595 596 uint64_t mtt_addr;
596 597 uint64_t srq_pgoffs;
597 598 uint64_t srq_desc_off;
598 599 uint32_t *buf, srq_old_bufsz;
599 600 uint32_t wqesz;
600 601 uint_t max_srq_size;
601 602 uint_t mtt_pgsize_bits;
602 603 uint_t log_srq_size, maxprot;
603 604 int status;
604 605
605 606 if ((state->hs_devlim.mod_wr_srq == 0) ||
606 607 (state->hs_cfg_profile->cp_srq_resize_enabled == 0))
607 608 return (IBT_NOT_SUPPORTED);
608 609
609 610 /*
610 611 * If size requested is larger than device capability, return
611 612 * Insufficient Resources
612 613 */
613 614 max_srq_size = (1 << state->hs_cfg_profile->cp_log_max_srq_sz);
614 615 if (size > max_srq_size) {
615 616 return (IBT_HCA_WR_EXCEEDED);
↓ open down ↓ |
428 lines elided |
↑ open up ↑ |
616 617 }
617 618
618 619 /*
619 620 * Calculate the appropriate size for the SRQ.
620 621 * Note: All Hermon SRQs must be a power-of-2 in size. Also
621 622 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step
622 623 * is to round the requested size up to the next highest power-of-2
623 624 */
624 625 size = max(size, HERMON_SRQ_MIN_SIZE);
625 626 log_srq_size = highbit(size);
626 - if ((size & (size - 1)) == 0) {
627 + if (ISP2(size)) {
627 628 log_srq_size = log_srq_size - 1;
628 629 }
629 630
630 631 /*
631 632 * Next we verify that the rounded-up size is valid (i.e. consistent
632 633 * with the device limits and/or software-configured limits).
633 634 */
634 635 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
635 636 status = IBT_HCA_WR_EXCEEDED;
636 637 goto srqmodify_fail;
637 638 }
638 639
639 640 /*
640 641 * Allocate the memory for newly resized Shared Receive Queue.
641 642 *
642 643 * Note: If SRQ is not user-mappable, then it may come from either
643 644 * kernel system memory or from HCA-attached local DDR memory.
644 645 *
645 646 * Note2: We align this queue on a pagesize boundary. This is required
646 647 * to make sure that all the resulting IB addresses will start at 0,
647 648 * for a zero-based queue. By making sure we are aligned on at least a
648 649 * page, any offset we use into our queue will be the same as it was
649 650 * when we allocated it at hermon_srq_alloc() time.
650 651 */
651 652 wqesz = (1 << srq->srq_wq_log_wqesz);
652 653 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
653 654 new_srqinfo.qa_alloc_align = PAGESIZE;
654 655 new_srqinfo.qa_bind_align = PAGESIZE;
655 656 if (srq->srq_is_umap) {
656 657 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
657 658 } else {
658 659 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
659 660 }
660 661 status = hermon_queue_alloc(state, &new_srqinfo, sleepflag);
661 662 if (status != DDI_SUCCESS) {
662 663 status = IBT_INSUFF_RESOURCE;
663 664 goto srqmodify_fail;
664 665 }
665 666 buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
666 667 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
667 668
668 669 /*
669 670 * Allocate the memory for the new WRE list. This will be used later
670 671 * when we resize the wridlist based on the new SRQ size.
671 672 */
672 673 wre_new = kmem_zalloc((1 << log_srq_size) * sizeof (uint64_t),
673 674 sleepflag);
674 675 if (wre_new == NULL) {
675 676 status = IBT_INSUFF_RESOURCE;
676 677 goto srqmodify_fail;
677 678 }
678 679
679 680 /*
680 681 * Fill in the "bind" struct. This struct provides the majority
681 682 * of the information that will be used to distinguish between an
682 683 * "addr" binding (as is the case here) and a "buf" binding (see
683 684 * below). The "bind" struct is later passed to hermon_mr_mem_bind()
684 685 * which does most of the "heavy lifting" for the Hermon memory
685 686 * registration routines.
686 687 */
687 688 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
688 689 bzero(&bind, sizeof (hermon_bind_info_t));
689 690 bind.bi_type = HERMON_BINDHDL_VADDR;
690 691 bind.bi_addr = (uint64_t)(uintptr_t)buf;
691 692 bind.bi_len = new_srqinfo.qa_size;
692 693 bind.bi_as = NULL;
693 694 bind.bi_flags = sleepflag == HERMON_SLEEP ? IBT_MR_SLEEP :
694 695 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
695 696 bind.bi_bypass = state->hs_cfg_profile->cp_iommu_bypass;
696 697
697 698 status = hermon_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
698 699 &mtt_pgsize_bits, 0); /* no relaxed ordering */
699 700 if (status != DDI_SUCCESS) {
700 701 status = status;
701 702 kmem_free(wre_new, (1 << log_srq_size) *
702 703 sizeof (uint64_t));
703 704 hermon_queue_free(&new_srqinfo);
704 705 goto srqmodify_fail;
705 706 }
706 707
707 708 /*
708 709 * Calculate the offset between the kernel virtual address space
709 710 * and the IB virtual address space. This will be used when
710 711 * posting work requests to properly initialize each WQE.
711 712 *
712 713 * Note: bind addr is zero-based (from alloc) so we calculate the
713 714 * correct new offset here.
714 715 */
715 716 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
716 717 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
717 718 (uint64_t)bind.bi_addr;
718 719 srq_pgoffs = (uint_t)
719 720 ((uintptr_t)new_srqinfo.qa_buf_aligned & HERMON_PAGEOFFSET);
720 721
721 722 /*
722 723 * Fill in the MPT entry. This is the final step before passing
723 724 * ownership of the MPT entry to the Hermon hardware. We use all of
724 725 * the information collected/calculated above to fill in the
725 726 * requisite portions of the MPT.
726 727 */
727 728 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
728 729 mpt_entry.reg_win_len = bind.bi_len;
729 730 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
730 731 mpt_entry.mtt_addr_h = mtt_addr >> 32;
731 732 mpt_entry.mtt_addr_l = mtt_addr >> 3;
732 733
733 734 /*
734 735 * for hermon we build up a new srqc and pass that (partially filled
735 736 * to resize SRQ instead of modifying the (d)mpt directly
736 737 */
737 738
738 739
739 740
740 741 /*
741 742 * Now we grab the SRQ lock. Since we will be updating the actual
742 743 * SRQ location and the producer/consumer indexes, we should hold
743 744 * the lock.
744 745 *
745 746 * We do a HERMON_NOSLEEP here (and below), though, because we are
746 747 * holding the "srq_lock" and if we got raised to interrupt level
747 748 * by priority inversion, we would not want to block in this routine
748 749 * waiting for success.
749 750 */
750 751 mutex_enter(&srq->srq_lock);
751 752
752 753 /*
753 754 * Copy old entries to new buffer
754 755 */
755 756 srq_old_bufsz = srq->srq_wq_bufsz;
756 757 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);
757 758
758 759 /*
759 760 * Setup MPT information for use in the MODIFY_MPT command
760 761 */
761 762 mr = srq->srq_mrhdl;
762 763 mutex_enter(&mr->mr_lock);
763 764
764 765 /*
765 766 * now, setup the srqc information needed for resize - limit the
766 767 * values, but use the same structure as the srqc
767 768 */
768 769
769 770 srqc_entry.log_srq_size = log_srq_size;
770 771 srqc_entry.page_offs = srq_pgoffs >> 6;
771 772 srqc_entry.log2_pgsz = mr->mr_log2_pgsz;
772 773 srqc_entry.mtt_base_addrl = (uint64_t)mtt_addr >> 32;
773 774 srqc_entry.mtt_base_addrh = mtt_addr >> 3;
774 775
775 776 /*
776 777 * RESIZE_SRQ
777 778 *
778 779 * If this fails for any reason, then it is an indication that
779 780 * something (either in HW or SW) has gone seriously wrong. So we
780 781 * print a warning message and return.
781 782 */
782 783 status = hermon_resize_srq_cmd_post(state, &srqc_entry,
783 784 srq->srq_srqnum, sleepflag);
784 785 if (status != HERMON_CMD_SUCCESS) {
785 786 cmn_err(CE_CONT, "Hermon: RESIZE_SRQ command failed: %08x\n",
786 787 status);
787 788 if (status == HERMON_CMD_INVALID_STATUS) {
788 789 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
789 790 }
790 791 (void) hermon_mr_mtt_unbind(state, &bind, mtt);
791 792 kmem_free(wre_new, (1 << log_srq_size) *
792 793 sizeof (uint64_t));
793 794 hermon_queue_free(&new_srqinfo);
794 795 mutex_exit(&mr->mr_lock);
795 796 mutex_exit(&srq->srq_lock);
796 797 return (ibc_get_ci_failure(0));
797 798 }
798 799 /*
799 800 * Update the Hermon Shared Receive Queue handle with all the new
800 801 * information. At the same time, save away all the necessary
801 802 * information for freeing up the old resources
802 803 */
803 804 old_srqinfo = srq->srq_wqinfo;
804 805 old_mtt = srq->srq_mrhdl->mr_mttrsrcp;
805 806 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
806 807 sizeof (hermon_bind_info_t));
807 808
808 809 /* Now set the new info */
809 810 srq->srq_wqinfo = new_srqinfo;
810 811 srq->srq_wq_buf = buf;
811 812 srq->srq_wq_bufsz = (1 << log_srq_size);
812 813 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (hermon_bind_info_t));
813 814 srq->srq_mrhdl->mr_mttrsrcp = mtt;
814 815 srq->srq_desc_off = srq_desc_off;
815 816 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
816 817
817 818 /* Update MR mtt pagesize */
818 819 mr->mr_logmttpgsz = mtt_pgsize_bits;
819 820 mutex_exit(&mr->mr_lock);
820 821
821 822 /*
822 823 * Initialize new wridlist, if needed.
823 824 *
824 825 * If a wridlist already is setup on an SRQ (the QP associated with an
825 826 * SRQ has moved "from_reset") then we must update this wridlist based
826 827 * on the new SRQ size. We allocate the new size of Work Request ID
827 828 * Entries, copy over the old entries to the new list, and
828 829 * re-initialize the srq wridlist in non-umap case
829 830 */
830 831 wre_old = srq->srq_wq_wqhdr->wq_wrid;
831 832
832 833 bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (uint64_t));
833 834
834 835 /* Setup new sizes in wre */
835 836 srq->srq_wq_wqhdr->wq_wrid = wre_new;
836 837
837 838 /*
838 839 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
839 840 * to a user process, then we need to call devmap_devmem_remap() to
840 841 * invalidate the mapping to the SRQ memory. We also need to
841 842 * invalidate the SRQ tracking information for the user mapping.
842 843 *
843 844 * Note: On failure, the remap really shouldn't ever happen. So, if it
844 845 * does, it is an indication that something has gone seriously wrong.
845 846 * So we print a warning message and return error (knowing, of course,
846 847 * that the "old" SRQ memory will be leaked)
847 848 */
848 849 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
849 850 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
850 851 status = devmap_devmem_remap(srq->srq_umap_dhp,
851 852 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
852 853 DEVMAP_MAPPING_INVALID, NULL);
853 854 if (status != DDI_SUCCESS) {
854 855 mutex_exit(&srq->srq_lock);
855 856 HERMON_WARNING(state, "failed in SRQ memory "
856 857 "devmap_devmem_remap()");
857 858 /* We can, however, free the memory for old wre */
858 859 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
859 860 return (ibc_get_ci_failure(0));
860 861 }
861 862 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
862 863 }
863 864
864 865 /*
865 866 * Drop the SRQ lock now. The only thing left to do is to free up
866 867 * the old resources.
867 868 */
868 869 mutex_exit(&srq->srq_lock);
869 870
870 871 /*
871 872 * Unbind the MTT entries.
872 873 */
873 874 status = hermon_mr_mtt_unbind(state, &old_bind, old_mtt);
874 875 if (status != DDI_SUCCESS) {
875 876 HERMON_WARNING(state, "failed to unbind old SRQ memory");
876 877 status = ibc_get_ci_failure(0);
877 878 goto srqmodify_fail;
878 879 }
879 880
880 881 /* Free the memory for old wre */
881 882 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
882 883
883 884 /* Free the memory for the old SRQ */
884 885 hermon_queue_free(&old_srqinfo);
885 886
886 887 /*
887 888 * Fill in the return arguments (if necessary). This includes the
888 889 * real new completion queue size.
889 890 */
890 891 if (real_size != NULL) {
891 892 *real_size = (1 << log_srq_size);
892 893 }
893 894
894 895 return (DDI_SUCCESS);
895 896
896 897 srqmodify_fail:
897 898 return (status);
898 899 }
899 900
900 901
/*
 * hermon_srq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Atomically (under srq_lock) bumps the SRQ reference count.  The
 *    count tracks consumers of the SRQ (e.g. QPs associated with it) so
 *    that teardown code can tell when the SRQ is still in use.
 */
void
hermon_srq_refcnt_inc(hermon_srqhdl_t srq)
{
	/* srq_lock serializes refcnt updates with other SRQ state changes */
	mutex_enter(&srq->srq_lock);
	srq->srq_refcnt++;
	mutex_exit(&srq->srq_lock);
}
912 913
913 914
/*
 * hermon_srq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 *
 *    Atomically (under srq_lock) drops the SRQ reference count.  The
 *    caller is responsible for having taken a matching reference via
 *    hermon_srq_refcnt_inc(); no check for underflow is made here.
 */
void
hermon_srq_refcnt_dec(hermon_srqhdl_t srq)
{
	/* srq_lock serializes refcnt updates with other SRQ state changes */
	mutex_enter(&srq->srq_lock);
	srq->srq_refcnt--;
	mutex_exit(&srq->srq_lock);
}
925 926
926 927
927 928 /*
928 929 * hermon_srqhdl_from_srqnum()
929 930 * Context: Can be called from interrupt or base context.
930 931 *
931 932 * This routine is important because changing the unconstrained
932 933 * portion of the SRQ number is critical to the detection of a
933 934 * potential race condition in the SRQ handler code (i.e. the case
934 935 * where a SRQ is freed and alloc'd again before an event for the
935 936 * "old" SRQ can be handled).
936 937 *
937 938 * While this is not a perfect solution (not sure that one exists)
938 939 * it does help to mitigate the chance that this race condition will
939 940 * cause us to deliver a "stale" event to the new SRQ owner. Note:
940 941 * this solution does not scale well because the number of constrained
941 942 * bits increases (and, hence, the number of unconstrained bits
942 943 * decreases) as the number of supported SRQ grows. For small and
943 944 * intermediate values, it should hopefully provide sufficient
944 945 * protection.
945 946 */
946 947 hermon_srqhdl_t
947 948 hermon_srqhdl_from_srqnum(hermon_state_t *state, uint_t srqnum)
948 949 {
949 950 uint_t srqindx, srqmask;
950 951
951 952 /* Calculate the SRQ table index from the srqnum */
952 953 srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1;
953 954 srqindx = srqnum & srqmask;
954 955 return (hermon_icm_num_to_hdl(state, HERMON_SRQC, srqindx));
955 956 }
956 957
957 958
958 959 /*
959 960 * hermon_srq_sgl_to_logwqesz()
960 961 * Context: Can be called from interrupt or base context.
961 962 */
962 963 static void
963 964 hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
964 965 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
965 966 {
966 967 uint_t max_size, log2, actual_sgl;
↓ open down ↓ |
330 lines elided |
↑ open up ↑ |
967 968
968 969 switch (wq_type) {
969 970 case HERMON_QP_WQ_TYPE_RECVQ:
970 971 /*
971 972 * Use requested maximum SGL to calculate max descriptor size
972 973 * (while guaranteeing that the descriptor size is a
973 974 * power-of-2 cachelines).
974 975 */
975 976 max_size = (HERMON_QP_WQE_MLX_SRQ_HDRS + (num_sgl << 4));
976 977 log2 = highbit(max_size);
977 - if ((max_size & (max_size - 1)) == 0) {
978 + if (ISP2(max_size)) {
978 979 log2 = log2 - 1;
979 980 }
980 981
981 982 /* Make sure descriptor is at least the minimum size */
982 983 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
983 984
984 985 /* Calculate actual number of SGL (given WQE size) */
985 986 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SRQ_HDRS) >> 4;
986 987 break;
987 988
988 989 default:
989 990 HERMON_WARNING(state, "unexpected work queue type");
990 991 break;
991 992 }
992 993
993 994 /* Fill in the return values */
994 995 *logwqesz = log2;
995 996 *max_sgl = min(state->hs_cfg_profile->cp_srq_max_sgl, actual_sgl);
996 997 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX