5255 uts shouldn't open-code ISP2
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_cfg.c
28 * Hermon Configuration Profile Routines
29 *
30 * Implements the routines necessary for initializing and (later) tearing
31 * down the list of Hermon configuration information.
32 */
33
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>

#include <sys/ib/adapters/hermon/hermon.h>
42
43 /*
44 * Below are the elements that make up the Hermon configuration profile.
45 * For advanced users who wish to alter these values, this can be done via
46 * the /etc/system file. By default, values are assigned to the number of
47 * supported resources, either from the HCA's reported capacities or by
48 * a by-design limit in the driver.
49 */
50
51 /* Number of supported QPs, CQs and SRQs */
52 uint32_t hermon_log_num_qp = HERMON_NUM_QP_SHIFT;
53 uint32_t hermon_log_num_cq = HERMON_NUM_CQ_SHIFT;
54 uint32_t hermon_log_num_srq = HERMON_NUM_SRQ_SHIFT;
55
56 /* Number of supported SGL per WQE for SQ/RQ, and for SRQ */
57 /* XXX use the same for all queues if limitation in srq.h is resolved */
58 uint32_t hermon_wqe_max_sgl = HERMON_NUM_SGL_PER_WQE;
59 uint32_t hermon_srq_max_sgl = HERMON_SRQ_MAX_SGL;
60
61 /* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
62 uint32_t hermon_log_num_rdb_per_qp = HERMON_LOG_NUM_RDB_PER_QP;
63
64 /*
65 * Number of multicast groups (MCGs), number of QP per MCG, and the number
66 * of entries (from the total number) in the multicast group "hash table"
67 */
68 uint32_t hermon_log_num_mcg = HERMON_NUM_MCG_SHIFT;
69 uint32_t hermon_num_qp_per_mcg = HERMON_NUM_QP_PER_MCG;
70 uint32_t hermon_log_num_mcg_hash = HERMON_NUM_MCG_HASH_SHIFT;
71
72 /* Number of UD AVs */
73 uint32_t hermon_log_num_ah = HERMON_NUM_AH_SHIFT;
74
75 /* Number of EQs and their default size */
76 uint32_t hermon_log_num_eq = HERMON_NUM_EQ_SHIFT;
77 uint32_t hermon_log_eq_sz = HERMON_DEFAULT_EQ_SZ_SHIFT;
78
79 /*
80 * Number of supported MPTs, MTTs and also the maximum MPT size.
81 */
82 uint32_t hermon_log_num_mtt = HERMON_NUM_MTT_SHIFT;
83 uint32_t hermon_log_num_dmpt = HERMON_NUM_DMPT_SHIFT;
84 uint32_t hermon_log_max_mrw_sz = HERMON_MAX_MEM_MPT_SHIFT;
85
86 /*
87 * Number of supported UAR (User Access Regions) for this HCA.
88 * We could in the future read in uar_sz from devlim, and thus
89 * derive the number of UAR. Since this is derived from PAGESIZE,
90 * however, this means that x86 systems would have twice as many
91 * UARs as SPARC systems. Therefore for consistency's sake, we will
92 * just use 1024 pages, which is the maximum on SPARC systems.
93 */
94 uint32_t hermon_log_num_uar = HERMON_NUM_UAR_SHIFT;
95
96 /*
97 * Number of remaps allowed for FMR before a sync is required. This value
98 * determines how many times we can fmr_deregister() before the underlying fmr
99 * framework places the region to wait for an MTT_SYNC operation, cleaning up
100 * the old mappings.
101 */
102 uint32_t hermon_fmr_num_remaps = HERMON_FMR_MAX_REMAPS;
103
104 /*
105 * Number of supported Hermon mailboxes ("In" and "Out") and their maximum
106 * sizes, respectively
107 */
108 uint32_t hermon_log_num_inmbox = HERMON_NUM_MAILBOXES_SHIFT;
109 uint32_t hermon_log_num_outmbox = HERMON_NUM_MAILBOXES_SHIFT;
110 uint32_t hermon_log_inmbox_size = HERMON_MBOX_SIZE_SHIFT;
111 uint32_t hermon_log_outmbox_size = HERMON_MBOX_SIZE_SHIFT;
112 uint32_t hermon_log_num_intr_inmbox = HERMON_NUM_INTR_MAILBOXES_SHIFT;
113 uint32_t hermon_log_num_intr_outmbox = HERMON_NUM_INTR_MAILBOXES_SHIFT;
114
115 /* Number of supported Protection Domains (PD) */
116 uint32_t hermon_log_num_pd = HERMON_NUM_PD_SHIFT;
117
118 /*
119 * Number of total supported PKeys per PKey table (i.e.
120 * per port). Also the number of SGID per GID table.
121 */
122 uint32_t hermon_log_max_pkeytbl = HERMON_NUM_PKEYTBL_SHIFT;
123 uint32_t hermon_log_max_gidtbl = HERMON_NUM_GIDTBL_SHIFT;
124
125 /* Maximum supported MTU and portwidth */
126 uint32_t hermon_max_mtu = HERMON_MAX_MTU;
127 uint32_t hermon_max_port_width = HERMON_MAX_PORT_WIDTH;
128
129 /* Number of supported Virtual Lanes (VL) */
130 uint32_t hermon_max_vlcap = HERMON_MAX_VLCAP;
131
132 /*
133 * Whether or not to use the built-in (i.e. in firmware) agents for QP0 and
134 * QP1, respectively.
135 */
136 uint32_t hermon_qp0_agents_in_fw = 0;
137 uint32_t hermon_qp1_agents_in_fw = 0;
138
139 /*
140 * Whether DMA mappings should bypass the PCI IOMMU or not.
141 * hermon_iommu_bypass is a global setting for all memory addresses.
142 */
143 uint32_t hermon_iommu_bypass = 1;
144
145 /*
146 * Whether *DATA* buffers should be bound w/ Relaxed Ordering (RO) turned on
147 * via the SW workaround (HCAs don't support RO in HW). Defaulted on,
148 * though care must be taken w/ some Userland clients that *MAY* have
149 * peeked in the data to understand when data xfer was done - MPI does
150 * as an efficiency
151 */
152
153 uint32_t hermon_kernel_data_ro = HERMON_RO_ENABLED; /* default */
154 uint32_t hermon_user_data_ro = HERMON_RO_ENABLED; /* default */
155
156 /*
157 * Whether Hermon should use MSI (Message Signaled Interrupts), if available.
158 * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available)
159 */
160 uint32_t hermon_use_msi_if_avail = 1;
161
162 /*
163 * This is a patchable variable that determines the time we will wait after
164 * initiating SW reset before we do our first read from Hermon config space.
165 * If this value is set too small (less than the default 100ms), it is
166 * possible for Hermon hardware to be unready to respond to the config cycle
167 * reads. This could cause master abort on the PCI bridge. Note: If
168 * "hermon_sw_reset_delay" is set to zero, then no software reset of the Hermon
169 * device will be attempted.
170 */
171 uint32_t hermon_sw_reset_delay = HERMON_SW_RESET_DELAY;
172
173 /*
174 * These are patchable variables for hermon command polling. The poll_delay is
175 * the number of usec to wait in-between calls to poll the 'go' bit. The
176 * poll_max is the total number of usec to loop in waiting for the 'go' bit to
177 * clear.
178 */
179 uint32_t hermon_cmd_poll_delay = HERMON_CMD_POLL_DELAY;
180 uint32_t hermon_cmd_poll_max = HERMON_CMD_POLL_MAX;
181
182 /*
183 * This is a patchable variable that determines the frequency with which
184 * the AckReq bit will be set in outgoing RC packets. The AckReq bit will be
185 * set in at least every 2^hermon_qp_ackreq_freq packets (but at least once
186 * per message, i.e. in the last packet). Tuning this value can increase
187 * IB fabric utilization by cutting down on the number of unnecessary ACKs.
188 */
189 uint32_t hermon_qp_ackreq_freq = HERMON_QP_ACKREQ_FREQ;
190
191 static void hermon_cfg_wqe_sizes(hermon_state_t *state,
192 hermon_cfg_profile_t *cp);
193 #ifdef __sparc
194 static void hermon_check_iommu_bypass(hermon_state_t *state,
195 hermon_cfg_profile_t *cp);
196 #endif
197
198 /*
199 * hermon_cfg_profile_init_phase1()
200 * Context: Only called from attach() path context
201 */
202 int
203 hermon_cfg_profile_init_phase1(hermon_state_t *state)
204 {
205 hermon_cfg_profile_t *cp;
206
207 /*
208 * Allocate space for the configuration profile structure
209 */
210 cp = (hermon_cfg_profile_t *)kmem_zalloc(sizeof (hermon_cfg_profile_t),
211 KM_SLEEP);
212
213 /*
214 * Common to all profiles.
215 */
216 cp->cp_qp0_agents_in_fw = hermon_qp0_agents_in_fw;
217 cp->cp_qp1_agents_in_fw = hermon_qp1_agents_in_fw;
218 cp->cp_sw_reset_delay = hermon_sw_reset_delay;
219 cp->cp_cmd_poll_delay = hermon_cmd_poll_delay;
220 cp->cp_cmd_poll_max = hermon_cmd_poll_max;
221 cp->cp_ackreq_freq = hermon_qp_ackreq_freq;
222 cp->cp_fmr_max_remaps = hermon_fmr_num_remaps;
223
224 /*
225 * Although most of the configuration is enabled in "phase2" of the
226 * cfg_profile_init, we have to setup the OUT mailboxes soon, since
227 * they are used immediately after this "phase1" completes, to run the
228 * firmware and get the device limits, which we'll need for 'phase2'.
229 * That's done in rsrc_init_phase1, called shortly after we do this
230 * and the sw reset - see hermon.c
231 */
232 if (state->hs_cfg_profile_setting == HERMON_CFG_MEMFREE) {
233 cp->cp_log_num_outmbox = hermon_log_num_outmbox;
234 cp->cp_log_outmbox_size = hermon_log_outmbox_size;
235 cp->cp_log_num_inmbox = hermon_log_num_inmbox;
236 cp->cp_log_inmbox_size = hermon_log_inmbox_size;
237 cp->cp_log_num_intr_inmbox = hermon_log_num_intr_inmbox;
238 cp->cp_log_num_intr_outmbox = hermon_log_num_intr_outmbox;
239
240 } else {
241 return (DDI_FAILURE);
242 }
243
244 /*
245 * Set IOMMU bypass or not. Ensure consistency of flags with
246 * architecture type.
247 */
248 #ifdef __sparc
249 if (hermon_iommu_bypass == 1) {
250 hermon_check_iommu_bypass(state, cp);
251 } else {
252 cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL;
253 }
254 #else
255 cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL;
256 #endif
257
258 /* Attach the configuration profile to Hermon softstate */
259 state->hs_cfg_profile = cp;
260
261 return (DDI_SUCCESS);
262 }
263
264 /*
265 * hermon_cfg_profile_init_phase2()
266 * Context: Only called from attach() path context
267 */
268 int
269 hermon_cfg_profile_init_phase2(hermon_state_t *state)
270 {
271 hermon_cfg_profile_t *cp;
272 hermon_hw_querydevlim_t *devlim;
273 hermon_hw_query_port_t *port;
274 uint32_t num, size;
275 int i;
276
277 /* Read in the device limits */
278 devlim = &state->hs_devlim;
279 /* and the port information */
280 port = &state->hs_queryport;
281
282 /* Read the configuration profile */
283 cp = state->hs_cfg_profile;
284
285 /*
286 * We configure all Hermon HCAs with the same profile, which
287 * is based upon the default value assignments above. If we want to
288 * add additional profiles in the future, they can be added here.
289 * Note the reference to "Memfree" is a holdover from Arbel/Sinai
290 */
291 if (state->hs_cfg_profile_setting != HERMON_CFG_MEMFREE) {
292 return (DDI_FAILURE);
293 }
294
295 /*
296 * Note for most configuration parameters, we use the lesser of our
297 * desired configuration value or the device-defined maximum value.
298 */
299 cp->cp_log_num_mtt = min(hermon_log_num_mtt, devlim->log_max_mtt);
300 cp->cp_log_num_dmpt = min(hermon_log_num_dmpt, devlim->log_max_dmpt);
301 cp->cp_log_num_cmpt = HERMON_LOG_CMPT_PER_TYPE + 2; /* times 4, */
302 /* per PRM */
303 cp->cp_log_max_mrw_sz = min(hermon_log_max_mrw_sz,
304 devlim->log_max_mrw_sz);
305 cp->cp_log_num_pd = min(hermon_log_num_pd, devlim->log_max_pd);
306 cp->cp_log_num_qp = min(hermon_log_num_qp, devlim->log_max_qp);
307 cp->cp_log_num_cq = min(hermon_log_num_cq, devlim->log_max_cq);
308 cp->cp_log_num_srq = min(hermon_log_num_srq, devlim->log_max_srq);
309 cp->cp_log_num_eq = min(hermon_log_num_eq, devlim->log_max_eq);
310 cp->cp_log_eq_sz = min(hermon_log_eq_sz, devlim->log_max_eq_sz);
311 cp->cp_log_num_rdb = cp->cp_log_num_qp +
312 min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp);
313 cp->cp_hca_max_rdma_in_qp = cp->cp_hca_max_rdma_out_qp =
314 1 << min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp);
315 cp->cp_num_qp_per_mcg = max(hermon_num_qp_per_mcg,
316 HERMON_NUM_QP_PER_MCG_MIN);
317 cp->cp_num_qp_per_mcg = min(cp->cp_num_qp_per_mcg,
318 (1 << devlim->log_max_qp_mcg) - 8);
319 cp->cp_num_qp_per_mcg = (1 << highbit(cp->cp_num_qp_per_mcg + 7)) - 8;
320 cp->cp_log_num_mcg = min(hermon_log_num_mcg, devlim->log_max_mcg);
321 cp->cp_log_num_mcg_hash = hermon_log_num_mcg_hash;
322
323 /* until srq_resize is debugged, disable it */
324 cp->cp_srq_resize_enabled = 0;
325
326 /* cp->cp_log_num_uar = hermon_log_num_uar; */
327 /*
328 * now, we HAVE to calculate the number of UAR pages, so that we can
329 * get the blueflame stuff correct as well
330 */
331
332 size = devlim->log_max_uar_sz;
333 /* 1MB (2^^20) times size (2^^size) / sparc_pg (2^^13) */
334 num = (20 + size) - 13; /* XXX - consider using PAGESHIFT */
335 if (devlim->blu_flm)
336 num -= 1; /* if blueflame, only half the size for UARs */
337 cp->cp_log_num_uar = min(hermon_log_num_uar, num);
338
339
340 /* while we're at it, calculate the index of the kernel uar page */
341 /* either the reserved uar's or 128, whichever is smaller */
342 state->hs_kernel_uar_index = (devlim->num_rsvd_uar > 128) ?
343 devlim->num_rsvd_uar : 128;
344
345 cp->cp_log_max_pkeytbl = port->log_max_pkey;
346
347 cp->cp_log_max_qp_sz = devlim->log_max_qp_sz;
348 cp->cp_log_max_cq_sz = devlim->log_max_cq_sz;
349 cp->cp_log_max_srq_sz = devlim->log_max_srq_sz;
350 cp->cp_log_max_gidtbl = port->log_max_gid;
351 cp->cp_max_mtu = port->ib_mtu; /* XXX now from query_port */
352 cp->cp_max_port_width = port->ib_port_wid; /* now from query_port */
353 cp->cp_max_vlcap = port->max_vl;
354 cp->cp_log_num_ah = hermon_log_num_ah;
355
356 /* Paranoia, ensure no arrays indexed by port_num are out of bounds */
357 cp->cp_num_ports = devlim->num_ports;
358 if (cp->cp_num_ports > HERMON_MAX_PORTS) {
359 cmn_err(CE_CONT, "device has more ports (%d) than are "
360 "supported; Using %d ports\n",
361 cp->cp_num_ports, HERMON_MAX_PORTS);
362 cp->cp_num_ports = HERMON_MAX_PORTS;
363 };
364
365 /* allocate variable sized arrays */
366 for (i = 0; i < HERMON_MAX_PORTS; i++) {
367 state->hs_pkey[i] = kmem_zalloc((1 << cp->cp_log_max_pkeytbl) *
368 sizeof (ib_pkey_t), KM_SLEEP);
369 state->hs_guid[i] = kmem_zalloc((1 << cp->cp_log_max_gidtbl) *
370 sizeof (ib_guid_t), KM_SLEEP);
371 }
372
373 /* Determine WQE sizes from requested max SGLs */
374 hermon_cfg_wqe_sizes(state, cp);
375
376 /* Set whether to use MSIs or not */
377 cp->cp_use_msi_if_avail = hermon_use_msi_if_avail;
378
379 #if !defined(_ELF64)
380 /*
381 * Need to reduce the hermon kernel virtual memory footprint
382 * on 32-bit kernels.
383 */
384 cp->cp_log_num_mtt -= 6;
385 cp->cp_log_num_dmpt -= 6;
386 cp->cp_log_num_pd -= 6;
387 cp->cp_log_num_qp -= 6;
388 cp->cp_log_num_cq -= 6;
389 cp->cp_log_num_srq -= 6;
390 cp->cp_log_num_rdb = cp->cp_log_num_qp +
391 min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp);
392 cp->cp_hca_max_rdma_in_qp = cp->cp_hca_max_rdma_out_qp =
393 1 << min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp);
394 #endif
395
396 return (DDI_SUCCESS);
397 }
398
399
400 /*
401 * hermon_cfg_profile_fini()
402 * Context: Only called from attach() and/or detach() path contexts
403 */
404 void
405 hermon_cfg_profile_fini(hermon_state_t *state)
406 {
407 /*
408 * Free up the space for configuration profile
409 */
410 kmem_free(state->hs_cfg_profile, sizeof (hermon_cfg_profile_t));
411 }
412
413
414 /*
415 * hermon_cfg_wqe_sizes()
416 * Context: Only called from attach() path context
417 */
418 static void
419 hermon_cfg_wqe_sizes(hermon_state_t *state, hermon_cfg_profile_t *cp)
420 {
421 uint_t max_size, log2;
422 uint_t max_sgl, real_max_sgl;
423
424 /*
425 * Get the requested maximum number SGL per WQE from the Hermon
426 * patchable variable
427 */
428 max_sgl = hermon_wqe_max_sgl;
429
430 /*
431 * Use requested maximum number of SGL to calculate the max descriptor
432 * size (while guaranteeing that the descriptor size is a power-of-2
433 * cachelines). We have to use the calculation for QP1 MLX transport
434 * because the possibility that we might need to inline a GRH, along
435 * with all the other headers and alignment restrictions, sets the
436 * maximum for the number of SGLs that we can advertise support for.
437 */
438 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
439 log2 = highbit(max_size);
440 if ((max_size & (max_size - 1)) == 0) {
441 log2 = log2 - 1;
442 }
443 max_size = (1 << log2);
444
445 max_size = min(max_size, state->hs_devlim.max_desc_sz_sq);
446
447 /*
448 * Then use the calculated max descriptor size to determine the "real"
449 * maximum SGL (the number beyond which we would roll over to the next
450 * power-of-2).
451 */
452 real_max_sgl = (max_size - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
453
454 /* Then save away this configuration information */
455 cp->cp_wqe_max_sgl = max_sgl;
456 cp->cp_wqe_real_max_sgl = real_max_sgl;
457
458 /* SRQ SGL gets set to it's own patchable variable value */
459 cp->cp_srq_max_sgl = hermon_srq_max_sgl;
460 }
461
#ifdef __sparc
/*
 * hermon_check_iommu_bypass()
 *    Context: Only called from attach() path context
 *
 *    Decides the IOMMU bypass mode stored in the configuration profile by
 *    trying to allocate a DMA handle with DDI_DMA_FORCE_PHYSICAL and
 *    DDI_DMA_RELAXED_ORDERING set, then test-allocates and binds a small
 *    buffer with that handle.  If anything other than the DDI_DMA_BADATTR
 *    case fails, the hermon_kernel_data_ro and hermon_user_data_ro
 *    globals are set to HERMON_RO_DISABLED.
 *
 *    XXX This is a DMA allocation routine outside the normal
 *    path.  FMA hardening will not like this.
 */
static void
hermon_check_iommu_bypass(hermon_state_t *state, hermon_cfg_profile_t *cp)
{
	ddi_dma_attr_t		attr;
	ddi_dma_handle_t	hdl;
	ddi_acc_handle_t	acc;
	ddi_dma_cookie_t	cookie;
	caddr_t			buf;
	size_t			buflen;
	uint_t			ncookies;
	int			rv;
	int			ro_ok = 1;

	hermon_dma_attr_init(state, &attr);

	/* Try mapping for IOMMU bypass (Force Physical) */
	attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL |
	    DDI_DMA_RELAXED_ORDERING;

	/*
	 * Call ddi_dma_alloc_handle().  If this returns DDI_DMA_BADATTR then
	 * it is not possible to use IOMMU bypass with our PCI bridge parent,
	 * and we configure for non-bypass (ie: normal) mapping.  Since this
	 * function can only be called if iommu bypass was requested in the
	 * config profile, every other outcome configures for bypass.
	 */
	rv = ddi_dma_alloc_handle(state->hs_dip, &attr, DDI_DMA_SLEEP, NULL,
	    &hdl);
	if (rv == DDI_DMA_BADATTR) {
		cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL;
		return;
	}

	cp->cp_iommu_bypass = HERMON_BINDMEM_BYPASS;

	if (rv != DDI_SUCCESS) {
		/* failed somehow; there is no handle to release */
		hermon_kernel_data_ro = HERMON_RO_DISABLED;
		hermon_user_data_ro = HERMON_RO_DISABLED;
		return;
	}

	/* Test-allocate a small buffer and try binding it to the handle */
	rv = ddi_dma_mem_alloc(hdl, 256, &state->hs_reg_accattr,
	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &buf, &buflen, &acc);
	if (rv == DDI_SUCCESS) {
		rv = ddi_dma_addr_bind_handle(hdl, NULL, buf, buflen,
		    DDI_DMA_RDWR, DDI_DMA_SLEEP, NULL, &cookie, &ncookies);
		if (rv == DDI_DMA_MAPPED) {
			(void) ddi_dma_unbind_handle(hdl);
		} else {
			ro_ok = 0;
		}
		ddi_dma_mem_free(&acc);
	} else {
		/* failed somehow */
		ro_ok = 0;
	}

	/* Any failure in the probe above turns relaxed ordering off */
	if (!ro_ok) {
		hermon_kernel_data_ro = HERMON_RO_DISABLED;
		hermon_user_data_ro = HERMON_RO_DISABLED;
	}

	ddi_dma_free_handle(&hdl);
}
#endif
--- EOF ---