XXXX introduce drv_sectohz
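
This webrev converts the one timeout computation in ddi_intr_irm.c from
drv_usectohz(seconds * 1000000) to the new drv_sectohz(seconds) interface.
The interface itself is introduced elsewhere in the changeset, so its
definition does not appear in this diff. As a hedged sketch only, drv_sectohz()
could be layered on the existing drv_usectohz(9F); the header placement and
exact form below are assumptions, not part of this change:

	#include <sys/types.h>	/* clock_t */
	#include <sys/time.h>	/* MICROSEC */
	#include <sys/ddi.h>	/* drv_usectohz(9F) */

	/* Sketch: convert a delay expressed in whole seconds to clock ticks. */
	#define	drv_sectohz(s)	(drv_usectohz((clock_t)(s) * MICROSEC))
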
--- old/usr/src/uts/common/os/ddi_intr_irm.c
+++ new/usr/src/uts/common/os/ddi_intr_irm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 #include <sys/note.h>
26 26 #include <sys/sysmacros.h>
27 27 #include <sys/types.h>
28 28 #include <sys/param.h>
29 29 #include <sys/systm.h>
30 30 #include <sys/kmem.h>
31 31 #include <sys/cmn_err.h>
32 32 #include <sys/debug.h>
33 33 #include <sys/ddi.h>
34 34 #include <sys/sunndi.h>
35 35 #include <sys/ndi_impldefs.h> /* include prototypes */
36 36
37 37 #if defined(__i386) || defined(__amd64)
38 38 /*
39 39 * MSI-X allocation limit.
40 40 */
41 41 extern uint_t ddi_msix_alloc_limit;
42 42 #endif
43 43
44 44 /*
45 45 * Interrupt Resource Management (IRM).
46 46 */
47 47
48 48 #define DDI_IRM_BALANCE_DELAY (60) /* In seconds */
49 49
50 50 #define DDI_IRM_HAS_CB(c) ((c) && (c->cb_flags & DDI_CB_FLAG_INTR))
51 51
52 52 #define DDI_IRM_IS_REDUCIBLE(r) (((r->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
53 53 (r->ireq_type == DDI_INTR_TYPE_MSIX)) || \
54 54 (r->ireq_flags & DDI_IRM_FLAG_NEW))
55 55
56 56 extern pri_t minclsyspri;
57 57
58 58 /* Global policies */
59 59 int irm_enable = 1;
60 60 boolean_t irm_active = B_FALSE;
61 61 int irm_default_policy = DDI_IRM_POLICY_LARGE;
62 62 uint_t irm_balance_delay = DDI_IRM_BALANCE_DELAY;
63 63
64 64 /* Global list of interrupt pools */
65 65 kmutex_t irm_pools_lock;
66 66 list_t irm_pools_list;
67 67
68 68 /* Global debug tunables */
69 69 #ifdef DEBUG
70 70 int irm_debug_policy = 0;
71 71 uint_t irm_debug_size = 0;
72 72 #endif /* DEBUG */
73 73
74 74 static void irm_balance_thread(ddi_irm_pool_t *);
75 75 static void i_ddi_irm_balance(ddi_irm_pool_t *);
76 76 static void i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
77 77 static void i_ddi_irm_reduce(ddi_irm_pool_t *pool);
78 78 static int i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *, int, int);
79 79 static void i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
80 80 static void i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
81 81 static int i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
82 82 static int i_ddi_irm_modify_increase(ddi_irm_req_t *, int);
83 83
84 84 /*
85 85 * OS Initialization Routines
86 86 */
87 87
88 88 /*
89 89 * irm_init()
90 90 *
91 91 * Initialize IRM subsystem before any drivers are attached.
92 92 */
93 93 void
94 94 irm_init(void)
95 95 {
96 96 /* Do nothing if IRM is disabled */
97 97 if (!irm_enable)
98 98 return;
99 99
100 100 /* Verify that the default balancing policy is valid */
101 101 if (!DDI_IRM_POLICY_VALID(irm_default_policy))
102 102 irm_default_policy = DDI_IRM_POLICY_LARGE;
103 103
104 104 /* Initialize the global list of interrupt pools */
105 105 mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
106 106 list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
107 107 offsetof(ddi_irm_pool_t, ipool_link));
108 108 }
109 109
110 110 /*
111 111 * i_ddi_irm_poststartup()
112 112 *
113 113 * IRM is not activated until after the IO subsystem is initialized.
114 114 * When activated, per-pool balancing threads are spawned and a flag
115 115 * is set so that all future pools will be activated when created.
116 116 *
117 117 * NOTE: the global variable 'irm_enable' disables IRM if zero.
118 118 */
119 119 void
120 120 i_ddi_irm_poststartup(void)
121 121 {
122 122 ddi_irm_pool_t *pool_p;
123 123
124 124 /* Do nothing if IRM is disabled */
125 125 if (!irm_enable)
126 126 return;
127 127
128 128 /* Lock the global list */
129 129 mutex_enter(&irm_pools_lock);
130 130
131 131 /* Activate all defined pools */
132 132 for (pool_p = list_head(&irm_pools_list); pool_p;
133 133 pool_p = list_next(&irm_pools_list, pool_p))
134 134 pool_p->ipool_thread = thread_create(NULL, 0,
135 135 irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
136 136
137 137 /* Set future pools to be active */
138 138 irm_active = B_TRUE;
139 139
140 140 /* Unlock the global list */
141 141 mutex_exit(&irm_pools_lock);
142 142 }
143 143
144 144 /*
145 145 * NDI interfaces for creating/destroying IRM pools.
146 146 */
147 147
148 148 /*
149 149 * ndi_irm_create()
150 150 *
151 151 * Nexus interface to create an IRM pool. Create the new
152 152 * pool and add it to the global list of interrupt pools.
153 153 */
154 154 int
155 155 ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
156 156 ddi_irm_pool_t **pool_retp)
157 157 {
158 158 ddi_irm_pool_t *pool_p;
159 159
160 160 ASSERT(dip != NULL);
161 161 ASSERT(paramsp != NULL);
162 162 ASSERT(pool_retp != NULL);
163 163 ASSERT(paramsp->iparams_total >= 1);
164 164 ASSERT(paramsp->iparams_types != 0);
165 165
166 166 DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));
167 167
168 168 /* Check if IRM is enabled */
169 169 if (!irm_enable)
170 170 return (NDI_FAILURE);
171 171
172 172 /* Validate parameters */
173 173 if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
174 174 (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0))
175 175 return (NDI_FAILURE);
176 176
177 177 /* Allocate and initialize the pool */
178 178 pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
179 179 pool_p->ipool_owner = dip;
180 180 pool_p->ipool_policy = irm_default_policy;
181 181 pool_p->ipool_types = paramsp->iparams_types;
182 182 pool_p->ipool_totsz = paramsp->iparams_total;
183 183 pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC,
184 184 paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER));
185 185 list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
186 186 offsetof(ddi_irm_req_t, ireq_link));
187 187 list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
188 188 offsetof(ddi_irm_req_t, ireq_scratch_link));
189 189 cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
190 190 mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
191 191 mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);
192 192
193 193 /* Add to global list of pools */
194 194 mutex_enter(&irm_pools_lock);
195 195 list_insert_tail(&irm_pools_list, pool_p);
196 196 mutex_exit(&irm_pools_lock);
197 197
198 198 /* If IRM is active, then activate the pool */
199 199 if (irm_active)
200 200 pool_p->ipool_thread = thread_create(NULL, 0,
201 201 irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
202 202
203 203 *pool_retp = pool_p;
204 204 return (NDI_SUCCESS);
205 205 }
206 206
207 207 /*
208 208 * ndi_irm_resize_pool()
209 209 *
210 210 * Nexus interface to resize IRM pool. If the pool size drops
211 211 * below the allocated number of vectors then initiate rebalance
212 212 * operation before resizing the pool. If rebalance operation fails
213 213 * then return NDI_FAILURE.
214 214 */
215 215 int
216 216 ndi_irm_resize_pool(ddi_irm_pool_t *pool_p, uint_t new_size)
217 217 {
218 218 uint_t prev_size;
219 219
220 220 ASSERT(pool_p != NULL);
221 221
222 222 DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
223 223 " current-size 0x%x new-size 0x%x\n",
224 224 (void *)pool_p, pool_p->ipool_totsz, new_size));
225 225
226 226 if (pool_p == NULL)
227 227 return (NDI_EINVAL);
228 228
229 229 /* Check if IRM is enabled */
230 230 if (!irm_enable)
231 231 return (NDI_FAILURE);
232 232
233 233 mutex_enter(&pool_p->ipool_lock);
234 234
235 235 /*
236 236 * If we are increasing the pool size or if the reserved
237 237 * number of vectors is <= the new pool size then simply
238 238 	 * update the pool size and enqueue a rebalance operation
239 239 * if necessary to use the new vectors.
240 240 */
241 241 if ((pool_p->ipool_totsz < new_size) ||
242 242 (pool_p->ipool_resno <= new_size)) {
243 243 /* set new pool size */
244 244 pool_p->ipool_totsz = new_size;
245 245 /* adjust the default allocation limit */
246 246 pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC,
247 247 MAX(DDI_MIN_MSIX_ALLOC, new_size / DDI_MSIX_ALLOC_DIVIDER));
248 248 /* queue a rebalance operation to use the new vectors */
249 249 if (pool_p->ipool_reqno > pool_p->ipool_resno)
250 250 i_ddi_irm_enqueue(pool_p, B_FALSE);
251 251 mutex_exit(&pool_p->ipool_lock);
252 252 return (NDI_SUCCESS);
253 253 }
254 254
255 255 DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
256 256 " needs a rebalance operation\n", (void *)pool_p));
257 257
258 258 /*
259 259 * requires a rebalance operation
260 260 */
261 261 /* save the current pool size */
262 262 prev_size = pool_p->ipool_totsz;
263 263 /* set the pool size to the desired new value */
264 264 pool_p->ipool_totsz = new_size;
265 265 /* perform the rebalance operation */
266 266 i_ddi_irm_enqueue(pool_p, B_TRUE);
267 267
268 268 /*
269 269 * If rebalance operation couldn't free up enough
270 270 * vectors then fail the resize operation.
271 271 */
272 272 if (pool_p->ipool_resno > new_size) { /* rebalance failed */
273 273 /* restore the pool size to the previous value */
274 274 pool_p->ipool_totsz = prev_size;
275 275 /* enqueue a rebalance operation for the original pool size */
276 276 i_ddi_irm_enqueue(pool_p, B_FALSE);
277 277 mutex_exit(&pool_p->ipool_lock);
278 278 return (NDI_FAILURE);
279 279 } else { /* rebalance worked */
280 280 /* adjust the default allocation limit */
281 281 pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC,
282 282 MAX(DDI_MIN_MSIX_ALLOC, new_size / DDI_MSIX_ALLOC_DIVIDER));
283 283 mutex_exit(&pool_p->ipool_lock);
284 284 DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
285 285 " resized from %x to %x\n",
286 286 (void *)pool_p, prev_size, pool_p->ipool_totsz));
287 287 return (NDI_SUCCESS);
288 288 }
289 289 }
290 290
291 291 /*
292 292 * ndi_irm_destroy()
293 293 *
294 294 * Nexus interface to destroy an IRM pool. Destroy the pool
295 295 * and remove it from the global list of interrupt pools.
296 296 */
297 297 int
298 298 ndi_irm_destroy(ddi_irm_pool_t *pool_p)
299 299 {
300 300 ASSERT(pool_p != NULL);
301 301 ASSERT(pool_p->ipool_resno == 0);
302 302
303 303 DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
304 304 (void *)pool_p));
305 305
306 306 /* Validate parameters */
307 307 if (pool_p == NULL)
308 308 return (NDI_FAILURE);
309 309
310 310 /* Validate that pool is empty */
311 311 if (pool_p->ipool_resno != 0)
312 312 return (NDI_BUSY);
313 313
314 314 /* Remove the pool from the global list */
315 315 mutex_enter(&irm_pools_lock);
316 316 list_remove(&irm_pools_list, pool_p);
317 317 mutex_exit(&irm_pools_lock);
318 318
319 319 /* Terminate the balancing thread */
320 320 mutex_enter(&pool_p->ipool_lock);
321 321 if (pool_p->ipool_thread &&
322 322 (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
323 323 pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
324 324 cv_signal(&pool_p->ipool_cv);
325 325 mutex_exit(&pool_p->ipool_lock);
326 326 thread_join(pool_p->ipool_thread->t_did);
327 327 } else
328 328 mutex_exit(&pool_p->ipool_lock);
329 329
330 330 /* Destroy the pool */
331 331 cv_destroy(&pool_p->ipool_cv);
332 332 mutex_destroy(&pool_p->ipool_lock);
333 333 mutex_destroy(&pool_p->ipool_navail_lock);
334 334 list_destroy(&pool_p->ipool_req_list);
335 335 list_destroy(&pool_p->ipool_scratch_list);
336 336 kmem_free(pool_p, sizeof (ddi_irm_pool_t));
337 337
338 338 return (NDI_SUCCESS);
339 339 }
340 340
341 341 /*
342 342 * Insert/Modify/Remove Interrupt Requests
343 343 */
344 344
345 345 /*
346 346 * i_ddi_irm_insert()
347 347 *
348 348 * Insert a new request into an interrupt pool, and balance the pool.
349 349 */
350 350 int
351 351 i_ddi_irm_insert(dev_info_t *dip, int type, int count)
352 352 {
353 353 ddi_irm_req_t *req_p;
354 354 devinfo_intr_t *intr_p;
355 355 ddi_irm_pool_t *pool_p;
356 356 uint_t nreq, nmin, npartial;
357 357 boolean_t irm_flag = B_FALSE;
358 358
359 359 ASSERT(dip != NULL);
360 360 ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
361 361 ASSERT(count > 0);
362 362
363 363 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
364 364 (void *)dip, type, count));
365 365
366 366 /* Validate parameters */
367 367 if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
368 368 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
369 369 return (DDI_EINVAL);
370 370 }
371 371
372 372 /* Check for an existing request */
373 373 if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
374 374 (intr_p->devi_irm_req_p != NULL))
375 375 return (DDI_SUCCESS);
376 376
377 377 /* Check for IRM support from the system */
378 378 if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
379 379 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
380 380 return (DDI_ENOTSUP);
381 381 }
382 382
383 383 /* Check for IRM support from the driver */
384 384 if (i_ddi_irm_supported(dip, type) == DDI_SUCCESS)
385 385 irm_flag = B_TRUE;
386 386
387 387 /* Determine request size */
388 388 nreq = (irm_flag) ? count :
389 389 MIN(count, i_ddi_intr_get_limit(dip, type, pool_p));
390 390 nmin = (irm_flag) ? 1 : nreq;
391 391 npartial = MIN(nreq, pool_p->ipool_defsz);
392 392
393 393 /* Allocate and initialize the request */
394 394 req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
395 395 req_p->ireq_type = type;
396 396 req_p->ireq_dip = dip;
397 397 req_p->ireq_pool_p = pool_p;
398 398 req_p->ireq_nreq = nreq;
399 399 req_p->ireq_flags = DDI_IRM_FLAG_NEW;
400 400 if (irm_flag)
401 401 req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
402 402
403 403 /* Lock the pool */
404 404 mutex_enter(&pool_p->ipool_lock);
405 405
406 406 /* Check for minimal fit before inserting */
407 407 if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
408 408 cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
409 409 ddi_driver_name(dip), ddi_get_instance(dip));
410 410 mutex_exit(&pool_p->ipool_lock);
411 411 kmem_free(req_p, sizeof (ddi_irm_req_t));
412 412 return (DDI_EAGAIN);
413 413 }
414 414
415 415 /* Insert the request into the pool */
416 416 pool_p->ipool_reqno += nreq;
417 417 pool_p->ipool_minno += nmin;
418 418 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
419 419
420 420 /*
421 421 * Try to fulfill the request.
422 422 *
423 423 * If all the interrupts are available, and either the request
424 424 * is static or the pool is active, then just take them directly.
425 425 *
426 426 * If only some of the interrupts are available, and the request
427 427 * can receive future callbacks, then take some now but queue the
428 428 * pool to be rebalanced later.
429 429 *
430 430 * Otherwise, immediately rebalance the pool and wait.
431 431 */
432 432 if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
433 433 ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {
434 434
435 435 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
436 436 "request completely fulfilled.\n"));
437 437 pool_p->ipool_resno += nreq;
438 438 req_p->ireq_navail = nreq;
439 439 req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
440 440
441 441 } else if (irm_flag &&
442 442 ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {
443 443
444 444 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
445 445 "request partially fulfilled.\n"));
446 446 pool_p->ipool_resno += npartial;
447 447 req_p->ireq_navail = npartial;
448 448 req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
449 449 i_ddi_irm_enqueue(pool_p, B_FALSE);
450 450
451 451 } else {
452 452
453 453 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
454 454 "request needs immediate rebalance.\n"));
455 455 i_ddi_irm_enqueue(pool_p, B_TRUE);
456 456 req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
457 457 }
458 458
459 459 /* Fail if the request cannot be fulfilled at all */
460 460 if (req_p->ireq_navail == 0) {
461 461 cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
462 462 ddi_driver_name(dip), ddi_get_instance(dip));
463 463 pool_p->ipool_reqno -= nreq;
464 464 pool_p->ipool_minno -= nmin;
465 465 list_remove(&pool_p->ipool_req_list, req_p);
466 466 mutex_exit(&pool_p->ipool_lock);
467 467 kmem_free(req_p, sizeof (ddi_irm_req_t));
468 468 return (DDI_EAGAIN);
469 469 }
470 470
471 471 /* Unlock the pool */
472 472 mutex_exit(&pool_p->ipool_lock);
473 473
474 474 intr_p->devi_irm_req_p = req_p;
475 475 return (DDI_SUCCESS);
476 476 }
477 477
478 478 /*
479 479 * i_ddi_irm_modify()
480 480 *
481 481 * Modify an existing request in an interrupt pool, and balance the pool.
482 482 */
483 483 int
484 484 i_ddi_irm_modify(dev_info_t *dip, int nreq)
485 485 {
486 486 devinfo_intr_t *intr_p;
487 487 ddi_irm_req_t *req_p;
488 488 ddi_irm_pool_t *pool_p;
489 489 int type;
490 490 int retval = DDI_SUCCESS;
491 491
492 492 ASSERT(dip != NULL);
493 493 ASSERT(nreq > 0);
494 494
495 495 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
496 496 (void *)dip, nreq));
497 497
498 498 /* Validate parameters */
499 499 if ((dip == NULL) || (nreq < 1)) {
500 500 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
501 501 return (DDI_EINVAL);
502 502 }
503 503
504 504 /* Do nothing if not mapped to an IRM pool */
505 505 if (((intr_p = DEVI(dip)->devi_intr_p) == NULL) ||
506 506 ((req_p = intr_p->devi_irm_req_p) == NULL))
507 507 return (DDI_SUCCESS);
508 508
509 509 /* Do nothing if new size is the same */
510 510 if (nreq == req_p->ireq_nreq)
511 511 return (DDI_SUCCESS);
512 512
513 513 /* Do not allow MSI requests to be resized */
514 514 if ((type = req_p->ireq_type) == DDI_INTR_TYPE_MSI) {
515 515 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid type\n"));
516 516 return (DDI_ENOTSUP);
517 517 }
518 518
519 519 /* Select the pool */
520 520 if ((pool_p = req_p->ireq_pool_p) == NULL) {
521 521 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: missing pool\n"));
522 522 return (DDI_FAILURE);
523 523 }
524 524
525 525 /* Validate request size is not too large */
526 526 if (nreq > i_ddi_intr_get_limit(dip, type, pool_p)) {
527 527 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
528 528 return (DDI_EINVAL);
529 529 }
530 530
531 531 /* Lock the pool */
532 532 mutex_enter(&pool_p->ipool_lock);
533 533
534 534 /*
535 535 * Process the modification.
536 536 *
537 537 * - To increase a non-IRM request, call the implementation in
538 538 * i_ddi_irm_modify_increase().
539 539 *
540 540 * - To decrease a non-IRM request, directly update the pool and
541 541 * request, then queue the pool for later rebalancing.
542 542 *
543 543 * - To modify an IRM request, always queue the pool for later
544 544 * rebalancing. IRM consumers rely upon callbacks for changes.
545 545 */
546 546 if ((nreq > req_p->ireq_nreq) &&
547 547 (i_ddi_irm_supported(dip, type) != DDI_SUCCESS)) {
548 548
549 549 retval = i_ddi_irm_modify_increase(req_p, nreq);
550 550
551 551 } else {
552 552
553 553 /* Update pool and request */
554 554 pool_p->ipool_reqno -= req_p->ireq_nreq;
555 555 pool_p->ipool_reqno += nreq;
556 556 if (i_ddi_irm_supported(dip, type) != DDI_SUCCESS) {
557 557 pool_p->ipool_minno -= req_p->ireq_navail;
558 558 pool_p->ipool_resno -= req_p->ireq_navail;
559 559 pool_p->ipool_minno += nreq;
560 560 pool_p->ipool_resno += nreq;
561 561 req_p->ireq_navail = nreq;
562 562 }
563 563 req_p->ireq_nreq = nreq;
564 564
565 565 /* Re-sort request into the pool */
566 566 list_remove(&pool_p->ipool_req_list, req_p);
567 567 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
568 568
569 569 /* Queue pool for asynchronous rebalance */
570 570 i_ddi_irm_enqueue(pool_p, B_FALSE);
571 571 }
572 572
573 573 /* Unlock the pool */
574 574 mutex_exit(&pool_p->ipool_lock);
575 575
576 576 return (retval);
577 577 }
578 578
579 579 /*
580 580 * i_ddi_irm_modify_increase()
581 581 *
582 582 * Increase a non-IRM request. The additional interrupts are
583 583 * directly taken from the pool when possible. Otherwise, an
584 584 * immediate, synchronous rebalance is performed. A temporary
585 585 * proxy request is used for any rebalance operation to ensure
586 586 * the request is not reduced below its current allocation.
587 587 *
588 588 * NOTE: pool must already be locked.
589 589 */
590 590 static int
591 591 i_ddi_irm_modify_increase(ddi_irm_req_t *req_p, int nreq)
592 592 {
593 593 dev_info_t *dip = req_p->ireq_dip;
594 594 ddi_irm_pool_t *pool_p = req_p->ireq_pool_p;
595 595 ddi_irm_req_t new_req;
596 596 int count, delta;
597 597
598 598 ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
599 599
600 600 /* Compute number of additional vectors */
601 601 count = nreq - req_p->ireq_nreq;
602 602
603 603 /* Check for minimal fit */
604 604 if ((pool_p->ipool_minno + count) > pool_p->ipool_totsz) {
605 605 cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
606 606 ddi_driver_name(dip), ddi_get_instance(dip));
607 607 return (DDI_EAGAIN);
608 608 }
609 609
610 610 /* Update the pool */
611 611 pool_p->ipool_reqno += count;
612 612 pool_p->ipool_minno += count;
613 613
614 614 /* Attempt direct implementation */
615 615 if ((pool_p->ipool_resno + count) <= pool_p->ipool_totsz) {
616 616 req_p->ireq_nreq += count;
617 617 req_p->ireq_navail += count;
618 618 pool_p->ipool_resno += count;
619 619 return (DDI_SUCCESS);
620 620 }
621 621
622 622 /* Rebalance required: fail if pool is not active */
623 623 if ((pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE) == 0) {
624 624 pool_p->ipool_reqno -= count;
625 625 pool_p->ipool_minno -= count;
626 626 return (DDI_EAGAIN);
627 627 }
628 628
629 629 /* Insert temporary proxy request */
630 630 bzero(&new_req, sizeof (ddi_irm_req_t));
631 631 new_req.ireq_dip = dip;
632 632 new_req.ireq_nreq = count;
633 633 new_req.ireq_pool_p = pool_p;
634 634 new_req.ireq_type = req_p->ireq_type;
635 635 new_req.ireq_flags = DDI_IRM_FLAG_NEW;
636 636 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, &new_req);
637 637
638 638 /* Synchronously rebalance */
639 639 i_ddi_irm_enqueue(pool_p, B_TRUE);
640 640
641 641 /* Remove proxy request, and merge into original request */
642 642 req_p->ireq_nreq += count;
643 643 if ((delta = (count - new_req.ireq_navail)) > 0) {
644 644 req_p->ireq_nreq -= delta;
645 645 pool_p->ipool_reqno -= delta;
646 646 pool_p->ipool_minno -= delta;
647 647 }
648 648 req_p->ireq_navail += new_req.ireq_navail;
649 649 list_remove(&pool_p->ipool_req_list, req_p);
650 650 list_remove(&pool_p->ipool_req_list, &new_req);
651 651 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
652 652
653 653 return (DDI_SUCCESS);
654 654 }
655 655
656 656 /*
657 657 * i_ddi_irm_remove()
658 658 *
659 659 * Remove a request from an interrupt pool, and balance the pool.
660 660 */
661 661 int
662 662 i_ddi_irm_remove(dev_info_t *dip)
663 663 {
664 664 devinfo_intr_t *intr_p;
665 665 ddi_irm_pool_t *pool_p;
666 666 ddi_irm_req_t *req_p;
667 667 uint_t nmin;
668 668
669 669 ASSERT(dip != NULL);
670 670
671 671 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));
672 672
673 673 /* Validate parameters */
674 674 if (dip == NULL) {
675 675 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
676 676 return (DDI_EINVAL);
677 677 }
678 678
679 679 /* Check if the device has a request */
680 680 if (!(intr_p = DEVI(dip)->devi_intr_p) ||
681 681 !(req_p = intr_p->devi_irm_req_p)) {
682 682 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: not found\n"));
683 683 return (DDI_EINVAL);
684 684 }
685 685
686 686 /* Lock the pool */
687 687 pool_p = req_p->ireq_pool_p;
688 688 mutex_enter(&pool_p->ipool_lock);
689 689
690 690 /* Remove request */
691 691 nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
692 692 pool_p->ipool_minno -= nmin;
693 693 pool_p->ipool_reqno -= req_p->ireq_nreq;
694 694 pool_p->ipool_resno -= req_p->ireq_navail;
695 695 list_remove(&pool_p->ipool_req_list, req_p);
696 696
697 697 /* Queue pool to be rebalanced */
698 698 i_ddi_irm_enqueue(pool_p, B_FALSE);
699 699
700 700 /* Unlock the pool */
701 701 mutex_exit(&pool_p->ipool_lock);
702 702
703 703 /* Destroy the request */
704 704 intr_p->devi_irm_req_p = NULL;
705 705 kmem_free(req_p, sizeof (ddi_irm_req_t));
706 706
707 707 return (DDI_SUCCESS);
708 708 }
709 709
710 710 /*
711 711 * i_ddi_irm_set_cb()
712 712 *
713 713 * Change the callback flag for a request, in response to
714 714 * a change in its callback registration. Then rebalance
715 715 * the interrupt pool.
716 716 *
717 717 * NOTE: the request is not locked because the navail value
718 718 * is not directly affected. The balancing thread may
719 719 * modify the navail value in the background after it
720 720 * locks the request itself.
721 721 */
722 722 void
723 723 i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
724 724 {
725 725 devinfo_intr_t *intr_p;
726 726 ddi_irm_pool_t *pool_p;
727 727 ddi_irm_req_t *req_p;
728 728 uint_t nreq;
729 729
730 730 ASSERT(dip != NULL);
731 731
732 732 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
733 733 (void *)dip, (int)has_cb_flag));
734 734
735 735 /* Validate parameters */
736 736 if (dip == NULL)
737 737 return;
738 738
739 739 /* Check for association with interrupt pool */
740 740 if (!(intr_p = DEVI(dip)->devi_intr_p) ||
741 741 !(req_p = intr_p->devi_irm_req_p)) {
742 742 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
743 743 return;
744 744 }
745 745
746 746 /* Lock the pool */
747 747 pool_p = req_p->ireq_pool_p;
748 748 mutex_enter(&pool_p->ipool_lock);
749 749
750 750 /*
751 751 * Update the request and the pool
752 752 */
753 753 if (has_cb_flag) {
754 754
755 755 /* Update pool statistics */
756 756 if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
757 757 pool_p->ipool_minno -= (req_p->ireq_nreq - 1);
758 758
759 759 /* Update request */
760 760 req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
761 761
762 762 /* Rebalance in background */
763 763 i_ddi_irm_enqueue(pool_p, B_FALSE);
764 764
765 765 } else {
766 766
767 767 /* Determine new request size */
768 768 nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);
769 769
770 770 #if defined(__i386) || defined(__amd64)
771 771 /* Use the default static limit for non-IRM drivers */
772 772 if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
773 773 nreq = MIN(nreq, ddi_msix_alloc_limit);
774 774 #endif
775 775
776 776 /* Update pool statistics */
777 777 pool_p->ipool_reqno -= req_p->ireq_nreq;
778 778 pool_p->ipool_reqno += nreq;
779 779 if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
780 780 pool_p->ipool_minno -= 1;
781 781 pool_p->ipool_minno += nreq;
782 782 } else {
783 783 pool_p->ipool_minno -= req_p->ireq_nreq;
784 784 pool_p->ipool_minno += nreq;
785 785 }
786 786
787 787 /* Update request size, and re-sort in pool */
788 788 req_p->ireq_nreq = nreq;
789 789 list_remove(&pool_p->ipool_req_list, req_p);
790 790 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
791 791
792 792 /* Rebalance synchronously, before losing callback */
793 793 i_ddi_irm_enqueue(pool_p, B_TRUE);
794 794
795 795 /* Remove callback flag */
796 796 req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
797 797 }
798 798
799 799 /* Unlock the pool */
800 800 mutex_exit(&pool_p->ipool_lock);
801 801 }
802 802
803 803 /*
804 804 * i_ddi_irm_supported()
805 805 *
806 806 * Query if IRM is supported by a driver using a specific interrupt type.
807 807 * Notice that IRM is limited to MSI-X users with registered callbacks.
808 808 */
809 809 int
810 810 i_ddi_irm_supported(dev_info_t *dip, int type)
811 811 {
812 812 ddi_cb_t *cb_p = DEVI(dip)->devi_cb_p;
813 813
814 814 return ((DDI_IRM_HAS_CB(cb_p) && (type == DDI_INTR_TYPE_MSIX)) ?
815 815 DDI_SUCCESS : DDI_ENOTSUP);
816 816 }
817 817
818 818 /*
819 819 * Interrupt Pool Balancing
820 820 */
821 821
822 822 /*
823 823 * irm_balance_thread()
824 824 *
825 825 * One instance of this thread operates per each defined IRM pool.
826 826 * It does the initial activation of the pool, as well as balancing
827 827 * any requests that were queued up before the pool was active.
828 828 * Once active, it waits forever to service balance operations.
829 829 */
830 830 static void
831 831 irm_balance_thread(ddi_irm_pool_t *pool_p)
832 832 {
833 833 clock_t interval;
834 834
835 835 DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
836 836 (void *)pool_p));
837 837
838 838 /* Lock the pool */
839 839 mutex_enter(&pool_p->ipool_lock);
840 840
841 841 /* Perform initial balance if required */
842 842 if (pool_p->ipool_reqno > pool_p->ipool_resno)
843 843 i_ddi_irm_balance(pool_p);
844 844
845 845 /* Activate the pool */
846 846 pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;
847 847
848 848 /*
849 849 * Main loop.
850 850 	 * Iterate once before waiting on the signal, in case a signal was
851 851 	 * sent before this thread was created.
852 852 */
853 853 for (;;) {
854 854
855 855 /* Compute the delay interval */
856 - interval = drv_usectohz(irm_balance_delay * 1000000);
856 + interval = drv_sectohz(irm_balance_delay);
857 857
858 858 /* Wait one interval, or until there are waiters */
859 859 if ((interval > 0) &&
860 860 !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
861 861 !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
862 862 (void) cv_reltimedwait(&pool_p->ipool_cv,
863 863 &pool_p->ipool_lock, interval, TR_CLOCK_TICK);
864 864 }
865 865
866 866 /* Check if awakened to exit */
867 867 if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
868 868 DDI_INTR_IRMDBG((CE_CONT,
869 869 "irm_balance_thread: exiting...\n"));
870 870 mutex_exit(&pool_p->ipool_lock);
871 871 thread_exit();
872 872 }
873 873
874 874 /* Balance the pool */
875 875 i_ddi_irm_balance(pool_p);
876 876
877 877 /* Notify waiters */
878 878 if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
879 879 cv_broadcast(&pool_p->ipool_cv);
880 880 pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
881 881 }
882 882
883 883 /* Clear QUEUED condition */
884 884 pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);
885 885
886 886 /* Sleep until queued */
887 887 cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
888 888
889 889 DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));
890 890 }
891 891 }
892 892
893 893 /*
894 894 * i_ddi_irm_balance()
895 895 *
896 896 * Balance a pool. The general algorithm is to first reset all
897 897 * requests to their maximum size, use reduction algorithms to
898 898 * solve any imbalance, and then notify affected drivers.
899 899 */
900 900 static void
901 901 i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
902 902 {
903 903 ddi_irm_req_t *req_p;
904 904
905 905 #ifdef DEBUG
906 906 uint_t debug_totsz = 0;
907 907 int debug_policy = 0;
908 908 #endif /* DEBUG */
909 909
910 910 ASSERT(pool_p != NULL);
911 911 ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
912 912
913 913 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
914 914 (void *)pool_p));
915 915
916 916 #ifndef DEBUG
917 917 if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
918 918 #else
919 919 if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
920 920 #endif /* DEBUG */
921 921 DDI_INTR_IRMDBG((CE_CONT,
922 922 "i_ddi_irm_balance: pool already balanced\n"));
923 923 return;
924 924 }
925 925
926 926 #ifdef DEBUG /* Adjust size and policy settings */
927 927 if (irm_debug_size > pool_p->ipool_minno) {
928 928 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
929 929 irm_debug_size));
930 930 debug_totsz = pool_p->ipool_totsz;
931 931 pool_p->ipool_totsz = irm_debug_size;
932 932 }
933 933 if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
934 934 DDI_INTR_IRMDBG((CE_CONT,
935 935 "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
936 936 debug_policy = pool_p->ipool_policy;
937 937 pool_p->ipool_policy = irm_debug_policy;
938 938 }
939 939 #endif /* DEBUG */
940 940
941 941 /* Lock the availability lock */
942 942 mutex_enter(&pool_p->ipool_navail_lock);
943 943
944 944 /*
945 945 * Put all of the reducible requests into a scratch list.
946 946 * Reset each one of them to their maximum availability.
947 947 */
948 948 for (req_p = list_head(&pool_p->ipool_req_list); req_p;
949 949 req_p = list_next(&pool_p->ipool_req_list, req_p)) {
950 950 if (DDI_IRM_IS_REDUCIBLE(req_p)) {
951 951 pool_p->ipool_resno -= req_p->ireq_navail;
952 952 req_p->ireq_scratch = req_p->ireq_navail;
953 953 req_p->ireq_navail = req_p->ireq_nreq;
954 954 pool_p->ipool_resno += req_p->ireq_navail;
955 955 list_insert_tail(&pool_p->ipool_scratch_list, req_p);
956 956 }
957 957 }
958 958
959 959 /* Balance the requests */
960 960 i_ddi_irm_reduce(pool_p);
961 961
962 962 /* Unlock the availability lock */
963 963 mutex_exit(&pool_p->ipool_navail_lock);
964 964
965 965 /*
966 966 * Process REMOVE notifications.
967 967 *
968 968 * If a driver fails to release interrupts: exclude it from
969 969 * further processing, correct the resulting imbalance, and
970 970 * start over again at the head of the scratch list.
971 971 */
972 972 req_p = list_head(&pool_p->ipool_scratch_list);
973 973 while (req_p) {
974 974 if ((req_p->ireq_navail < req_p->ireq_scratch) &&
975 975 (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
976 976 list_remove(&pool_p->ipool_scratch_list, req_p);
977 977 mutex_enter(&pool_p->ipool_navail_lock);
978 978 i_ddi_irm_reduce(pool_p);
979 979 mutex_exit(&pool_p->ipool_navail_lock);
980 980 req_p = list_head(&pool_p->ipool_scratch_list);
981 981 } else {
982 982 req_p = list_next(&pool_p->ipool_scratch_list, req_p);
983 983 }
984 984 }
985 985
986 986 /*
987 987 * Process ADD notifications.
988 988 *
989 989 * This is the last use of the scratch list, so empty it.
990 990 */
991 991 while (req_p = list_remove_head(&pool_p->ipool_scratch_list)) {
992 992 if (req_p->ireq_navail > req_p->ireq_scratch) {
993 993 (void) i_ddi_irm_notify(pool_p, req_p);
994 994 }
995 995 }
996 996
997 997 #ifdef DEBUG /* Restore size and policy settings */
998 998 if (debug_totsz != 0)
999 999 pool_p->ipool_totsz = debug_totsz;
1000 1000 if (debug_policy != 0)
1001 1001 pool_p->ipool_policy = debug_policy;
1002 1002 #endif /* DEBUG */
1003 1003 }
1004 1004
1005 1005 /*
1006 1006 * i_ddi_irm_reduce()
1007 1007 *
1008 1008 * Use reduction algorithms to correct an imbalance in a pool.
1009 1009 */
1010 1010 static void
1011 1011 i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
1012 1012 {
1013 1013 int imbalance;
1014 1014
1015 1015 ASSERT(pool_p != NULL);
1016 1016 ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1017 1017 ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));
1018 1018
1019 1019 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
1020 1020 (void *)pool_p));
1021 1021
1022 1022 /* Compute the imbalance. Do nothing if already balanced. */
1023 1023 if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
1024 1024 return;
1025 1025
1026 1026 /*
1027 1027 * Try policy based reduction first. If it failed, then
1028 1028 * possibly reduce new requests as a last resort.
1029 1029 */
1030 1030 if (i_ddi_irm_reduce_by_policy(pool_p, imbalance, pool_p->ipool_policy)
1031 1031 != DDI_SUCCESS) {
1032 1032
1033 1033 DDI_INTR_IRMDBG((CE_CONT,
1034 1034 "i_ddi_irm_reduce: policy reductions failed.\n"));
1035 1035
1036 1036 /* Compute remaining imbalance */
1037 1037 imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;
1038 1038
1039 1039 ASSERT(imbalance > 0);
1040 1040
1041 1041 i_ddi_irm_reduce_new(pool_p, imbalance);
1042 1042 }
1043 1043 }
1044 1044
1045 1045 /*
1046 1046 * i_ddi_irm_enqueue()
1047 1047 *
1048 1048 * Queue a pool to be balanced. Signals the balancing thread to wake
1049 1049 * up and process the pool. If 'wait_flag' is true, then the current
1050 1050 * thread becomes a waiter and blocks until the balance is completed.
1051 1051 */
1052 1052 static void
1053 1053 i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
1054 1054 {
1055 1055 ASSERT(pool_p != NULL);
1056 1056 ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1057 1057
1058 1058 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
1059 1059 (void *)pool_p, (int)wait_flag));
1060 1060
1061 1061 /* Do nothing if pool is already balanced */
1062 1062 #ifndef DEBUG
1063 1063 if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
1064 1064 #else
1065 1065 if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
1066 1066 #endif /* DEBUG */
1067 1067 DDI_INTR_IRMDBG((CE_CONT,
1068 1068 "i_ddi_irm_enqueue: pool already balanced\n"));
1069 1069 return;
1070 1070 }
1071 1071
1072 1072 /* Avoid deadlocks when IRM is not active */
1073 1073 if (!irm_active && wait_flag) {
1074 1074 DDI_INTR_IRMDBG((CE_CONT,
1075 1075 "i_ddi_irm_enqueue: pool not active.\n"));
1076 1076 return;
1077 1077 }
1078 1078
1079 1079 if (wait_flag)
1080 1080 pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;
1081 1081
1082 1082 if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
1083 1083 pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
1084 1084 cv_signal(&pool_p->ipool_cv);
1085 1085 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
1086 1086 }
1087 1087
1088 1088 if (wait_flag) {
1089 1089 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
1090 1090 cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
1091 1091 }
1092 1092 }
1093 1093
1094 1094 /*
1095 1095 * i_ddi_irm_reduce_by_policy()
1096 1096 *
1097 1097 * Reduces requests based on reduction policies.
1098 1098 *
1099 1099 * For the DDI_IRM_POLICY_LARGE reduction policy, the algorithm
1100 1100 * generally reduces larger requests first, before advancing
1101 1101 * to smaller requests.
1102 1102 * For the DDI_IRM_POLICY_EVEN reduction policy, the algorithm
1103 1103 * reduces requests evenly, without giving a specific preference
1104 1104 * to smaller or larger requests. Each iteration reduces all
1105 1105 * reducible requests by the same amount until the imbalance is
1106 1106 * corrected.
1107 1107 *
1108 1108 * The scratch list is initially sorted in descending order by current
1109 1109 * navail values, which are maximized prior to reduction. This sorted
1110 1110 * order is preserved. It avoids reducing requests below the threshold
1111 1111 * of the interrupt pool's default allocation size.
1112 1112 *
1113 1113 * Optimizations in this algorithm include trying to reduce multiple
1114 1114 * requests together. And the algorithm attempts to reduce in larger
1115 1115 * increments when possible to minimize the total number of iterations.
1116 1116 */
1117 1117 static int
1118 1118 i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *pool_p, int imbalance, int policy)
1119 1119 {
1120 1120 ASSERT(pool_p != NULL);
1121 1121 ASSERT(imbalance > 0);
1122 1122 ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1123 1123
1124 1124 while (imbalance > 0) {
1125 1125 list_t *slist_p = &pool_p->ipool_scratch_list;
1126 1126 ddi_irm_req_t *req_p = list_head(slist_p), *last_p;
1127 1127 uint_t nreduce = 0, nremain = 0, stop_navail;
1128 1128 uint_t pool_defsz = pool_p->ipool_defsz;
1129 1129 uint_t reduction, max_redu;
1130 1130
1131 1131 /* Fail if none are reducible */
1132 1132 if (!req_p || req_p->ireq_navail <= pool_defsz) {
1133 1133 DDI_INTR_IRMDBG((CE_CONT,
1134 1134 "i_ddi_irm_reduce_by_policy: Failure. "
1135 1135 "All requests have downsized to low limit.\n"));
1136 1136 return (DDI_FAILURE);
1137 1137 }
1138 1138
1139 1139 /* Count reducible requests */
1140 1140 stop_navail = (policy == DDI_IRM_POLICY_LARGE) ?
1141 1141 req_p->ireq_navail - 1 : pool_defsz;
1142 1142 for (; req_p; req_p = list_next(slist_p, req_p)) {
1143 1143 if (req_p->ireq_navail <= stop_navail)
1144 1144 break;
1145 1145 nreduce++;
1146 1146 }
1147 1147
1148 1148 /* Compute reduction */
1149 1149 last_p = req_p ? list_prev(slist_p, req_p) : list_tail(slist_p);
1150 1150 if ((policy == DDI_IRM_POLICY_LARGE) && req_p &&
1151 1151 req_p->ireq_navail > pool_defsz)
1152 1152 reduction = last_p->ireq_navail - req_p->ireq_navail;
1153 1153 else
1154 1154 reduction = last_p->ireq_navail - pool_defsz;
1155 1155
1156 1156 if ((max_redu = reduction * nreduce) > imbalance) {
1157 1157 reduction = imbalance / nreduce;
1158 1158 nremain = imbalance % nreduce;
1159 1159 pool_p->ipool_resno -= imbalance;
1160 1160 imbalance = 0;
1161 1161 } else {
1162 1162 pool_p->ipool_resno -= max_redu;
1163 1163 imbalance -= max_redu;
1164 1164 }
1165 1165
1166 1166 /* Reduce */
1167 1167 for (req_p = list_head(slist_p); (reduction != 0) && nreduce--;
1168 1168 req_p = list_next(slist_p, req_p)) {
1169 1169 req_p->ireq_navail -= reduction;
1170 1170 }
1171 1171
1172 1172 for (req_p = last_p; nremain--;
1173 1173 req_p = list_prev(slist_p, req_p)) {
1174 1174 req_p->ireq_navail--;
1175 1175 }
1176 1176 }
1177 1177
1178 1178 return (DDI_SUCCESS);
1179 1179 }
1180 1180
1181 1181 /*
1182 1182 * i_ddi_irm_reduce_new()
1183 1183 *
1184 1184 * Reduces new requests. This is only used as a last resort
1185 1185 * after another reduction algorithm failed.
1186 1186 *
1187 1187 * NOTE: The pool locking in i_ddi_irm_insert() ensures
1188 1188 * there can be only one new request at a time in a pool.
1189 1189 */
1190 1190 static void
1191 1191 i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
1192 1192 {
1193 1193 ddi_irm_req_t *req_p;
1194 1194
1195 1195 ASSERT(pool_p != NULL);
1196 1196 ASSERT(imbalance > 0);
1197 1197 ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1198 1198
1199 1199 DDI_INTR_IRMDBG((CE_CONT,
1200 1200 "i_ddi_irm_reduce_new: pool_p %p imbalance %d\n",
1201 1201 (void *)pool_p, imbalance));
1202 1202
1203 1203 for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
1204 1204 req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1205 1205 if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
1206 1206 ASSERT(req_p->ireq_navail >= imbalance);
1207 1207 req_p->ireq_navail -= imbalance;
1208 1208 pool_p->ipool_resno -= imbalance;
1209 1209 return;
1210 1210 }
1211 1211 }
1212 1212
1213 1213 /* should never go here */
1214 1214 ASSERT(B_FALSE);
1215 1215 }
1216 1216
1217 1217 /*
1218 1218 * Miscellaneous Helper Functions
1219 1219 */
1220 1220
1221 1221 /*
1222 1222 * i_ddi_intr_get_pool()
1223 1223 *
1224 1224 * Get an IRM pool that supplies interrupts of a specified type.
1225 1225 * Invokes a DDI_INTROP_GETPOOL to the bus nexus driver. Fails
1226 1226 * if no pool exists.
1227 1227 */
1228 1228 ddi_irm_pool_t *
1229 1229 i_ddi_intr_get_pool(dev_info_t *dip, int type)
1230 1230 {
1231 1231 devinfo_intr_t *intr_p;
1232 1232 ddi_irm_pool_t *pool_p;
1233 1233 ddi_irm_req_t *req_p;
1234 1234 ddi_intr_handle_impl_t hdl;
1235 1235
1236 1236 ASSERT(dip != NULL);
1237 1237 ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
1238 1238
1239 1239 if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
1240 1240 ((req_p = intr_p->devi_irm_req_p) != NULL) &&
1241 1241 ((pool_p = req_p->ireq_pool_p) != NULL) &&
1242 1242 (pool_p->ipool_types & type)) {
1243 1243 return (pool_p);
1244 1244 }
1245 1245
1246 1246 bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
1247 1247 hdl.ih_dip = dip;
1248 1248 hdl.ih_type = type;
1249 1249
1250 1250 if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
1251 1251 &hdl, (void *)&pool_p) == DDI_SUCCESS)
1252 1252 return (pool_p);
1253 1253
1254 1254 return (NULL);
1255 1255 }
1256 1256
1257 1257 /*
1258 1258 * i_ddi_irm_insertion_sort()
1259 1259 *
1260 1260 * Use the insertion sort method to insert a request into a list.
1261 1261 * The list is sorted in descending order by request size.
1262 1262 */
1263 1263 static void
1264 1264 i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
1265 1265 {
1266 1266 ddi_irm_req_t *next_p;
1267 1267
1268 1268 next_p = list_head(req_list);
1269 1269
1270 1270 while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
1271 1271 next_p = list_next(req_list, next_p);
1272 1272
1273 1273 list_insert_before(req_list, next_p, req_p);
1274 1274 }
1275 1275
1276 1276 /*
1277 1277 * i_ddi_irm_notify()
1278 1278 *
1279 1279 * Notify a driver of changes to its interrupt request using the
1280 1280 * generic callback mechanism. Checks for errors in processing.
1281 1281 */
1282 1282 static int
1283 1283 i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
1284 1284 {
1285 1285 ddi_cb_action_t action;
1286 1286 ddi_cb_t *cb_p;
1287 1287 uint_t nintrs;
1288 1288 int ret, count;
1289 1289
1290 1290 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
1291 1291 (void *)pool_p, (void *)req_p));
1292 1292
1293 1293 /* Do not notify new or unchanged requests */
1294 1294 if ((req_p->ireq_navail == req_p->ireq_scratch) ||
1295 1295 (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
1296 1296 return (DDI_SUCCESS);
1297 1297
1298 1298 /* Determine action and count */
1299 1299 if (req_p->ireq_navail > req_p->ireq_scratch) {
1300 1300 action = DDI_CB_INTR_ADD;
1301 1301 count = req_p->ireq_navail - req_p->ireq_scratch;
1302 1302 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
1303 1303 count));
1304 1304 } else {
1305 1305 action = DDI_CB_INTR_REMOVE;
1306 1306 count = req_p->ireq_scratch - req_p->ireq_navail;
1307 1307 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
1308 1308 count));
1309 1309 }
1310 1310
1311 1311 /* Lookup driver callback */
1312 1312 if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
1313 1313 DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
1314 1314 return (DDI_FAILURE);
1315 1315 }
1316 1316
1317 1317 /* Do callback */
1318 1318 ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
1319 1319 cb_p->cb_arg1, cb_p->cb_arg2);
1320 1320
1321 1321 /* Log callback errors */
1322 1322 if (ret != DDI_SUCCESS) {
1323 1323 cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
1324 1324 ddi_driver_name(req_p->ireq_dip),
1325 1325 ddi_get_instance(req_p->ireq_dip), (int)action, ret);
1326 1326 }
1327 1327
1328 1328 /* Check if the driver exceeds its availability */
1329 1329 nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
1330 1330 if (nintrs > req_p->ireq_navail) {
1331 1331 cmn_err(CE_WARN, "%s%d: failed to release interrupts "
1332 1332 "(nintrs=%d, navail=%d).\n",
1333 1333 ddi_driver_name(req_p->ireq_dip),
1334 1334 ddi_get_instance(req_p->ireq_dip), nintrs,
1335 1335 req_p->ireq_navail);
1336 1336 pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
1337 1337 req_p->ireq_navail = nintrs;
1338 1338 return (DDI_FAILURE);
1339 1339 }
1340 1340
1341 1341 /* Update request */
1342 1342 req_p->ireq_scratch = req_p->ireq_navail;
1343 1343
1344 1344 return (DDI_SUCCESS);
1345 1345 }
1346 1346
1347 1347 /*
1348 1348 * i_ddi_irm_debug_balance()
1349 1349 *
1350 1350 * A debug/test only routine to force the immediate,
1351 1351 * synchronous rebalancing of an interrupt pool.
1352 1352 */
1353 1353 #ifdef DEBUG
1354 1354 void
1355 1355 i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
1356 1356 {
1357 1357 ddi_irm_pool_t *pool_p;
1358 1358 int type;
1359 1359
1360 1360 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
1361 1361 (void *)dip, (int)wait_flag));
1362 1362
1363 1363 if (((type = i_ddi_intr_get_current_type(dip)) != 0) &&
1364 1364 ((pool_p = i_ddi_intr_get_pool(dip, type)) != NULL)) {
1365 1365 mutex_enter(&pool_p->ipool_lock);
1366 1366 i_ddi_irm_enqueue(pool_p, wait_flag);
1367 1367 mutex_exit(&pool_p->ipool_lock);
1368 1368 }
1369 1369 }
1370 1370 #endif
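
A quick consistency check of the converted call site (illustrative only; the
helper below is hypothetical and not part of this changeset). With the delay
expressed in whole seconds, the rewritten line must yield the same tick count
as the original expression:

	#include <sys/types.h>
	#include <sys/time.h>
	#include <sys/ddi.h>
	#include <sys/debug.h>

	static void
	check_balance_interval(uint_t irm_balance_delay)
	{
		/* Original expression: seconds scaled to microseconds first */
		clock_t old_ticks = drv_usectohz(irm_balance_delay * 1000000);

		/* New expression: seconds converted to ticks directly */
		clock_t new_ticks = drv_sectohz(irm_balance_delay);

		ASSERT(old_ticks == new_ticks);
	}
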