/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/xc_levels.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/stack.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>

/*
 * Implementation for cross-processor calls via interprocessor interrupts
 *
 * This implementation uses a message passing architecture to allow multiple
 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
 * instruction, aka atomic_cas_ptr(), to implement simple efficient work
 * queues for message passing between CPUs with almost no need for regular
 * locking.  See xc_extract() and xc_insert() below.
 *
 * The general idea is that initiating a cross call means putting a message
 * on the target CPU's (or CPUs') work queue. Any synchronization is handled
 * by passing the message back and forth between initiator and target(s).
 *
 * Every CPU has xc_work_cnt, which indicates it has messages to process.
 * This value is incremented as message traffic is initiated and decremented
 * with every message that finishes all processing.
 *
 * The code needs no mfence or other membar_*() calls. The uses of
 * atomic_cas_ptr(), atomic_inc_32_nv() and atomic_dec_32() for the message
 * passing are implemented with LOCK prefix instructions which are
 * equivalent to mfence.
 *
 * One interesting aspect of this implementation is that it allows 2 or more
 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
 * The cross call processing by the CPUs will happen in any order with only
 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
 * from cross calls before all slaves have invoked the function.
 *
 * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. If all CPUs, say N of them, each tried to do a global TLB
 * invalidation on a different Virtual Address at the same time, the old code
 * required N squared IPIs. With this method, depending on timing, it can
 * complete with as few as N IPIs.
 */

/*
 * The default is to not enable collecting counts of IPI information, since
 * the updating of shared cachelines could cause excess bus traffic.
 */
uint_t xc_collect_enable = 0;
uint64_t xc_total_cnt = 0;      /* total #IPIs sent for cross calls */
uint64_t xc_multi_cnt = 0;      /* # times we piggybacked on another IPI */

/*
 * Values for message states. Here are the normal transitions. A transition
 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
 * the messages are passed back and forth.
 *
 * FREE => ASYNC ->                       DONE => FREE
 * FREE => CALL ->                        DONE => FREE
 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 *
 * The interesting one above is ASYNC. You might ask why it doesn't go
 * directly to FREE, instead of DONE. If it did that, it might be possible
 * to exhaust the master's xc_free list if a master can generate ASYNC
 * messages faster than the slave can process them. That could be handled
 * with more complicated logic. However, since nothing important uses ASYNC,
 * I've not bothered.
 */
#define XC_MSG_FREE     (0)     /* msg in xc_free queue */
#define XC_MSG_ASYNC    (1)     /* msg in slave xc_msgbox */
#define XC_MSG_CALL     (2)     /* msg in slave xc_msgbox */
#define XC_MSG_SYNC     (3)     /* msg in slave xc_msgbox */
#define XC_MSG_WAITING  (4)     /* msg in master xc_msgbox or xc_waiters */
#define XC_MSG_RELEASED (5)     /* msg in slave xc_msgbox */
#define XC_MSG_DONE     (6)     /* msg in master xc_msgbox */

/*
 * We allow for one high priority message at a time to happen in the system.
 * This is used for panic, kmdb, etc., so no locking is done.
 */
static volatile cpuset_t xc_priority_set_store;
static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
static xc_data_t xc_priority_data;

/*
 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
 * operations don't accept volatile bit vectors - which is a bit silly.
 */
#define XC_BT_SET(vector, b)    BT_ATOMIC_SET((ulong_t *)(vector), (b))
#define XC_BT_CLEAR(vector, b)  BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))

/*
 * Decrement a CPU's work count
 */
static void
xc_decrement(struct machcpu *mcpu)
{
        atomic_dec_32(&mcpu->xc_work_cnt);
}

/*
 * Increment a CPU's work count and return the old value
 */
static int
xc_increment(struct machcpu *mcpu)
{
        return (atomic_inc_32_nv(&mcpu->xc_work_cnt) - 1);
}

/*
 * Put a message into a queue. The insertion is atomic no matter
 * how many different inserts/extracts to the same queue happen.
 */
static void
xc_insert(void *queue, xc_msg_t *msg)
{
        xc_msg_t *old_head;

        /*
         * FREE messages should only ever be getting inserted into
         * the xc_master CPU's xc_free queue.
         */
        ASSERT(msg->xc_command != XC_MSG_FREE ||
            cpu[msg->xc_master] == NULL || /* possible only during init */
            queue == &cpu[msg->xc_master]->cpu_m.xc_free);

        do {
                old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
                msg->xc_next = old_head;
        } while (atomic_cas_ptr(queue, old_head, msg) != old_head);
}

/*
 * Extract a message from a queue. The extraction is atomic only
 * when just one thread does extractions from the queue.
 * If the queue is empty, NULL is returned.
 */
static xc_msg_t *
xc_extract(xc_msg_t **queue)
{
        xc_msg_t *old_head;

        do {
                old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
                if (old_head == NULL)
                        return (old_head);
        } while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
            old_head);
        old_head->xc_next = NULL;
        return (old_head);
}
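
/*
 * Note: the single-extractor restriction above is the usual caveat for this
 * style of lock-free stack. If two threads popped from the same queue
 * concurrently, one could read old_head and old_head->xc_next, lose the race,
 * and later succeed with a compare and swap against a head that has since
 * been recycled, linking a stale xc_next back in (the classic ABA problem).
 * With exactly one consumer per queue, the compare and swap only has to
 * guard against concurrent xc_insert() calls, which is safe.
 */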

/*
 * Initialize the machcpu fields used for cross calls
 */
static uint_t xc_initialized = 0;

void
xc_init_cpu(struct cpu *cpup)
{
        xc_msg_t *msg;
        int c;

        /*
         * Allocate message buffers for the new CPU.
         */
        for (c = 0; c < max_ncpus; ++c) {
                if (plat_dr_support_cpu()) {
                        /*
                         * Allocate a message buffer for every CPU possible
                         * in the system, including our own, and add them to
                         * our xc message queue.
                         */
                        msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
                        msg->xc_command = XC_MSG_FREE;
                        msg->xc_master = cpup->cpu_id;
                        xc_insert(&cpup->cpu_m.xc_free, msg);
                } else if (cpu[c] != NULL && cpu[c] != cpup) {
                        /*
                         * Add a new message buffer to each existing CPU's free
                         * list, as well as one to my list for each of them.
                         * Note: cpu0 is statically inserted into the cpu[]
                         * array, so we need to check that cpu[c] isn't cpup
                         * itself to avoid allocating extra message buffers
                         * for cpu0.
                         */
                        msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
                        msg->xc_command = XC_MSG_FREE;
                        msg->xc_master = c;
                        xc_insert(&cpu[c]->cpu_m.xc_free, msg);

                        msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
                        msg->xc_command = XC_MSG_FREE;
                        msg->xc_master = cpup->cpu_id;
                        xc_insert(&cpup->cpu_m.xc_free, msg);
                }
        }

        if (!plat_dr_support_cpu()) {
                /*
                 * Add one for self messages if CPU hotplug is disabled.
                 */
                msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
                msg->xc_command = XC_MSG_FREE;
                msg->xc_master = cpup->cpu_id;
                xc_insert(&cpup->cpu_m.xc_free, msg);
        }

        if (!xc_initialized)
                xc_initialized = 1;
}
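
/*
 * The net effect of xc_init_cpu() is that each CPU's xc_free list ends up
 * with one message per CPU it may need to target, including itself: with DR
 * support the new CPU is simply given max_ncpus messages, and without DR the
 * new CPU and each existing CPU gain one message apiece for each other, plus
 * one self message. xc_common() below extracts one free message per target,
 * so running out of them triggers a panic there.
 */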

void
xc_fini_cpu(struct cpu *cpup)
{
        xc_msg_t *msg;

        ASSERT((cpup->cpu_flags & CPU_READY) == 0);
        ASSERT(cpup->cpu_m.xc_msgbox == NULL);
        ASSERT(cpup->cpu_m.xc_work_cnt == 0);

        while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
                kmem_free(msg, sizeof (*msg));
        }
}

#define XC_FLUSH_MAX_WAITS              1000

/* Flush inflight message buffers. */
int
xc_flush_cpu(struct cpu *cpup)
{
        int i;

        ASSERT((cpup->cpu_flags & CPU_READY) == 0);

        /*
         * Pause all working CPUs, which ensures that there is no CPU in
         * function xc_common().
         * This is used to work around a race condition window in xc_common()
         * between checking the CPU_READY flag and incrementing the work
         * count.
         */
        pause_cpus(cpup, NULL);
        start_cpus();

        for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
                if (cpup->cpu_m.xc_work_cnt == 0) {
                        break;
                }
                DELAY(1);
        }
        for (; i < XC_FLUSH_MAX_WAITS; i++) {
                if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
                        break;
                }
                DELAY(1);
        }

        return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
}

/*
 * X-call message processing routine. Note that this is used by both
 * senders and recipients of messages.
 *
 * We're protected against changing CPUs by either being in a high-priority
 * interrupt, having preemption disabled or by having a raised SPL.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
        struct machcpu *mcpup = &(CPU->cpu_m);
        xc_msg_t *msg;
        xc_data_t *data;
        xc_msg_t *xc_waiters = NULL;
        uint32_t num_waiting = 0;
        xc_func_t func;
        xc_arg_t a1;
        xc_arg_t a2;
        xc_arg_t a3;
        uint_t rc = DDI_INTR_UNCLAIMED;

        while (mcpup->xc_work_cnt != 0) {
                rc = DDI_INTR_CLAIMED;

                /*
                 * We may have to wait for a message to arrive.
                 */
                for (msg = NULL; msg == NULL;
                    msg = xc_extract(&mcpup->xc_msgbox)) {

                        /*
                         * Always check for and handle a priority message.
                         */
                        if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
                                func = xc_priority_data.xc_func;
                                a1 = xc_priority_data.xc_a1;
                                a2 = xc_priority_data.xc_a2;
                                a3 = xc_priority_data.xc_a3;
                                XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
                                xc_decrement(mcpup);
                                func(a1, a2, a3);
                                if (mcpup->xc_work_cnt == 0)
                                        return (rc);
                        }

                        /*
                         * wait for a message to arrive
                         */
                        SMT_PAUSE();
                }


                /*
                 * process the message
                 */
                switch (msg->xc_command) {

                /*
                 * ASYNC gives back the message immediately, then we do the
                 * function and return with no more waiting.
                 */
                case XC_MSG_ASYNC:
                        data = &cpu[msg->xc_master]->cpu_m.xc_data;
                        func = data->xc_func;
                        a1 = data->xc_a1;
                        a2 = data->xc_a2;
                        a3 = data->xc_a3;
                        msg->xc_command = XC_MSG_DONE;
                        xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
                        if (func != NULL)
                                (void) (*func)(a1, a2, a3);
                        xc_decrement(mcpup);
                        break;

                /*
                 * SYNC messages do the call, then send it back to the master
                 * in WAITING mode
                 */
                case XC_MSG_SYNC:
                        data = &cpu[msg->xc_master]->cpu_m.xc_data;
                        if (data->xc_func != NULL)
                                (void) (*data->xc_func)(data->xc_a1,
                                    data->xc_a2, data->xc_a3);
                        msg->xc_command = XC_MSG_WAITING;
                        xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
                        break;

                /*
                 * WAITING messages are collected by the master until all
                 * have arrived. Once all arrive, we release them back to
                 * the slaves
                 */
                case XC_MSG_WAITING:
                        xc_insert(&xc_waiters, msg);
                        if (++num_waiting < mcpup->xc_wait_cnt)
                                break;
                        while ((msg = xc_extract(&xc_waiters)) != NULL) {
                                msg->xc_command = XC_MSG_RELEASED;
                                xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
                                    msg);
                                --num_waiting;
                        }
                        if (num_waiting != 0)
                                panic("wrong number waiting");
                        mcpup->xc_wait_cnt = 0;
                        break;

                /*
                 * CALL messages do the function and then, like RELEASED,
                 * send the message back to the master as DONE.
                 */
                case XC_MSG_CALL:
                        data = &cpu[msg->xc_master]->cpu_m.xc_data;
                        if (data->xc_func != NULL)
                                (void) (*data->xc_func)(data->xc_a1,
                                    data->xc_a2, data->xc_a3);
                        /*FALLTHROUGH*/
                case XC_MSG_RELEASED:
                        msg->xc_command = XC_MSG_DONE;
                        xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
                        xc_decrement(mcpup);
                        break;

                /*
                 * DONE means a slave has completely finished up.
                 * Once we collect all the DONE messages, we'll exit
                 * processing too.
                 */
                case XC_MSG_DONE:
                        msg->xc_command = XC_MSG_FREE;
                        xc_insert(&mcpup->xc_free, msg);
                        xc_decrement(mcpup);
                        break;

                case XC_MSG_FREE:
                        panic("free message 0x%p in msgbox", (void *)msg);
                        break;

                default:
                        panic("bad message 0x%p in msgbox", (void *)msg);
                        break;
                }
        }
        return (rc);
}

/*
 * Initiate cross call processing.
 */
static void
xc_common(
        xc_func_t func,
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        ulong_t *set,
        uint_t command)
{
        int c;
        struct cpu *cpup;
        xc_msg_t *msg;
        xc_data_t *data;
        int cnt;
        int save_spl;

        if (!xc_initialized) {
                if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
                    func != NULL)
                        (void) (*func)(arg1, arg2, arg3);
                return;
        }

        save_spl = splr(ipltospl(XC_HI_PIL));

        /*
         * fill in cross call data
         */
        data = &CPU->cpu_m.xc_data;
        data->xc_func = func;
        data->xc_a1 = arg1;
        data->xc_a2 = arg2;
        data->xc_a3 = arg3;

        /*
         * Post messages to all CPUs involved that are CPU_READY
         */
        CPU->cpu_m.xc_wait_cnt = 0;
        for (c = 0; c < max_ncpus; ++c) {
                if (!BT_TEST(set, c))
                        continue;
                cpup = cpu[c];
                if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
                        continue;

                /*
                 * Fill out a new message.
                 */
                msg = xc_extract(&CPU->cpu_m.xc_free);
                if (msg == NULL)
                        panic("Ran out of free xc_msg_t's");
                msg->xc_command = command;
                if (msg->xc_master != CPU->cpu_id)
                        panic("msg %p has wrong xc_master", (void *)msg);
                msg->xc_slave = c;

                /*
                 * Increment my work count for all messages that I'll
                 * transition from DONE to FREE.
                 * Also remember how many XC_MSG_WAITINGs to look for
                 */
                (void) xc_increment(&CPU->cpu_m);
                if (command == XC_MSG_SYNC)
                        ++CPU->cpu_m.xc_wait_cnt;

                /*
                 * Increment the target CPU work count then insert the message
                 * in the target msgbox. If I post the first bit of work
                 * for the target to do, send an IPI to the target CPU.
                 */
                cnt = xc_increment(&cpup->cpu_m);
                xc_insert(&cpup->cpu_m.xc_msgbox, msg);
                if (cpup != CPU) {
                        if (cnt == 0) {
                                CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
                                send_dirint(c, XC_HI_PIL);
                                if (xc_collect_enable)
                                        ++xc_total_cnt;
                        } else if (xc_collect_enable) {
                                ++xc_multi_cnt;
                        }
                }
        }

        /*
         * Now drop into the message handler until all work is done
         */
        (void) xc_serv(NULL, NULL);
        splx(save_spl);
}

/*
 * Push out a priority cross call.
 */
static void
xc_priority_common(
        xc_func_t func,
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        ulong_t *set)
{
        int i;
        int c;
        struct cpu *cpup;

        /*
         * Wait briefly for any previous xc_priority to have finished.
         */
        for (c = 0; c < max_ncpus; ++c) {
                cpup = cpu[c];
                if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
                        continue;

                /*
                 * The value of 40000 here is from old kernel code. It
                 * really should be changed to some time based value, since
                 * under a hypervisor, there's no guarantee a remote CPU
                 * is even scheduled.
                 */
                for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
                        SMT_PAUSE();

                /*
                 * Some CPU did not respond to a previous priority request. It's
                 * probably deadlocked with interrupts blocked or some such
                 * problem. We'll just erase the previous request - which was
                 * most likely a kmdb_enter that has already expired - and plow
                 * ahead.
                 */
                if (BT_TEST(xc_priority_set, c)) {
                        XC_BT_CLEAR(xc_priority_set, c);
                        if (cpup->cpu_m.xc_work_cnt > 0)
                                xc_decrement(&cpup->cpu_m);
                }
        }

        /*
         * fill in cross call data
         */
        xc_priority_data.xc_func = func;
        xc_priority_data.xc_a1 = arg1;
        xc_priority_data.xc_a2 = arg2;
        xc_priority_data.xc_a3 = arg3;

        /*
         * Post messages to all CPUs involved that are CPU_READY
         * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
         */
        for (c = 0; c < max_ncpus; ++c) {
                if (!BT_TEST(set, c))
                        continue;
                cpup = cpu[c];
                if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
                    cpup == CPU)
                        continue;
                (void) xc_increment(&cpup->cpu_m);
                XC_BT_SET(xc_priority_set, c);
                send_dirint(c, XC_HI_PIL);
                for (i = 0; i < 10; ++i) {
                        (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
                            cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
                }
        }
}

/*
 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
 * This should only be used for extraordinary operations, like panic(), which
 * need to work, in some fashion, in a not completely functional system.
 * All other uses that want minimal waiting should use xc_call_nowait().
 */
void
xc_priority(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        ulong_t *set,
        xc_func_t func)
{
        extern int IGNORE_KERNEL_PREEMPTION;
        int save_spl = splr(ipltospl(XC_HI_PIL));
        int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;

        IGNORE_KERNEL_PREEMPTION = 1;
        xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
        splx(save_spl);
}

/*
 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
        extern int IGNORE_KERNEL_PREEMPTION;
        int save_kernel_preemption;
        cpuset_t set;

        if (!xc_initialized)
                return;

        save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
        IGNORE_KERNEL_PREEMPTION = 1;
        CPUSET_ALL_BUT(set, this_cpu);
        xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}


/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 */
void
xc_call_nowait(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        ulong_t *set,
        xc_func_t func)
{
        xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 */
void
xc_call(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        ulong_t *set,
        xc_func_t func)
{
        xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}

/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 */
void
xc_sync(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        ulong_t *set,
        xc_func_t func)
{
        xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}
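
/*
 * Illustrative sketch (not a consumer that exists in this file): a typical
 * caller builds a bit vector of target CPUs and hands it to one of the
 * wrappers above, e.g. to run my_invl() on every other CPU and wait for
 * them all to finish:
 *
 *      cpuset_t set;
 *
 *      CPUSET_ALL_BUT(set, CPU->cpu_id);
 *      xc_call((xc_arg_t)va, 0, 0, CPUSET2BV(set), my_invl);
 *
 * where my_invl is an xc_func_t, i.e. it takes three xc_arg_t arguments and
 * returns an int. The names my_invl and va are hypothetical and used only
 * for this example.
 */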