1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 /*
  26  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  27  */
  28 
  29 /*
  30  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
  31  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
  32  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
  33  * PSMI 1.5 extensions are supported in Solaris Nevada.
  34  * PSMI 1.6 extensions are supported in Solaris Nevada.
  35  * PSMI 1.7 extensions are supported in Solaris Nevada.
  36  */
  37 #define PSMI_1_7
  38 
  39 #include <sys/processor.h>
  40 #include <sys/time.h>
  41 #include <sys/psm.h>
  42 #include <sys/smp_impldefs.h>
  43 #include <sys/cram.h>
  44 #include <sys/acpi/acpi.h>
  45 #include <sys/acpica.h>
  46 #include <sys/psm_common.h>
  47 #include <sys/apic.h>
  48 #include <sys/pit.h>
  49 #include <sys/ddi.h>
  50 #include <sys/sunddi.h>
  51 #include <sys/ddi_impldefs.h>
  52 #include <sys/pci.h>
  53 #include <sys/promif.h>
  54 #include <sys/x86_archext.h>
  55 #include <sys/cpc_impl.h>
  56 #include <sys/uadmin.h>
  57 #include <sys/panic.h>
  58 #include <sys/debug.h>
  59 #include <sys/archsystm.h>
  60 #include <sys/trap.h>
  61 #include <sys/machsystm.h>
  62 #include <sys/sysmacros.h>
  63 #include <sys/cpuvar.h>
  64 #include <sys/rm_platter.h>
  65 #include <sys/privregs.h>
  66 #include <sys/note.h>
  67 #include <sys/pci_intr_lib.h>
  68 #include <sys/spl.h>
  69 #include <sys/clock.h>
  70 #include <sys/dditypes.h>
  71 #include <sys/sunddi.h>
  72 #include <sys/x_call.h>
  73 #include <sys/reboot.h>
  74 #include <sys/hpet.h>
  75 #include <sys/apic_common.h>
  76 #include <sys/apic_timer.h>
  77 
/*
 * Forward declarations of the two file-local recording callbacks.  They are
 * needed early because apic_nointrmap_ops, defined further down in this
 * file, stores their addresses in its last two slots.
 */
static void     apic_record_ioapic_rdt(void *intrmap_private,
                    ioapic_rdt_t *irdt);
static void     apic_record_msi(void *intrmap_private, msi_regs_t *mregs);
  81 
  82 /*
  83  * Common routines between pcplusmp & apix (taken from apic.c).
  84  */
  85 
/* Clock, IPI, idle, shutdown and CPU-enumeration entry points. */
int     apic_clkinit(int);
hrtime_t apic_gethrtime(void);
void    apic_send_ipi(int, int);
void    apic_set_idlecpu(processorid_t);
void    apic_unset_idlecpu(processorid_t);
void    apic_shutdown(int, int);
void    apic_preshutdown(int, int);
processorid_t   apic_get_next_processorid(processorid_t);

/* Coarse time source; note this is an old-style (non-prototype) declaration */
hrtime_t apic_gettime();
  96 
/*
 * IOAPIC EOI handling method used by the apix module.  It keeps the
 * pcplusmp default until apic_ioapic_method_probe() picks another method.
 */
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;

/* Now the ones for Dynamic Interrupt distribution */
int     apic_enable_dynamic_migration = 0;

/*
 * Maximum loop count when sending Start IPIs: bounds the number of times
 * apic_cpu_send_SIPI() polls the pending bit after the INIT IPI.
 */
int apic_sipi_max_loop_count = 0x1000;

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr =  NULL;     /* virtual addr of local APIC   */
int apic_setspl_delay = 1;              /* apic_setspl - delay enable   */
int apic_clkvect;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
/*
 * Number of tenmicrosec() delays apic_error_intr() inserts after printing
 * an error; the handler doubles it while it is still below 500.
 */
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* vector at which CMCI interrupts come in */
int apic_cmci_vect;
extern int cmi_enable_cmci;
extern void cmi_cmci_trap(void);

kmutex_t cmci_cpu_setup_lock;   /* protects cmci_cpu_setup_registered */
int cmci_cpu_setup_registered;

/* number of CPUs in power-on transition state */
static int apic_poweron_cnt = 0;
lock_t apic_mode_switch_lock;
 133 
 134 /*
 135  * Patchable global variables.
 136  */
int     apic_forceload = 0;

int     apic_coarse_hrtime = 1;         /* 0 - use accurate slow gethrtime() */

int     apic_flat_model = 0;            /* 0 - clustered. 1 - flat */
int     apic_panic_on_nmi = 0;          /* non-zero: apic_nmi_intr() panics */
int     apic_panic_on_apic_error = 0;   /* non-zero: apic_error_intr() panics */

int     apic_verbose = 0;       /* 0x1ff */

#ifdef DEBUG
int     apic_debug = 0;
int     apic_restrict_vector = 0;

int     apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int     apic_debug_msgbufindex = 0;

#endif /* DEBUG */

uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;

/*
 * State shared by apic_gettime() and apic_gethrtime().  Readers spin while
 * apic_hrtime_stamp is odd and retry if it changed while they were
 * sampling apic_nsec_since_boot.
 */
uint_t last_count_read = 0;
lock_t  apic_gethrtime_lock;
volatile int    apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;

/* last value handed out by apic_gethrtime(), plus error/event counters */
static  hrtime_t        apic_last_hrtime = 0;
int             apic_hrtime_error = 0;
int             apic_remote_hrterr = 0;
int             apic_num_nmis = 0;
int             apic_apic_error = 0;
int             apic_num_apic_errors = 0;
int             apic_num_cksum_errors = 0;

/* accumulated APIC_ERR_* flags (set by the error and NMI handlers) */
int     apic_error = 0;

/* set to 1 by apic_cpu_start() before it sends the startup IPIs */
static  int     apic_cmos_ssb_set = 0;

/* used to make sure only one cpu handles the nmi */
lock_t  apic_nmi_lock;
/* used to make sure only one cpu handles the error interrupt */
lock_t  apic_error_lock;
 180 
/*
 * Table of (control, data) byte pairs forming two BMC requests: a
 * SET_WATCHDOG_TIMER command followed by a RESET_WATCHDOG_TIMER command.
 * NOTE(review): the code that walks this table is outside this section of
 * the file; presumably each pair is written to the BMC in order - confirm.
 */
static  struct {
        uchar_t cntl;
        uchar_t data;
} aspen_bmc[] = {
        { CC_SMS_WR_START,      0x18 },         /* NetFn/LUN */
        { CC_SMS_WR_NEXT,       0x24 },         /* Cmd SET_WATCHDOG_TIMER */
        { CC_SMS_WR_NEXT,       0x84 },         /* DataByte 1: SMS/OS no log */
        { CC_SMS_WR_NEXT,       0x2 },          /* DataByte 2: Power Down */
        { CC_SMS_WR_NEXT,       0x0 },          /* DataByte 3: no pre-timeout */
        { CC_SMS_WR_NEXT,       0x0 },          /* DataByte 4: timer expir. */
        { CC_SMS_WR_NEXT,       0xa },          /* DataByte 5: init countdown */
        { CC_SMS_WR_END,        0x0 },          /* DataByte 6: init countdown */

        { CC_SMS_WR_START,      0x18 },         /* NetFn/LUN */
        { CC_SMS_WR_END,        0x22 }          /* Cmd RESET_WATCHDOG_TIMER */
};
 197 
/*
 * Table of (port, data) pairs carrying the same two watchdog requests as
 * aspen_bmc (SET_WATCHDOG_TIMER, then RESET_WATCHDOG_TIMER), expressed as
 * writes to the SMS command and data registers.
 * NOTE(review): the consumer of this table is outside this section of the
 * file; presumably each data byte is written to the named port - confirm.
 */
static  struct {
        int     port;
        uchar_t data;
} sitka_bmc[] = {
        { SMS_COMMAND_REGISTER, SMS_WRITE_START },
        { SMS_DATA_REGISTER,    0x18 },         /* NetFn/LUN */
        { SMS_DATA_REGISTER,    0x24 },         /* Cmd SET_WATCHDOG_TIMER */
        { SMS_DATA_REGISTER,    0x84 },         /* DataByte 1: SMS/OS no log */
        { SMS_DATA_REGISTER,    0x2 },          /* DataByte 2: Power Down */
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 3: no pre-timeout */
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 4: timer expir. */
        { SMS_DATA_REGISTER,    0xa },          /* DataByte 5: init countdown */
        { SMS_COMMAND_REGISTER, SMS_WRITE_END },
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 6: init countdown */

        { SMS_COMMAND_REGISTER, SMS_WRITE_START },
        { SMS_DATA_REGISTER,    0x18 },         /* NetFn/LUN */
        { SMS_COMMAND_REGISTER, SMS_WRITE_END },
        { SMS_DATA_REGISTER,    0x22 }          /* Cmd RESET_WATCHDOG_TIMER */
};
 218 
 219 /* Patchable global variables. */
int             apic_kmdb_on_nmi = 0;           /* 0 - no, 1 - yes enter kmdb */
uint32_t        apic_divide_reg_init = 0;       /* 0 - divide by 2 */

/*
 * default apic ops without interrupt remapping: the first five slots are
 * return_instr cast to the slot's function type, and only the two
 * recording callbacks are functions from this file.
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
        (int (*)(int))return_instr,
        (void (*)(int))return_instr,
        (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
        (void (*)(void *, void *, uint16_t, int))return_instr,
        (void (*)(void **))return_instr,
        apic_record_ioapic_rdt,
        apic_record_msi,
};

/* ops vector in use; starts out pointing at the no-remapping defaults */
apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
/* per-CPU APIC information, indexed by CPU number */
apic_cpus_info_t        *apic_cpus = NULL;
cpuset_t        apic_cpumask;
uint_t          apic_picinit_called;

/*
 * Flag to indicate that we need to shut down all processors; when set,
 * apic_nmi_intr() disables the local APIC and returns.
 */
static uint_t   apic_shutdown_processors;
 241 
 242 /*
 243  * Probe the ioapic method for apix module. Called in apic_probe_common()
 244  */
 245 int
 246 apic_ioapic_method_probe()
 247 {
 248         if (apix_enable == 0)
 249                 return (PSM_SUCCESS);
 250 
 251         /*
 252          * Set IOAPIC EOI handling method. The priority from low to high is:
 253          *      1. IOxAPIC: with EOI register
 254          *      2. IOMMU interrupt mapping
 255          *      3. Mask-Before-EOI method for systems without boot
 256          *      interrupt routing, such as systems with only one IOAPIC;
 257          *      NVIDIA CK8-04/MCP55 systems; systems with bridge solution
 258          *      which disables the boot interrupt routing already.
 259          *      4. Directed EOI
 260          */
 261         if (apic_io_ver[0] >= 0x20)
 262                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
 263         if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
 264                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
 265         if (apic_directed_EOI_supported())
 266                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
 267 
 268         /* fall back to pcplusmp */
 269         if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
 270                 /* make sure apix is after pcplusmp in /etc/mach */
 271                 apix_enable = 0; /* go ahead with pcplusmp install next */
 272                 return (PSM_FAILURE);
 273         }
 274 
 275         return (PSM_SUCCESS);
 276 }
 277 
 278 /*
 279  * handler for APIC Error interrupt. Just print a warning and continue
 280  */
 281 int
 282 apic_error_intr()
 283 {
 284         uint_t  error0, error1, error;
 285         uint_t  i;
 286 
 287         /*
 288          * We need to write before read as per 7.4.17 of system prog manual.
 289          * We do both and or the results to be safe
 290          */
 291         error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 292         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 293         error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 294         error = error0 | error1;
 295 
 296         /*
 297          * Clear the APIC error status (do this on all cpus that enter here)
 298          * (two writes are required due to the semantics of accessing the
 299          * error status register.)
 300          */
 301         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 302         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 303 
 304         /*
 305          * Prevent more than 1 CPU from handling error interrupt causing
 306          * double printing (interleave of characters from multiple
 307          * CPU's when using prom_printf)
 308          */
 309         if (lock_try(&apic_error_lock) == 0)
 310                 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
 311         if (error) {
 312 #if     DEBUG
 313                 if (apic_debug)
 314                         debug_enter("pcplusmp: APIC Error interrupt received");
 315 #endif /* DEBUG */
 316                 if (apic_panic_on_apic_error)
 317                         cmn_err(CE_PANIC,
 318                             "APIC Error interrupt on CPU %d. Status = %x",
 319                             psm_get_cpu_id(), error);
 320                 else {
 321                         if ((error & ~APIC_CS_ERRORS) == 0) {
 322                                 /* cksum error only */
 323                                 apic_error |= APIC_ERR_APIC_ERROR;
 324                                 apic_apic_error |= error;
 325                                 apic_num_apic_errors++;
 326                                 apic_num_cksum_errors++;
 327                         } else {
 328                                 /*
 329                                  * prom_printf is the best shot we have of
 330                                  * something which is problem free from
 331                                  * high level/NMI type of interrupts
 332                                  */
 333                                 prom_printf("APIC Error interrupt on CPU %d. "
 334                                     "Status 0 = %x, Status 1 = %x\n",
 335                                     psm_get_cpu_id(), error0, error1);
 336                                 apic_error |= APIC_ERR_APIC_ERROR;
 337                                 apic_apic_error |= error;
 338                                 apic_num_apic_errors++;
 339                                 for (i = 0; i < apic_error_display_delay; i++) {
 340                                         tenmicrosec();
 341                                 }
 342                                 /*
 343                                  * provide more delay next time limited to
 344                                  * roughly 1 clock tick time
 345                                  */
 346                                 if (apic_error_display_delay < 500)
 347                                         apic_error_display_delay *= 2;
 348                         }
 349                 }
 350                 lock_clear(&apic_error_lock);
 351                 return (DDI_INTR_CLAIMED);
 352         } else {
 353                 lock_clear(&apic_error_lock);
 354                 return (DDI_INTR_UNCLAIMED);
 355         }
 356 }
 357 
/*
 * Turn off the mask bit in the performance counter Local Vector Table entry.
 *
 * Reads the current APIC_PCINT_VECT value, clears APIC_LVT_MASK in it and
 * writes the result back, leaving every other bit as it was read.
 */
void
apic_cpcovf_mask_clear(void)
{
        apic_reg_ops->apic_write(APIC_PCINT_VECT,
            (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
}
 367 
/*
 * Cross-call handler (see cmci_cpu_setup()): program the CMCI LVT entry of
 * the executing CPU with apic_cmci_vect, without the mask bit.  The three
 * cross-call arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
        return (0);
}
 375 
/*
 * Cross-call handler (see cmci_cpu_setup()): program the CMCI LVT entry of
 * the executing CPU with apic_cmci_vect plus AV_MASK, i.e. masked.  The
 * three cross-call arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
        return (0);
}
 383 
 384 /*ARGSUSED*/
 385 int
 386 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
 387 {
 388         cpuset_t        cpu_set;
 389 
 390         CPUSET_ONLY(cpu_set, cpuid);
 391 
 392         switch (what) {
 393                 case CPU_ON:
 394                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 395                             (xc_func_t)apic_cmci_enable);
 396                         break;
 397 
 398                 case CPU_OFF:
 399                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 400                             (xc_func_t)apic_cmci_disable);
 401                         break;
 402 
 403                 default:
 404                         break;
 405         }
 406 
 407         return (0);
 408 }
 409 
/*
 * Quiesce the local APIC of the executing CPU: write APIC_MASK_ALL to the
 * task register, mask every local vector table entry touched below, and
 * finally rewrite the spurious interrupt register.  Called from
 * apic_nmi_intr() when apic_shutdown_processors is set.
 */
static void
apic_disable_local_apic(void)
{
        apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
        apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);

        /* local intr reg 0 */
        apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);

        /* disable NMI */
        apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);

        /* and error interrupt */
        apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);

        /* and perf counter intr */
        apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);

        /*
         * NOTE(review): only the spurious vector is written here, with no
         * other bits; presumably this software-disables the APIC - confirm.
         */
        apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
}
 430 
/*
 * Send the INIT IPI sequence, and for integrated APICs two Startup IPIs,
 * to CPU `cpun'.
 *
 * When `start' is true the CMOS shutdown status byte is set to
 * BIOS_SHUTDOWN first.  apic_cpu_start() passes B_TRUE and apic_cpu_stop()
 * passes B_FALSE.  The whole sequence runs with interrupts disabled on the
 * calling CPU.
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
        int             loop_count;
        uint32_t        vector;
        uint_t          apicid;
        ulong_t         iflag;

        apicid =  apic_cpus[cpun].aci_local_id;

        /*
         * Interrupts on current CPU will be disabled during the
         * steps in order to avoid unwanted side effects from
         * executing interrupt handlers on a problematic BIOS.
         */
        iflag = intr_clear();

        if (start) {
                outb(CMOS_ADDR, SSB);
                outb(CMOS_DATA, BIOS_SHUTDOWN);
        }

        /*
         * According to X2APIC specification in section '2.3.5.1' of
         * Interrupt Command Register Semantics, the semantics of
         * programming the Interrupt Command Register to dispatch an interrupt
         * is simplified. A single MSR write to the 64-bit ICR is required
         * for dispatching an interrupt. Specifically, with the 64-bit MSR
         * interface to ICR, system software is not required to check the
         * status of the delivery status bit prior to writing to the ICR
         * to send an IPI. With the removal of the Delivery Status bit,
         * system software no longer has a reason to read the ICR. It remains
         * readable only to aid in debugging.
         */
#ifdef  DEBUG
        APIC_AV_PENDING_SET();
#else
        if (apic_mode == LOCAL_APIC) {
                APIC_AV_PENDING_SET();
        }
#endif /* DEBUG */

        /* for integrated - make sure there is one INIT IPI in buffer */
        /* for external - it will wake up the cpu */
        apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

        /*
         * If only 1 CPU is installed, PENDING bit will not go low, so the
         * wait is bounded by apic_sipi_max_loop_count.  The pending bit is
         * only polled in LOCAL_APIC mode.
         */
        for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
                if (apic_mode == LOCAL_APIC &&
                    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
                        apic_ret();
                else
                        break;
        }

        apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
        drv_usecwait(20000);            /* 20 milli sec */

        if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
                /* integrated apic */

                /* the startup vector is the page number of rm_platter_pa */
                vector = (rm_platter_pa >> MMU_PAGESHIFT) &
                    (APIC_VECTOR_MASK | APIC_IPL_MASK);

                /* to offset the INIT IPI queue up in the buffer */
                apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
                drv_usecwait(200);              /* 200 micro sec */

                /*
                 * send the second SIPI (Startup IPI) as recommended by Intel
                 * software development manual.
                 */
                apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
                drv_usecwait(200);      /* 200 micro sec */
        }

        intr_restore(iflag);
}
 509 
 510 /*ARGSUSED1*/
 511 int
 512 apic_cpu_start(processorid_t cpun, caddr_t arg)
 513 {
 514         ASSERT(MUTEX_HELD(&cpu_lock));
 515 
 516         if (!apic_cpu_in_range(cpun)) {
 517                 return (EINVAL);
 518         }
 519 
 520         /*
 521          * Switch to apic_common_send_ipi for safety during starting other CPUs.
 522          */
 523         if (apic_mode == LOCAL_X2APIC) {
 524                 apic_switch_ipi_callback(B_TRUE);
 525         }
 526 
 527         apic_cmos_ssb_set = 1;
 528         apic_cpu_send_SIPI(cpun, B_TRUE);
 529 
 530         return (0);
 531 }
 532 
 533 /*
 534  * Put CPU into halted state with interrupts disabled.
 535  */
 536 /*ARGSUSED1*/
 537 int
 538 apic_cpu_stop(processorid_t cpun, caddr_t arg)
 539 {
 540         int             rc;
 541         cpu_t           *cp;
 542         extern cpuset_t cpu_ready_set;
 543         extern void cpu_idle_intercept_cpu(cpu_t *cp);
 544 
 545         ASSERT(MUTEX_HELD(&cpu_lock));
 546 
 547         if (!apic_cpu_in_range(cpun)) {
 548                 return (EINVAL);
 549         }
 550         if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
 551                 return (ENOTSUP);
 552         }
 553 
 554         cp = cpu_get(cpun);
 555         ASSERT(cp != NULL);
 556         ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
 557         ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
 558         ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
 559 
 560         /* Clear CPU_READY flag to disable cross calls. */
 561         cp->cpu_flags &= ~CPU_READY;
 562         CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
 563         rc = xc_flush_cpu(cp);
 564         if (rc != 0) {
 565                 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
 566                 cp->cpu_flags |= CPU_READY;
 567                 return (rc);
 568         }
 569 
 570         /* Intercept target CPU at a safe point before powering it off. */
 571         cpu_idle_intercept_cpu(cp);
 572 
 573         apic_cpu_send_SIPI(cpun, B_FALSE);
 574         cp->cpu_flags &= ~CPU_RUNNING;
 575 
 576         return (0);
 577 }
 578 
 579 int
 580 apic_cpu_ops(psm_cpu_request_t *reqp)
 581 {
 582         if (reqp == NULL) {
 583                 return (EINVAL);
 584         }
 585 
 586         switch (reqp->pcr_cmd) {
 587         case PSM_CPU_ADD:
 588                 return (apic_cpu_add(reqp));
 589 
 590         case PSM_CPU_REMOVE:
 591                 return (apic_cpu_remove(reqp));
 592 
 593         case PSM_CPU_STOP:
 594                 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
 595                     reqp->req.cpu_stop.ctx));
 596 
 597         default:
 598                 return (ENOTSUP);
 599         }
 600 }
 601 
/*
 * Debug-only tunables.
 * NOTE(review): nothing in this section of the file reads them; their
 * consumers are presumably elsewhere - confirm before changing defaults.
 */
#ifdef  DEBUG
int     apic_break_on_cpu = 9;
int     apic_stretch_interrupts = 0;
int     apic_stretch_ISR = 1 << 3;        /* IPL of 3 matches nothing now */
#endif /* DEBUG */
 607 
 608 /*
 609  * generates an interprocessor interrupt to another CPU. Any changes made to
 610  * this routine must be accompanied by similar changes to
 611  * apic_common_send_ipi().
 612  */
 613 void
 614 apic_send_ipi(int cpun, int ipl)
 615 {
 616         int vector;
 617         ulong_t flag;
 618 
 619         vector = apic_resv_vector[ipl];
 620 
 621         ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
 622 
 623         flag = intr_clear();
 624 
 625         APIC_AV_PENDING_SET();
 626 
 627         apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
 628             vector);
 629 
 630         intr_restore(flag);
 631 }
 632 
 633 
/*
 * Intentionally empty: this implementation does nothing when told that
 * `cpun' is idle.
 */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}
 639 
/*
 * Intentionally empty: this implementation does nothing when told that
 * `cpun' is no longer idle.
 */
/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}
 645 
 646 
 647 void
 648 apic_ret()
 649 {
 650 }
 651 
/*
 * If apic_coarse_hrtime == 1, then apic_gettime() is used instead of
 * apic_gethrtime().  This trades accuracy for performance.
 */
 656 
 657 hrtime_t
 658 apic_gettime()
 659 {
 660         int old_hrtime_stamp;
 661         hrtime_t temp;
 662 
 663         /*
 664          * In one-shot mode, we do not keep time, so if anyone
 665          * calls psm_gettime() directly, we vector over to
 666          * gethrtime().
 667          * one-shot mode MUST NOT be enabled if this psm is the source of
 668          * hrtime.
 669          */
 670 
 671         if (apic_oneshot)
 672                 return (gethrtime());
 673 
 674 
 675 gettime_again:
 676         while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
 677                 apic_ret();
 678 
 679         temp = apic_nsec_since_boot;
 680 
 681         if (apic_hrtime_stamp != old_hrtime_stamp) {    /* got an interrupt */
 682                 goto gettime_again;
 683         }
 684         return (temp);
 685 }
 686 
 687 /*
 688  * Here we return the number of nanoseconds since booting.  Note every
 689  * clock interrupt increments apic_nsec_since_boot by the appropriate
 690  * amount.
 691  */
/*
 * Returns apic_nsec_since_boot plus the nanoseconds elapsed in the current
 * clock period, derived from the current-count register of CPU 0's local
 * APIC timer.  The result never goes below the previously returned value:
 * on a failed remote read, or if the computed time would go backwards, the
 * last value handed out is returned again.
 *
 * Runs with interrupts disabled and apic_gethrtime_lock held.
 */
hrtime_t
apic_gethrtime(void)
{
        int curr_timeval, countval, elapsed_ticks;
        int old_hrtime_stamp, status;
        hrtime_t temp;
        uint32_t cpun;
        ulong_t oflags;

        /*
         * In one-shot mode, we do not keep time, so if anyone
         * calls psm_gethrtime() directly, we vector over to
         * gethrtime().
         * one-shot mode MUST NOT be enabled if this psm is the source of
         * hrtime.
         */

        if (apic_oneshot)
                return (gethrtime());

        oflags = intr_clear();  /* prevent migration */

        /* local APIC id of this CPU; in LOCAL_APIC mode it must be shifted */
        cpun = apic_reg_ops->apic_read(APIC_LID_REG);
        if (apic_mode == LOCAL_APIC)
                cpun >>= APIC_ID_BIT_OFFSET;

        lock_set(&apic_gethrtime_lock);

gethrtime_again:
        /* an odd stamp means an update is in progress: wait for it */
        while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
                apic_ret();

        /*
         * Check to see which CPU we are on.  Note the time is kept on
         * the local APIC of CPU 0.  If on CPU 0, simply read the current
         * counter.  If on another CPU, issue a remote read command to CPU 0.
         */
        if (cpun == apic_cpus[0].aci_local_id) {
                countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
        } else {
#ifdef  DEBUG
                APIC_AV_PENDING_SET();
#else
                if (apic_mode == LOCAL_APIC)
                        APIC_AV_PENDING_SET();
#endif /* DEBUG */

                apic_reg_ops->apic_write_int_cmd(
                    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

                /* spin until the remote read is no longer pending */
                while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
                    & AV_READ_PENDING) {
                        apic_ret();
                }

                if (status & AV_REMOTE_STATUS)      /* 1 = valid */
                        countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
                else {  /* 0 = invalid */
                        apic_remote_hrterr++;
                        /*
                         * return last hrtime right now, will need more
                         * testing if change to retry
                         */
                        temp = apic_last_hrtime;

                        lock_clear(&apic_gethrtime_lock);

                        intr_restore(oflags);

                        return (temp);
                }
        }
        /*
         * A count above the last one read is replaced by 0; otherwise
         * the new, lower count is remembered for the next comparison.
         */
        if (countval > last_count_read)
                countval = 0;
        else
                last_count_read = countval;

        elapsed_ticks = apic_hertz_count - countval;

        curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
        temp = apic_nsec_since_boot + curr_timeval;

        if (apic_hrtime_stamp != old_hrtime_stamp) {    /* got an interrupt */
                /* we might have clobbered last_count_read. Restore it */
                last_count_read = apic_hertz_count;
                goto gethrtime_again;
        }

        if (temp < apic_last_hrtime) {
                /* return last hrtime if error occurs */
                apic_hrtime_error++;
                temp = apic_last_hrtime;
        }
        else
                apic_last_hrtime = temp;

        lock_clear(&apic_gethrtime_lock);
        intr_restore(oflags);

        return (temp);
}
 793 
 794 /* apic NMI handler */
 795 /*ARGSUSED*/
 796 void
 797 apic_nmi_intr(caddr_t arg, struct regs *rp)
 798 {
 799         if (apic_shutdown_processors) {
 800                 apic_disable_local_apic();
 801                 return;
 802         }
 803 
 804         apic_error |= APIC_ERR_NMI;
 805 
 806         if (!lock_try(&apic_nmi_lock))
 807                 return;
 808         apic_num_nmis++;
 809 
 810         if (apic_kmdb_on_nmi && psm_debugger()) {
 811                 debug_enter("NMI received: entering kmdb\n");
 812         } else if (apic_panic_on_nmi) {
 813                 /* Keep panic from entering kmdb. */
 814                 nopanicdebug = 1;
 815                 panic("NMI received\n");
 816         } else {
 817                 /*
 818                  * prom_printf is the best shot we have of something which is
 819                  * problem free from high level/NMI type of interrupts
 820                  */
 821                 prom_printf("NMI received\n");
 822         }
 823 
 824         lock_clear(&apic_nmi_lock);
 825 }
 826 
 827 processorid_t
 828 apic_get_next_processorid(processorid_t cpu_id)
 829 {
 830 
 831         int i;
 832 
 833         if (cpu_id == -1)
 834                 return ((processorid_t)0);
 835 
 836         for (i = cpu_id + 1; i < NCPU; i++) {
 837                 if (apic_cpu_in_range(i))
 838                         return (i);
 839         }
 840 
 841         return ((processorid_t)-1);
 842 }
 843 
 844 int
 845 apic_cpu_add(psm_cpu_request_t *reqp)
 846 {
 847         int i, rv = 0;
 848         ulong_t iflag;
 849         boolean_t first = B_TRUE;
 850         uchar_t localver;
 851         uint32_t localid, procid;
 852         processorid_t cpuid = (processorid_t)-1;
 853         mach_cpu_add_arg_t *ap;
 854 
 855         ASSERT(reqp != NULL);
 856         reqp->req.cpu_add.cpuid = (processorid_t)-1;
 857 
 858         /* Check whether CPU hotplug is supported. */
 859         if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
 860                 return (ENOTSUP);
 861         }
 862 
 863         ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
 864         switch (ap->type) {
 865         case MACH_CPU_ARG_LOCAL_APIC:
 866                 localid = ap->arg.apic.apic_id;
 867                 procid = ap->arg.apic.proc_id;
 868                 if (localid >= 255 || procid > 255) {
 869                         cmn_err(CE_WARN,
 870                             "!apic: apicid(%u) or procid(%u) is invalid.",
 871                             localid, procid);
 872                         return (EINVAL);
 873                 }
 874                 break;
 875 
 876         case MACH_CPU_ARG_LOCAL_X2APIC:
 877                 localid = ap->arg.apic.apic_id;
 878                 procid = ap->arg.apic.proc_id;
 879                 if (localid >= UINT32_MAX) {
 880                         cmn_err(CE_WARN,
 881                             "!apic: x2apicid(%u) is invalid.", localid);
 882                         return (EINVAL);
 883                 } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
 884                         cmn_err(CE_WARN, "!apic: system is in APIC mode, "
 885                             "can't support x2APIC processor.");
 886                         return (ENOTSUP);
 887                 }
 888                 break;
 889 
 890         default:
 891                 cmn_err(CE_WARN,
 892                     "!apic: unknown argument type %d to apic_cpu_add().",
 893                     ap->type);
 894                 return (EINVAL);
 895         }
 896 
 897         /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
 898         iflag = intr_clear();
 899         lock_set(&apic_ioapic_lock);
 900 
 901         /* Check whether local APIC id already exists. */
 902         for (i = 0; i < apic_nproc; i++) {
 903                 if (!CPU_IN_SET(apic_cpumask, i))
 904                         continue;
 905                 if (apic_cpus[i].aci_local_id == localid) {
 906                         lock_clear(&apic_ioapic_lock);
 907                         intr_restore(iflag);
 908                         cmn_err(CE_WARN,
 909                             "!apic: local apic id %u already exists.",
 910                             localid);
 911                         return (EEXIST);
 912                 } else if (apic_cpus[i].aci_processor_id == procid) {
 913                         lock_clear(&apic_ioapic_lock);
 914                         intr_restore(iflag);
 915                         cmn_err(CE_WARN,
 916                             "!apic: processor id %u already exists.",
 917                             (int)procid);
 918                         return (EEXIST);
 919                 }
 920 
 921                 /*
 922                  * There's no local APIC version number available in MADT table,
 923                  * so assume that all CPUs are homogeneous and use local APIC
 924                  * version number of the first existing CPU.
 925                  */
 926                 if (first) {
 927                         first = B_FALSE;
 928                         localver = apic_cpus[i].aci_local_ver;
 929                 }
 930         }
 931         ASSERT(first == B_FALSE);
 932 
 933         /*
 934          * Try to assign the same cpuid if APIC id exists in the dirty cache.
 935          */
 936         for (i = 0; i < apic_max_nproc; i++) {
 937                 if (CPU_IN_SET(apic_cpumask, i)) {
 938                         ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
 939                         continue;
 940                 }
 941                 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
 942                 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
 943                     apic_cpus[i].aci_local_id == localid &&
 944                     apic_cpus[i].aci_processor_id == procid) {
 945                         cpuid = i;
 946                         break;
 947                 }
 948         }
 949 
 950         /* Avoid the dirty cache and allocate fresh slot if possible. */
 951         if (cpuid == (processorid_t)-1) {
 952                 for (i = 0; i < apic_max_nproc; i++) {
 953                         if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
 954                             (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
 955                                 cpuid = i;
 956                                 break;
 957                         }
 958                 }
 959         }
 960 
 961         /* Try to find any free slot as last resort. */
 962         if (cpuid == (processorid_t)-1) {
 963                 for (i = 0; i < apic_max_nproc; i++) {
 964                         if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
 965                                 cpuid = i;
 966                                 break;
 967                         }
 968                 }
 969         }
 970 
 971         if (cpuid == (processorid_t)-1) {
 972                 lock_clear(&apic_ioapic_lock);
 973                 intr_restore(iflag);
 974                 cmn_err(CE_NOTE,
 975                     "!apic: failed to allocate cpu id for processor %u.",
 976                     procid);
 977                 rv = EAGAIN;
 978         } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
 979                 lock_clear(&apic_ioapic_lock);
 980                 intr_restore(iflag);
 981                 cmn_err(CE_NOTE,
 982                     "!apic: failed to build mapping for processor %u.",
 983                     procid);
 984                 rv = EBUSY;
 985         } else {
 986                 ASSERT(cpuid >= 0 && cpuid < NCPU);
 987                 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
 988                 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
 989                 apic_cpus[cpuid].aci_processor_id = procid;
 990                 apic_cpus[cpuid].aci_local_id = localid;
 991                 apic_cpus[cpuid].aci_local_ver = localver;
 992                 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
 993                 if (cpuid >= apic_nproc) {
 994                         apic_nproc = cpuid + 1;
 995                 }
 996                 lock_clear(&apic_ioapic_lock);
 997                 intr_restore(iflag);
 998                 reqp->req.cpu_add.cpuid = cpuid;
 999         }
1000 
1001         return (rv);
1002 }
1003 
1004 int
1005 apic_cpu_remove(psm_cpu_request_t *reqp)
1006 {
1007         int i;
1008         ulong_t iflag;
1009         processorid_t cpuid;
1010 
1011         /* Check whether CPU hotplug is supported. */
1012         if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1013                 return (ENOTSUP);
1014         }
1015 
1016         cpuid = reqp->req.cpu_remove.cpuid;
1017 
1018         /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1019         iflag = intr_clear();
1020         lock_set(&apic_ioapic_lock);
1021 
1022         if (!apic_cpu_in_range(cpuid)) {
1023                 lock_clear(&apic_ioapic_lock);
1024                 intr_restore(iflag);
1025                 cmn_err(CE_WARN,
1026                     "!apic: cpuid %d doesn't exist in apic_cpus array.",
1027                     cpuid);
1028                 return (ENODEV);
1029         }
1030         ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1031 
1032         if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1033                 lock_clear(&apic_ioapic_lock);
1034                 intr_restore(iflag);
1035                 return (ENOENT);
1036         }
1037 
1038         if (cpuid == apic_nproc - 1) {
1039                 /*
1040                  * We are removing the highest numbered cpuid so we need to
1041                  * find the next highest cpuid as the new value for apic_nproc.
1042                  */
1043                 for (i = apic_nproc; i > 0; i--) {
1044                         if (CPU_IN_SET(apic_cpumask, i - 1)) {
1045                                 apic_nproc = i;
1046                                 break;
1047                         }
1048                 }
1049                 /* at least one CPU left */
1050                 ASSERT(i > 0);
1051         }
1052         CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1053         /* mark slot as free and keep it in the dirty cache */
1054         apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1055 
1056         lock_clear(&apic_ioapic_lock);
1057         intr_restore(iflag);
1058 
1059         return (0);
1060 }
1061 
1062 /*
1063  * Return the number of APIC clock ticks elapsed for 8245 to decrement
1064  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
1065  */
1066 uint_t
1067 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
1068 {
1069         uint8_t         pit_tick_lo;
1070         uint16_t        pit_tick, target_pit_tick;
1071         uint32_t        start_apic_tick, end_apic_tick;
1072         ulong_t         iflag;
1073         uint32_t        reg;
1074 
1075         reg = addr + APIC_CURR_COUNT - apicadr;
1076 
1077         iflag = intr_clear();
1078 
1079         do {
1080                 pit_tick_lo = inb(PITCTR0_PORT);
1081                 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1082         } while (pit_tick < APIC_TIME_MIN ||
1083             pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
1084 
1085         /*
1086          * Wait for the 8254 to decrement by 5 ticks to ensure
1087          * we didn't start in the middle of a tick.
1088          * Compare with 0x10 for the wrap around case.
1089          */
1090         target_pit_tick = pit_tick - 5;
1091         do {
1092                 pit_tick_lo = inb(PITCTR0_PORT);
1093                 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1094         } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1095 
1096         start_apic_tick = apic_reg_ops->apic_read(reg);
1097 
1098         /*
1099          * Wait for the 8254 to decrement by
1100          * (APIC_TIME_COUNT + pit_ticks_adj) ticks
1101          */
1102         target_pit_tick = pit_tick - APIC_TIME_COUNT;
1103         do {
1104                 pit_tick_lo = inb(PITCTR0_PORT);
1105                 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1106         } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1107 
1108         end_apic_tick = apic_reg_ops->apic_read(reg);
1109 
1110         *pit_ticks_adj = target_pit_tick - pit_tick;
1111 
1112         intr_restore(iflag);
1113 
1114         return (start_apic_tick - end_apic_tick);
1115 }
1116 
1117 /*
1118  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1119  * frequency.  Note at this stage in the boot sequence, the boot processor
1120  * is the only active processor.
1121  * hertz value of 0 indicates a one-shot mode request.  In this case
1122  * the function returns the resolution (in nanoseconds) for the hardware
1123  * timer interrupt.  If one-shot mode capability is not available,
1124  * the return value will be 0. apic_enable_oneshot is a global switch
1125  * for disabling the functionality.
1126  * A non-zero positive value for hertz indicates a periodic mode request.
1127  * In this case the hardware will be programmed to generate clock interrupts
1128  * at hertz frequency and returns the resolution of interrupts in
1129  * nanosecond.
1130  */
1131 
1132 int
1133 apic_clkinit(int hertz)
1134 {
1135         int             ret;
1136 
1137         apic_int_busy_mark = (apic_int_busy_mark *
1138             apic_sample_factor_redistribution) / 100;
1139         apic_int_free_mark = (apic_int_free_mark *
1140             apic_sample_factor_redistribution) / 100;
1141         apic_diff_for_redistribution = (apic_diff_for_redistribution *
1142             apic_sample_factor_redistribution) / 100;
1143 
1144         ret = apic_timer_init(hertz);
1145         return (ret);
1146 
1147 }
1148 
1149 /*
1150  * apic_preshutdown:
1151  * Called early in shutdown whilst we can still access filesystems to do
1152  * things like loading modules which will be required to complete shutdown
1153  * after filesystems are all unmounted.
1154  */
1155 void
1156 apic_preshutdown(int cmd, int fcn)
1157 {
1158         APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1159             cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1160 }
1161 
1162 void
1163 apic_shutdown(int cmd, int fcn)
1164 {
1165         int restarts, attempts;
1166         int i;
1167         uchar_t byte;
1168         ulong_t iflag;
1169 
1170         hpet_acpi_fini();
1171 
1172         /* Send NMI to all CPUs except self to do per processor shutdown */
1173         iflag = intr_clear();
1174 #ifdef  DEBUG
1175         APIC_AV_PENDING_SET();
1176 #else
1177         if (apic_mode == LOCAL_APIC)
1178                 APIC_AV_PENDING_SET();
1179 #endif /* DEBUG */
1180         apic_shutdown_processors = 1;
1181         apic_reg_ops->apic_write(APIC_INT_CMD1,
1182             AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1183 
1184         /* restore cmos shutdown byte before reboot */
1185         if (apic_cmos_ssb_set) {
1186                 outb(CMOS_ADDR, SSB);
1187                 outb(CMOS_DATA, 0);
1188         }
1189 
1190         ioapic_disable_redirection();
1191 
1192         /*      disable apic mode if imcr present       */
1193         if (apic_imcrp) {
1194                 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1195                 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1196         }
1197 
1198         apic_disable_local_apic();
1199 
1200         intr_restore(iflag);
1201 
1202         /* remainder of function is for shutdown cases only */
1203         if (cmd != A_SHUTDOWN)
1204                 return;
1205 
1206         /*
1207          * Switch system back into Legacy-Mode if using ACPI and
1208          * not powering-off.  Some BIOSes need to remain in ACPI-mode
1209          * for power-off to succeed (Dell Dimension 4600)
1210          * Do not disable ACPI while doing fastreboot
1211          */
1212         if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1213                 (void) AcpiDisable();
1214 
1215         if (fcn == AD_FASTREBOOT) {
1216                 apic_reg_ops->apic_write(APIC_INT_CMD1,
1217                     AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1218         }
1219 
1220         /* remainder of function is for shutdown+poweroff case only */
1221         if (fcn != AD_POWEROFF)
1222                 return;
1223 
1224         switch (apic_poweroff_method) {
1225                 case APIC_POWEROFF_VIA_RTC:
1226 
1227                         /* select the extended NVRAM bank in the RTC */
1228                         outb(CMOS_ADDR, RTC_REGA);
1229                         byte = inb(CMOS_DATA);
1230                         outb(CMOS_DATA, (byte | EXT_BANK));
1231 
1232                         outb(CMOS_ADDR, PFR_REG);
1233 
1234                         /* for Predator must toggle the PAB bit */
1235                         byte = inb(CMOS_DATA);
1236 
1237                         /*
1238                          * clear power active bar, wakeup alarm and
1239                          * kickstart
1240                          */
1241                         byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1242                         outb(CMOS_DATA, byte);
1243 
1244                         /* delay before next write */
1245                         drv_usecwait(1000);
1246 
1247                         /* for S40 the following would suffice */
1248                         byte = inb(CMOS_DATA);
1249 
1250                         /* power active bar control bit */
1251                         byte |= PAB_CBIT;
1252                         outb(CMOS_DATA, byte);
1253 
1254                         break;
1255 
1256                 case APIC_POWEROFF_VIA_ASPEN_BMC:
1257                         restarts = 0;
1258 restart_aspen_bmc:
1259                         if (++restarts == 3)
1260                                 break;
1261                         attempts = 0;
1262                         do {
1263                                 byte = inb(MISMIC_FLAG_REGISTER);
1264                                 byte &= MISMIC_BUSY_MASK;
1265                                 if (byte != 0) {
1266                                         drv_usecwait(1000);
1267                                         if (attempts >= 3)
1268                                                 goto restart_aspen_bmc;
1269                                         ++attempts;
1270                                 }
1271                         } while (byte != 0);
1272                         outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1273                         byte = inb(MISMIC_FLAG_REGISTER);
1274                         byte |= 0x1;
1275                         outb(MISMIC_FLAG_REGISTER, byte);
1276                         i = 0;
1277                         for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1278                             i++) {
1279                                 attempts = 0;
1280                                 do {
1281                                         byte = inb(MISMIC_FLAG_REGISTER);
1282                                         byte &= MISMIC_BUSY_MASK;
1283                                         if (byte != 0) {
1284                                                 drv_usecwait(1000);
1285                                                 if (attempts >= 3)
1286                                                         goto restart_aspen_bmc;
1287                                                 ++attempts;
1288                                         }
1289                                 } while (byte != 0);
1290                                 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1291                                 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1292                                 byte = inb(MISMIC_FLAG_REGISTER);
1293                                 byte |= 0x1;
1294                                 outb(MISMIC_FLAG_REGISTER, byte);
1295                         }
1296                         break;
1297 
1298                 case APIC_POWEROFF_VIA_SITKA_BMC:
1299                         restarts = 0;
1300 restart_sitka_bmc:
1301                         if (++restarts == 3)
1302                                 break;
1303                         attempts = 0;
1304                         do {
1305                                 byte = inb(SMS_STATUS_REGISTER);
1306                                 byte &= SMS_STATE_MASK;
1307                                 if ((byte == SMS_READ_STATE) ||
1308                                     (byte == SMS_WRITE_STATE)) {
1309                                         drv_usecwait(1000);
1310                                         if (attempts >= 3)
1311                                                 goto restart_sitka_bmc;
1312                                         ++attempts;
1313                                 }
1314                         } while ((byte == SMS_READ_STATE) ||
1315                             (byte == SMS_WRITE_STATE));
1316                         outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1317                         i = 0;
1318                         for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1319                             i++) {
1320                                 attempts = 0;
1321                                 do {
1322                                         byte = inb(SMS_STATUS_REGISTER);
1323                                         byte &= SMS_IBF_MASK;
1324                                         if (byte != 0) {
1325                                                 drv_usecwait(1000);
1326                                                 if (attempts >= 3)
1327                                                         goto restart_sitka_bmc;
1328                                                 ++attempts;
1329                                         }
1330                                 } while (byte != 0);
1331                                 outb(sitka_bmc[i].port, sitka_bmc[i].data);
1332                         }
1333                         break;
1334 
1335                 case APIC_POWEROFF_NONE:
1336 
1337                         /* If no APIC direct method, we will try using ACPI */
1338                         if (apic_enable_acpi) {
1339                                 if (acpi_poweroff() == 1)
1340                                         return;
1341                         } else
1342                                 return;
1343 
1344                         break;
1345         }
1346         /*
1347          * Wait a limited time here for power to go off.
1348          * If the power does not go off, then there was a
1349          * problem and we should continue to the halt which
1350          * prints a message for the user to press a key to
1351          * reboot.
1352          */
1353         drv_usecwait(7000000); /* wait seven seconds */
1354 
1355 }
1356 
/*
 * Cyclic id kept by the APIC code.
 * NOTE(review): presumably the handle of a cyclic registered elsewhere
 * in this module -- confirm against its users.
 */
cyclic_id_t apic_cyclic_id;
1358 
1359 /*
1360  * The following functions are in the platform specific file so that they
1361  * can be different functions depending on whether we are running on
1362  * bare metal or a hypervisor.
1363  */
1364 
1365 /*
1366  * map an apic for memory-mapped access
1367  */
1368 uint32_t *
1369 mapin_apic(uint32_t addr, size_t len, int flags)
1370 {
1371         return ((void *)psm_map_phys(addr, len, flags));
1372 }
1373 
1374 uint32_t *
1375 mapin_ioapic(uint32_t addr, size_t len, int flags)
1376 {
1377         return (mapin_apic(addr, len, flags));
1378 }
1379 
1380 /*
1381  * unmap an apic
1382  */
1383 void
1384 mapout_apic(caddr_t addr, size_t len)
1385 {
1386         psm_unmap_phys(addr, len);
1387 }
1388 
1389 void
1390 mapout_ioapic(caddr_t addr, size_t len)
1391 {
1392         mapout_apic(addr, len);
1393 }
1394 
1395 uint32_t
1396 ioapic_read(int ioapic_ix, uint32_t reg)
1397 {
1398         volatile uint32_t *ioapic;
1399 
1400         ioapic = apicioadr[ioapic_ix];
1401         ioapic[APIC_IO_REG] = reg;
1402         return (ioapic[APIC_IO_DATA]);
1403 }
1404 
1405 void
1406 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1407 {
1408         volatile uint32_t *ioapic;
1409 
1410         ioapic = apicioadr[ioapic_ix];
1411         ioapic[APIC_IO_REG] = reg;
1412         ioapic[APIC_IO_DATA] = value;
1413 }
1414 
1415 void
1416 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1417 {
1418         volatile uint32_t *ioapic;
1419 
1420         ioapic = apicioadr[ioapic_ix];
1421         ioapic[APIC_IO_EOI] = value;
1422 }
1423 
1424 /*
1425  * Round-robin algorithm to find the next CPU with interrupts enabled.
1426  * It can't share the same static variable apic_next_bind_cpu with
1427  * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1428  * bound to CPU1 at boot time.  During boot, only CPU0 is online with
1429  * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1430  * are called.  However, the pcplusmp driver assumes that there will be
1431  * boot_ncpus CPUs configured eventually so it tries to distribute all
1432  * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
1433  * interrupts being targetted at CPU1, we need to use a dedicated static
1434  * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1435  */
1436 
1437 processorid_t
1438 apic_find_cpu(int flag)
1439 {
1440         int i;
1441         static processorid_t acid = 0;
1442 
1443         /* Find the first CPU with the passed-in flag set */
1444         for (i = 0; i < apic_nproc; i++) {
1445                 if (++acid >= apic_nproc) {
1446                         acid = 0;
1447                 }
1448                 if (apic_cpu_in_range(acid) &&
1449                     (apic_cpus[acid].aci_status & flag)) {
1450                         break;
1451                 }
1452         }
1453 
1454         ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1455         return (acid);
1456 }
1457 
1458 /*
1459  * Switch between safe and x2APIC IPI sending method.
1460  * CPU may power on in xapic mode or x2apic mode. If CPU needs to send IPI to
1461  * other CPUs before entering x2APIC mode, it still needs to xAPIC method.
1462  * Before sending StartIPI to target CPU, psm_send_ipi will be changed to
1463  * apic_common_send_ipi, which detects current local APIC mode and use right
1464  * method to send IPI. If some CPUs fail to start up, apic_poweron_cnt
1465  * won't return to zero, so apic_common_send_ipi will always be used.
1466  * psm_send_ipi can't be simply changed back to x2apic_send_ipi if some CPUs
1467  * failed to start up because those failed CPUs may recover itself later at
1468  * unpredictable time.
1469  */
1470 void
1471 apic_switch_ipi_callback(boolean_t enter)
1472 {
1473         ulong_t iflag;
1474         struct psm_ops *pops = psmops;
1475 
1476         iflag = intr_clear();
1477         lock_set(&apic_mode_switch_lock);
1478         if (enter) {
1479                 ASSERT(apic_poweron_cnt >= 0);
1480                 if (apic_poweron_cnt == 0) {
1481                         pops->psm_send_ipi = apic_common_send_ipi;
1482                         send_dirintf = pops->psm_send_ipi;
1483                 }
1484                 apic_poweron_cnt++;
1485         } else {
1486                 ASSERT(apic_poweron_cnt > 0);
1487                 apic_poweron_cnt--;
1488                 if (apic_poweron_cnt == 0) {
1489                         pops->psm_send_ipi = x2apic_send_ipi;
1490                         send_dirintf = pops->psm_send_ipi;
1491                 }
1492         }
1493         lock_clear(&apic_mode_switch_lock);
1494         intr_restore(iflag);
1495 }
1496 
1497 void
1498 apic_intrmap_init(int apic_mode)
1499 {
1500         int suppress_brdcst_eoi = 0;
1501 
1502         /*
1503          * Intel Software Developer's Manual 3A, 10.12.7:
1504          *
1505          * Routing of device interrupts to local APIC units operating in
1506          * x2APIC mode requires use of the interrupt-remapping architecture
1507          * specified in the Intel Virtualization Technology for Directed
1508          * I/O, Revision 1.3.  Because of this, BIOS must enumerate support
1509          * for and software must enable this interrupt remapping with
1510          * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1511          * the local APIC units.
1512          *
1513          *
1514          * In other words, to use the APIC in x2APIC mode, we need interrupt
1515          * remapping.  Since we don't start up the IOMMU by default, we
1516          * won't be able to do any interrupt remapping and therefore have to
1517          * use the APIC in traditional 'local APIC' mode with memory mapped
1518          * I/O.
1519          */
1520 
1521         if (psm_vt_ops != NULL) {
1522                 if (((apic_intrmap_ops_t *)psm_vt_ops)->
1523                     apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1524 
1525                         apic_vt_ops = psm_vt_ops;
1526 
1527                         /*
1528                          * We leverage the interrupt remapping engine to
1529                          * suppress broadcast EOI; thus we must send the
1530                          * directed EOI with the directed-EOI handler.
1531                          */
1532                         if (apic_directed_EOI_supported() == 0) {
1533                                 suppress_brdcst_eoi = 1;
1534                         }
1535 
1536                         apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1537 
1538                         if (apic_detect_x2apic()) {
1539                                 apic_enable_x2apic();
1540                         }
1541 
1542                         if (apic_directed_EOI_supported() == 0) {
1543                                 apic_set_directed_EOI_handler();
1544                         }
1545                 }
1546         }
1547 }
1548 
1549 /*ARGSUSED*/
1550 static void
1551 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1552 {
1553         irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1554 }
1555 
1556 /*ARGSUSED*/
1557 static void
1558 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1559 {
1560         mregs->mr_addr = MSI_ADDR_HDR |
1561             (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1562             (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1563             (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1564         mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1565             mregs->mr_data;
1566 }
1567 
1568 /*
1569  * Functions from apic_introp.c
1570  *
1571  * Those functions are used by apic_intr_ops().
1572  */
1573 
/*
 * MSI support flag:
 * reflects whether MSI is supported at the APIC level.
 * It can also be patched through /etc/system.
 *
 *  0 = default value - not yet known; apic_check_msi_support() needs to
 *      be called to find out, and the flag is then set accordingly
 *  1 = supported
 * -1 = not supported
 */
int     apic_support_msi = 0;

/* Multiple vector support for MSI-X (defaults to 1) */
int     apic_msix_enable = 1;

/* Multiple vector support for MSI (defaults to 1) */
int     apic_multi_msi_enable = 1;
1591 
1592 /*
1593  * check whether the system supports MSI
1594  *
1595  * If PCI-E capability is found, then this must be a PCI-E system.
1596  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
1597  * to indicate this system supports MSI.
1598  */
1599 int
1600 apic_check_msi_support()
1601 {
1602         dev_info_t *cdip;
1603         char dev_type[16];
1604         int dev_len;
1605 
1606         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1607 
1608         /*
1609          * check whether the first level children of root_node have
1610          * PCI-E capability
1611          */
1612         for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1613             cdip = ddi_get_next_sibling(cdip)) {
1614 
1615                 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1616                     " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1617                     ddi_driver_name(cdip), ddi_binding_name(cdip),
1618                     ddi_node_name(cdip)));
1619                 dev_len = sizeof (dev_type);
1620                 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1621                     "device_type", (caddr_t)dev_type, &dev_len)
1622                     != DDI_PROP_SUCCESS)
1623                         continue;
1624                 if (strcmp(dev_type, "pciex") == 0)
1625                         return (PSM_SUCCESS);
1626         }
1627 
1628         /* MSI is not supported on this system */
1629         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1630             "device_type found\n"));
1631         return (PSM_FAILURE);
1632 }
1633 
1634 /*
1635  * apic_pci_msi_unconfigure:
1636  *
1637  * This and next two interfaces are copied from pci_intr_lib.c
1638  * Do ensure that these two files stay in sync.
1639  * These needed to be copied over here to avoid a deadlock situation on
1640  * certain mp systems that use MSI interrupts.
1641  *
1642  * IMPORTANT regards next three interfaces:
1643  * i) are called only for MSI/X interrupts.
1644  * ii) called with interrupts disabled, and must not block
1645  */
1646 void
1647 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1648 {
1649         ushort_t                msi_ctrl;
1650         int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1651         ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(rdip);
1652 
1653         ASSERT((handle != NULL) && (cap_ptr != 0));
1654 
1655         if (type == DDI_INTR_TYPE_MSI) {
1656                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1657                 msi_ctrl &= (~PCI_MSI_MME_MASK);
1658                 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1659                 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1660 
1661                 if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
1662                         pci_config_put16(handle,
1663                             cap_ptr + PCI_MSI_64BIT_DATA, 0);
1664                         pci_config_put32(handle,
1665                             cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1666                 } else {
1667                         pci_config_put16(handle,
1668                             cap_ptr + PCI_MSI_32BIT_DATA, 0);
1669                 }
1670 
1671         } else if (type == DDI_INTR_TYPE_MSIX) {
1672                 uintptr_t       off;
1673                 uint32_t        mask;
1674                 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip);
1675 
1676                 ASSERT(msix_p != NULL);
1677 
1678                 /* Offset into "inum"th entry in the MSI-X table & mask it */
1679                 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1680                     PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1681 
1682                 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1683 
1684                 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1685 
1686                 /* Offset into the "inum"th entry in the MSI-X table */
1687                 off = (uintptr_t)msix_p->msix_tbl_addr +
1688                     (inum * PCI_MSIX_VECTOR_SIZE);
1689 
1690                 /* Reset the "data" and "addr" bits */
1691                 ddi_put32(msix_p->msix_tbl_hdl,
1692                     (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1693                 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1694         }
1695 }
1696 
1697 /*
1698  * apic_pci_msi_disable_mode:
1699  */
1700 void
1701 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1702 {
1703         ushort_t                msi_ctrl;
1704         int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1705         ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(rdip);
1706 
1707         ASSERT((handle != NULL) && (cap_ptr != 0));
1708 
1709         if (type == DDI_INTR_TYPE_MSI) {
1710                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1711                 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1712                         return;
1713 
1714                 msi_ctrl &= ~PCI_MSI_ENABLE_BIT;    /* MSI disable */
1715                 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1716 
1717         } else if (type == DDI_INTR_TYPE_MSIX) {
1718                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1719                 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1720                         msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1721                         pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1722                             msi_ctrl);
1723                 }
1724         }
1725 }
1726 
1727 uint32_t
1728 apic_get_localapicid(uint32_t cpuid)
1729 {
1730         ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1731 
1732         return (apic_cpus[cpuid].aci_local_id);
1733 }
1734 
1735 uchar_t
1736 apic_get_ioapicid(uchar_t ioapicindex)
1737 {
1738         ASSERT(ioapicindex < MAX_IO_APIC);
1739 
1740         return (apic_io_id[ioapicindex]);
1741 }