core New usr/src/uts/sun4v/os/mach_cpu

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/types.h>
  26 #include <sys/systm.h>
  27 #include <sys/archsystm.h>
  28 #include <sys/t_lock.h>
  29 #include <sys/uadmin.h>
  30 #include <sys/panic.h>
  31 #include <sys/reboot.h>
  32 #include <sys/autoconf.h>
  33 #include <sys/machsystm.h>
  34 #include <sys/promif.h>
  35 #include <sys/membar.h>
  36 #include <vm/hat_sfmmu.h>
  37 #include <sys/cpu_module.h>
  38 #include <sys/cpu_sgnblk_defs.h>
  39 #include <sys/intreg.h>
  40 #include <sys/consdev.h>
  41 #include <sys/kdi_impl.h>
  42 #include <sys/traptrace.h>
  43 #include <sys/hypervisor_api.h>
  44 #include <sys/vmsystm.h>
  45 #include <sys/dtrace.h>
  46 #include <sys/xc_impl.h>
  47 #include <sys/callb.h>
  48 #include <sys/mdesc.h>
  49 #include <sys/mach_descrip.h>
  50 #include <sys/wdt.h>
  51 #include <sys/soft_state.h>
  52 #include <sys/promimpl.h>
  53 #include <sys/hsvc.h>
  54 #include <sys/ldoms.h>
  55 #include <sys/kldc.h>
  56 #include <sys/clock_impl.h>
  57 #include <sys/suspend.h>
  58 #include <sys/dumphdr.h>
  59 
  60 /*
  61  * hvdump_buf_va is a pointer to the currently-configured hvdump_buf.
  62  * A value of NULL indicates that this area is not configured.
  63  * hvdump_buf_sz is tunable but will be clamped to HVDUMP_SIZE_MAX.
  64  */
  65 
  66 caddr_t hvdump_buf_va;
  67 uint64_t hvdump_buf_sz = HVDUMP_SIZE_DEFAULT;
  68 static uint64_t hvdump_buf_pa;
  69 
  70 u_longlong_t panic_tick;
  71 
  72 extern u_longlong_t gettick();
  73 static void reboot_machine(char *);
  74 static void update_hvdump_buffer(void);
  75 
  76 /*
  77  * For xt_sync synchronization.
  78  */
  79 extern uint64_t xc_tick_limit;
  80 extern uint64_t xc_tick_jump_limit;
  81 extern uint64_t xc_sync_tick_limit;
  82 
  83 /*
  84  * Bring in the cpc PIL_15 handler for panic_enter_hw.
  85  */
  86 extern uint64_t cpc_level15_inum;
  87 
  88 /*
  89  * We keep our own copies, used for cache flushing, because we can be called
  90  * before cpu_fiximpl().
  91  */
  92 static int kdi_dcache_size;
  93 static int kdi_dcache_linesize;
  94 static int kdi_icache_size;
  95 static int kdi_icache_linesize;
  96 
  97 /*
  98  * Assembly support for generic modules in sun4v/ml/mach_xc.s
  99  */
 100 extern void init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2);
 101 extern void kdi_flush_idcache(int, int, int, int);
 102 extern uint64_t get_cpuaddr(uint64_t, uint64_t);
 103 
 104 
 105 #define BOOT_CMD_MAX_LEN        256     /* power of 2 & 16-byte aligned */
 106 #define BOOT_CMD_BASE           "boot "
 107 
 108 /*
 109  * In an LDoms system we do not save the user's boot args in NVRAM
 110  * as is done on legacy systems.  Instead, we format and send a
 111  * 'reboot-command' variable to the variable service.  The contents
 112  * of the variable are retrieved by OBP and used verbatim for
 113  * the next boot.
 114  */
 115 static void
 116 store_boot_cmd(char *args, boolean_t add_boot_str, boolean_t invoke_cb)
 117 {
 118         static char     *cmd_buf;
 119         size_t          len = 1;
 120         pnode_t         node;
 121         size_t          base_len = 0;
 122         size_t          args_len;
 123         size_t          args_max;
 124         uint64_t        majornum;
 125         uint64_t        minornum;
 126         uint64_t        buf_pa;
 127         uint64_t        status;
 128 
 129         status = hsvc_version(HSVC_GROUP_REBOOT_DATA, &majornum, &minornum);
 130 
 131         /*
 132          * invoke_cb is set to true when we are in a normal shutdown sequence
 133          * (interrupts are not blocked, the system is not panicking or being
 134          * suspended). In that case, we can use any method to store the boot
 135          * command. Otherwise storing the boot command can not be done using
 136          * a domain service because it can not be safely used in that context.
 137          */
 138         if ((status != H_EOK) && (invoke_cb == B_FALSE))
 139                 return;
 140 
 141         cmd_buf = contig_mem_alloc(BOOT_CMD_MAX_LEN);
 142         if (cmd_buf == NULL)
 143                 return;
 144 
 145         if (add_boot_str) {
 146                 (void) strcpy(cmd_buf, BOOT_CMD_BASE);
 147 
 148                 base_len = strlen(BOOT_CMD_BASE);
 149                 len = base_len + 1;
 150         }
 151 
 152         if (args != NULL) {
 153                 args_len = strlen(args);
 154                 args_max = BOOT_CMD_MAX_LEN - len;
 155 
 156                 if (args_len > args_max) {
 157                         cmn_err(CE_WARN, "Reboot command too long (%ld), "
 158                             "truncating command arguments", len + args_len);
 159 
 160                         args_len = args_max;
 161                 }
 162 
 163                 len += args_len;
 164                 (void) strncpy(&cmd_buf[base_len], args, args_len);
 165         }
 166 
 167         /*
 168          * Save the reboot-command with HV, if reboot data group is
 169          * negotiated. Else save the reboot-command via vars-config domain
 170          * services on the SP.
 171          */
 172         if (status == H_EOK) {
 173                 buf_pa = va_to_pa(cmd_buf);
 174                 status = hv_reboot_data_set(buf_pa, len);
 175                 if (status != H_EOK) {
 176                         cmn_err(CE_WARN, "Unable to store boot command for "
 177                             "use on reboot with HV: error = 0x%lx", status);
 178                 }
 179         } else {
 180                 node = prom_optionsnode();
 181                 if ((node == OBP_NONODE) || (node == OBP_BADNODE) ||
 182                     prom_setprop(node, "reboot-command", cmd_buf, len) == -1)
 183                         cmn_err(CE_WARN, "Unable to store boot command for "
 184                             "use on reboot");
 185         }
 186 }
 187 
 188 
 189 /*
 190  * Machine dependent code to reboot.
 191  *
 192  * "bootstr", when non-null, points to a string to be used as the
 193  * argument string when rebooting.
 194  *
 195  * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
 196  * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
 197  * we are in a normal shutdown sequence (interrupts are not blocked, the
 198  * system is not panic'ing or being suspended).
 199  */
 200 /*ARGSUSED*/
 201 void
 202 mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb)
 203 {
 204         extern void pm_cfb_check_and_powerup(void);
 205 
 206         /*
 207          * XXX - rconsvp is set to NULL to ensure that output messages
 208          * are sent to the underlying "hardware" device using the
 209          * monitor's printf routine since we are in the process of
 210          * either rebooting or halting the machine.
 211          */
 212         rconsvp = NULL;
 213 
 214         switch (fcn) {
 215         case AD_HALT:
 216                 /*
 217                  * LDoms: By storing a no-op command
 218                  * in the 'reboot-command' variable we cause OBP
 219                  * to ignore the setting of 'auto-boot?' after
 220                  * it completes the reset.  This causes the system
 221                  * to stop at the ok prompt.
 222                  */
 223                 if (domaining_enabled())
 224                         store_boot_cmd("noop", B_FALSE, invoke_cb);
 225                 break;
 226 
 227         case AD_POWEROFF:
 228                 break;
 229 
 230         default:
 231                 if (bootstr == NULL) {
 232                         switch (fcn) {
 233 
 234                         case AD_FASTREBOOT:
 235                         case AD_BOOT:
 236                                 bootstr = "";
 237                                 break;
 238 
 239                         case AD_IBOOT:
 240                                 bootstr = "-a";
 241                                 break;
 242 
 243                         case AD_SBOOT:
 244                                 bootstr = "-s";
 245                                 break;
 246 
 247                         case AD_SIBOOT:
 248                                 bootstr = "-sa";
 249                                 break;
 250                         default:
 251                                 cmn_err(CE_WARN,
 252                                     "mdboot: invalid function %d", fcn);
 253                                 bootstr = "";
 254                                 break;
 255                         }
 256                 }
 257 
 258                 /*
 259                  * If LDoms is running, we must save the boot string
 260                  * before we enter restricted mode.  This is possible
 261                  * only if we are not being called from panic.
 262                  */
 263                 if (domaining_enabled())
 264                         store_boot_cmd(bootstr, B_TRUE, invoke_cb);
 265         }
 266 
 267         /*
 268          * At a high interrupt level we can't:
 269          *      1) bring up the console
 270          * or
 271          *      2) wait for pending interrupts prior to redistribution
 272          *         to the current CPU
 273          *
 274          * so we do them now.
 275          */
 276         pm_cfb_check_and_powerup();
 277 
 278         /* make sure there are no more changes to the device tree */
 279         devtree_freeze();
 280 
 281         if (invoke_cb)
 282                 (void) callb_execute_class(CB_CL_MDBOOT, NULL);
 283 
 284         /*
 285          * Clear any unresolved UEs from memory.
 286          */
 287         page_retire_mdboot();
 288 
 289         /*
 290          * stop other cpus which also raise our priority. since there is only
 291          * one active cpu after this, and our priority will be too high
 292          * for us to be preempted, we're essentially single threaded
 293          * from here on out.
 294          */
 295         stop_other_cpus();
 296 
 297         /*
 298          * try and reset leaf devices.  reset_leaves() should only
 299          * be called when there are no other threads that could be
 300          * accessing devices
 301          */
 302         reset_leaves();
 303 
 304         watchdog_clear();
 305 
 306         if (fcn == AD_HALT) {
 307                 mach_set_soft_state(SIS_TRANSITION,
 308                     &SOLARIS_SOFT_STATE_HALT_MSG);
 309                 halt((char *)NULL);
 310         } else if (fcn == AD_POWEROFF) {
 311                 mach_set_soft_state(SIS_TRANSITION,
 312                     &SOLARIS_SOFT_STATE_POWER_MSG);
 313                 power_down(NULL);
 314         } else {
 315                 mach_set_soft_state(SIS_TRANSITION,
 316                     &SOLARIS_SOFT_STATE_REBOOT_MSG);
 317                 reboot_machine(bootstr);
 318         }
 319         /* MAYBE REACHED */
 320 }
 321 
/*
 * mdpreboot - may be called prior to mdboot while root fs still mounted.
 *
 * This implementation has nothing to do at that point: the body is empty
 * and all three arguments are ignored.
 */
/*ARGSUSED*/
void
mdpreboot(int cmd, int fcn, char *bootstr)
{
}
 328 
/*
 * Halt the machine and then reboot with the device
 * and arguments specified in bootstr.
 *
 * bootstr is handed unchanged to prom_reboot().  Before that call the
 * register windows are flushed, the other CPUs are told to stop, a
 * "rebooting..." notice is printed through the PROM, and the CPU signature
 * is updated.
 */
static void
reboot_machine(char *bootstr)
{
        flush_windows();
        stop_other_cpus();              /* send stop signal to other CPUs */
        prom_printf("rebooting...\n");
        /*
         * For platforms that use CPU signatures, we
         * need to set the signature block to OS and
         * the state to exiting for all the processors.
         */
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
        prom_reboot(bootstr);
        /*NOTREACHED*/
}
 348 
/*
 * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 * Once in panic_idle() they raise spl, record their location, and spin.
 *
 * The sequence is: raise the priority with spl7(), flush register windows,
 * save the current context into curthread->t_pcb with setjmp(), set this
 * CPU's in_prom flag, and then loop forever.  panic_stopcpus() polls that
 * in_prom flag to learn that this CPU has stopped.
 */
static void
panic_idle(void)
{
        (void) spl7();

        debug_flush_windows();
        (void) setjmp(&curthread->t_pcb);

        /* Publish "stopped"; the barrier is issued before we start spinning. */
        CPU->cpu_m.in_prom = 1;
        membar_stld();

        /* Never returns. */
        for (;;)
                ;
}
 367 
 368 /*
 369  * Force the other CPUs to trap into panic_idle(), and then remove them
 370  * from the cpu_ready_set so they will no longer receive cross-calls.
 371  */
 372 /*ARGSUSED*/
 373 void
 374 panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
 375 {
 376         cpuset_t cps;
 377         int i;
 378 
 379         (void) splzs();
 380         CPUSET_ALL_BUT(cps, cp->cpu_id);
 381         xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);
 382 
 383         for (i = 0; i < NCPU; i++) {
 384                 if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
 385                         int ntries = 0x10000;
 386 
 387                         while (!cpu[i]->cpu_m.in_prom && ntries) {
 388                                 DELAY(50);
 389                                 ntries--;
 390                         }
 391 
 392                         if (!cpu[i]->cpu_m.in_prom)
 393                                 printf("panic: failed to stop cpu%d\n", i);
 394 
 395                         cpu[i]->cpu_flags &= ~CPU_READY;
 396                         cpu[i]->cpu_flags |= CPU_QUIESCED;
 397                         CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
 398                 }
 399         }
 400 }
 401 
 402 /*
 403  * Platform callback following each entry to panicsys().  If we've panicked at
 404  * level 14, we examine t_panic_trap to see if a fatal trap occurred.  If so,
 405  * we disable further %tick_cmpr interrupts.  If not, an explicit call to panic
 406  * was made and so we re-enqueue an interrupt request structure to allow
 407  * further level 14 interrupts to be processed once we lower PIL.  This allows
 408  * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
 409  *
 410  * In case we panic at level 15, ensure that the cpc handler has been
 411  * reinstalled otherwise we could run the risk of hitting a missing interrupt
 412  * handler when this thread drops PIL and the cpc counter overflows.
 413  */
 414 void
 415 panic_enter_hw(int spl)
 416 {
 417         uint_t opstate;
 418 
 419         if (!panic_tick) {
 420                 panic_tick = gettick();
 421                 if (mach_htraptrace_enable) {
 422                         uint64_t prev_freeze;
 423 
 424                         /*  there are no possible error codes for this hcall */
 425                         (void) hv_ttrace_freeze((uint64_t)TRAP_TFREEZE_ALL,
 426                             &prev_freeze);
 427                 }
 428 #ifdef TRAPTRACE
 429                 TRAPTRACE_FREEZE;
 430 #endif
 431         }
 432 
 433         mach_set_soft_state(SIS_TRANSITION, &SOLARIS_SOFT_STATE_PANIC_MSG);
 434 
 435         if (spl == ipltospl(PIL_14)) {
 436                 opstate = disable_vec_intr();
 437 
 438                 if (curthread->t_panic_trap != NULL) {
 439                         tickcmpr_disable();
 440                         intr_dequeue_req(PIL_14, cbe_level14_inum);
 441                 } else {
 442                         if (!tickcmpr_disabled())
 443                                 intr_enqueue_req(PIL_14, cbe_level14_inum);
 444                         /*
 445                          * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
 446                          * and SOFTINT<16> (STICK_INT) to indicate
 447                          * that the current level 14 has been serviced.
 448                          */
 449                         wr_clr_softint((1 << PIL_14) |
 450                             TICK_INT_MASK | STICK_INT_MASK);
 451                 }
 452 
 453                 enable_vec_intr(opstate);
 454         } else if (spl == ipltospl(PIL_15)) {
 455                 opstate = disable_vec_intr();
 456                 intr_enqueue_req(PIL_15, cpc_level15_inum);
 457                 wr_clr_softint(1 << PIL_15);
 458                 enable_vec_intr(opstate);
 459         }
 460 }
 461 
/*
 * Miscellaneous hardware-specific code to execute after panicstr is set
 * by the panic code: we also print and record PTL1 panic information here.
 *
 * In order, this freezes trap tracing (if not already done), updates the
 * CPU signature, re-registers the hypervisor dump buffer, asks the bus
 * nexus to disable further errors, redirects interrupts to this CPU, resets
 * nexus interrupt state, and finally sets PSTATE_IE in %pstate.
 * The pdp argument is not used.
 */
/*ARGSUSED*/
void
panic_quiesce_hw(panic_data_t *pdp)
{
        extern uint_t getpstate(void);
        extern void setpstate(uint_t);

        /*
         * Turn off TRAPTRACE and save the current %tick value in panic_tick.
         * A non-zero panic_tick means this was already done.
         */
        if (!panic_tick) {
                panic_tick = gettick();
                if (mach_htraptrace_enable) {
                        uint64_t prev_freeze;

                        /*  there are no possible error codes for this hcall */
                        (void) hv_ttrace_freeze((uint64_t)TRAP_TFREEZE_ALL,
                            &prev_freeze);
                }
#ifdef TRAPTRACE
                TRAPTRACE_FREEZE;
#endif
        }
        /*
         * For Platforms that use CPU signatures, we
         * need to set the signature block to OS, the state to
         * exiting, and the substate to panic for all the processors.
         */
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);

        /* No-op when no hvdump buffer is configured. */
        update_hvdump_buffer();

        /*
         * Disable further ECC errors from the bus nexus.
         */
        (void) bus_func_invoke(BF_TYPE_ERRDIS);

        /*
         * Redirect all interrupts to the current CPU.
         */
        intr_redist_all_cpus_shutdown();

        /*
         * This call exists solely to support dumps to network
         * devices after sync from OBP.
         *
         * If we came here via the sync callback, then on some
         * platforms, interrupts may have arrived while we were
         * stopped in OBP.  OBP will arrange for those interrupts to
         * be redelivered if you say "go", but not if you invoke a
         * client callback like 'sync'.  For some dump devices
         * (network swap devices), we need interrupts to be
         * delivered in order to dump, so we have to call the bus
         * nexus driver to reset the interrupt state machines.
         */
        (void) bus_func_invoke(BF_TYPE_RESINTR);

        /* Set the interrupt-enable bit in the processor state. */
        setpstate(getpstate() | PSTATE_IE);
}
 525 
/*
 * Platforms that use CPU signatures need to set the signature block to OS and
 * the state to exiting for all CPUs. The substate passed here is
 * SIGSUBST_DUMP (earlier descriptions of this routine call it PANIC_CONT);
 * it indicates that we're about to write the crash dump, which tells the
 * SSP/SMS to begin a timeout routine to reboot the machine if the dump never
 * completes.
 *
 * The spl argument is not used.
 */
/*ARGSUSED*/
void
panic_dump_hw(int spl)
{
        CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
}
 538 
 539 /*
 540  * for ptl1_panic
 541  */
 542 void
 543 ptl1_init_cpu(struct cpu *cpu)
 544 {
 545         ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;
 546 
 547         /*CONSTCOND*/
 548         if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
 549                 panic("ptl1_init_cpu: not enough space left for ptl1_panic "
 550                     "stack, sizeof (struct cpu) = %lu",
 551                     (unsigned long)sizeof (struct cpu));
 552         }
 553 
 554         pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
 555         cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
 556 }
 557 
 558 void
 559 ptl1_panic_handler(ptl1_state_t *pstate)
 560 {
 561         static const char *ptl1_reasons[] = {
 562 #ifdef  PTL1_PANIC_DEBUG
 563                 "trap for debug purpose",       /* PTL1_BAD_DEBUG */
 564 #else
 565                 "unknown trap",                 /* PTL1_BAD_DEBUG */
 566 #endif
 567                 "register window trap",         /* PTL1_BAD_WTRAP */
 568                 "kernel MMU miss",              /* PTL1_BAD_KMISS */
 569                 "kernel protection fault",      /* PTL1_BAD_KPROT_FAULT */
 570                 "ISM MMU miss",                 /* PTL1_BAD_ISM */
 571                 "kernel MMU trap",              /* PTL1_BAD_MMUTRAP */
 572                 "kernel trap handler state",    /* PTL1_BAD_TRAP */
 573                 "floating point trap",          /* PTL1_BAD_FPTRAP */
 574 #ifdef  DEBUG
 575                 "pointer to intr_vec",          /* PTL1_BAD_INTR_VEC */
 576 #else
 577                 "unknown trap",                 /* PTL1_BAD_INTR_VEC */
 578 #endif
 579 #ifdef  TRAPTRACE
 580                 "TRACE_PTR state",              /* PTL1_BAD_TRACE_PTR */
 581 #else
 582                 "unknown trap",                 /* PTL1_BAD_TRACE_PTR */
 583 #endif
 584                 "stack overflow",               /* PTL1_BAD_STACK */
 585                 "DTrace flags",                 /* PTL1_BAD_DTRACE_FLAGS */
 586                 "attempt to steal locked ctx",  /* PTL1_BAD_CTX_STEAL */
 587                 "CPU ECC error loop",           /* PTL1_BAD_ECC */
 588                 "unexpected error from hypervisor call", /* PTL1_BAD_HCALL */
 589                 "unexpected global level(%gl)", /* PTL1_BAD_GL */
 590                 "Watchdog Reset",               /* PTL1_BAD_WATCHDOG */
 591                 "unexpected RED mode trap",     /* PTL1_BAD_RED */
 592                 "return value EINVAL from hcall: "\
 593                     "UNMAP_PERM_ADDR",  /* PTL1_BAD_HCALL_UNMAP_PERM_EINVAL */
 594                 "return value ENOMAP from hcall: "\
 595                     "UNMAP_PERM_ADDR", /* PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP */
 596                 "error raising a TSB exception", /* PTL1_BAD_RAISE_TSBEXCP */
 597                 "missing shared TSB"    /* PTL1_NO_SCDTSB8K */
 598         };
 599 
 600         uint_t reason = pstate->ptl1_regs.ptl1_gregs[0].ptl1_g1;
 601         uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
 602         struct panic_trap_info ti = { 0 };
 603 
 604         /*
 605          * Use trap_info for a place holder to call panic_savetrap() and
 606          * panic_showtrap() to save and print out ptl1_panic information.
 607          */
 608         if (curthread->t_panic_trap == NULL)
 609                 curthread->t_panic_trap = &ti;
 610 
 611         if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
 612                 panic("bad %s at TL %u", ptl1_reasons[reason], tl);
 613         else
 614                 panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
 615 }
 616 
 617 void
 618 clear_watchdog_on_exit(void)
 619 {
 620         if (watchdog_enabled && watchdog_activated) {
 621                 prom_printf("Debugging requested; hardware watchdog "
 622                     "suspended.\n");
 623                 (void) watchdog_suspend();
 624         }
 625 }
 626 
 627 /*
 628  * Restore the watchdog timer when returning from a debugger
 629  * after a panic or L1-A and resume watchdog pat.
 630  */
 631 void
 632 restore_watchdog_on_entry()
 633 {
 634         watchdog_resume();
 635 }
 636 
/*
 * Suspend the watchdog through watchdog_suspend().
 * Always returns 0.
 */
int
kdi_watchdog_disable(void)
{
        watchdog_suspend();

        return (0);
}
 644 
/*
 * Counterpart of kdi_watchdog_disable(): resume the watchdog through
 * watchdog_resume().
 */
void
kdi_watchdog_restore(void)
{
        watchdog_resume();
}
 650 
 651 void
 652 mach_dump_buffer_init(void)
 653 {
 654         uint64_t  ret, minsize = 0;
 655 
 656         if (hvdump_buf_sz > HVDUMP_SIZE_MAX)
 657                 hvdump_buf_sz = HVDUMP_SIZE_MAX;
 658 
 659         hvdump_buf_va = contig_mem_alloc_align(hvdump_buf_sz, PAGESIZE);
 660         if (hvdump_buf_va == NULL)
 661                 return;
 662 
 663         hvdump_buf_pa = va_to_pa(hvdump_buf_va);
 664 
 665         ret = hv_dump_buf_update(hvdump_buf_pa, hvdump_buf_sz,
 666             &minsize);
 667 
 668         if (ret != H_EOK) {
 669                 contig_mem_free(hvdump_buf_va, hvdump_buf_sz);
 670                 hvdump_buf_va = NULL;
 671                 cmn_err(CE_NOTE, "!Error in setting up hvstate"
 672                     "dump buffer. Error = 0x%lx, size = 0x%lx,"
 673                     "buf_pa = 0x%lx", ret, hvdump_buf_sz,
 674                     hvdump_buf_pa);
 675 
 676                 if (ret == H_EINVAL) {
 677                         cmn_err(CE_NOTE, "!Buffer size too small."
 678                             "Available buffer size = 0x%lx,"
 679                             "Minimum buffer size required = 0x%lx",
 680                             hvdump_buf_sz, minsize);
 681                 }
 682         }
 683 }
 684 
 685 
 686 static void
 687 update_hvdump_buffer(void)
 688 {
 689         uint64_t ret, dummy_val;
 690 
 691         if (hvdump_buf_va == NULL)
 692                 return;
 693 
 694         ret = hv_dump_buf_update(hvdump_buf_pa, hvdump_buf_sz,
 695             &dummy_val);
 696         if (ret != H_EOK) {
 697                 cmn_err(CE_NOTE, "!Cannot update hvstate dump"
 698                     "buffer. Error = 0x%lx", ret);
 699         }
 700 }
 701 
 702 
 703 static int
 704 getintprop(pnode_t node, char *name, int deflt)
 705 {
 706         int     value;
 707 
 708         switch (prom_getproplen(node, name)) {
 709         case 0:
 710                 value = 1;      /* boolean properties */
 711                 break;
 712 
 713         case sizeof (int):
 714                 (void) prom_getprop(node, name, (caddr_t)&value);
 715                 break;
 716 
 717         default:
 718                 value = deflt;
 719                 break;
 720         }
 721 
 722         return (value);
 723 }
 724 
 725 /*
 726  * Called by setcpudelay
 727  */
 728 void
 729 cpu_init_tick_freq(void)
 730 {
 731         md_t *mdp;
 732         mde_cookie_t rootnode;
 733         int             listsz;
 734         mde_cookie_t    *listp = NULL;
 735         int     num_nodes;
 736         uint64_t stick_prop;
 737 
 738         if (broken_md_flag) {
 739                 sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
 740                 return;
 741         }
 742 
 743         if ((mdp = md_get_handle()) == NULL)
 744                 panic("stick_frequency property not found in MD");
 745 
 746         rootnode = md_root_node(mdp);
 747         ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
 748 
 749         num_nodes = md_node_count(mdp);
 750 
 751         ASSERT(num_nodes > 0);
 752         listsz = num_nodes * sizeof (mde_cookie_t);
 753         listp = (mde_cookie_t *)prom_alloc((caddr_t)0, listsz, 0);
 754 
 755         if (listp == NULL)
 756                 panic("cannot allocate list for MD properties");
 757 
 758         num_nodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "platform"),
 759             md_find_name(mdp, "fwd"), listp);
 760 
 761         ASSERT(num_nodes == 1);
 762 
 763         if (md_get_prop_val(mdp, *listp, "stick-frequency", &stick_prop) != 0)
 764                 panic("stick_frequency property not found in MD");
 765 
 766         sys_tick_freq = stick_prop;
 767 
 768         prom_free((caddr_t)listp, listsz);
 769         (void) md_fini_handle(mdp);
 770 }
 771 
 772 int shipit(int n, uint64_t cpu_list_ra);
 773 
 774 #ifdef DEBUG
 775 #define SEND_MONDO_STATS        1
 776 #endif
 777 
 778 #ifdef SEND_MONDO_STATS
 779 uint32_t x_one_stimes[64];
 780 uint32_t x_one_ltimes[16];
 781 uint32_t x_set_stimes[64];
 782 uint32_t x_set_ltimes[16];
 783 uint32_t x_set_cpus[NCPU];
 784 #endif
 785 
 786 void
 787 send_one_mondo(int cpuid)
 788 {
 789         int retries, stat;
 790         uint64_t starttick, endtick, tick, lasttick;
 791         struct machcpu  *mcpup = &(CPU->cpu_m);
 792 
 793         CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
 794         starttick = lasttick = gettick();
 795         mcpup->cpu_list[0] = (uint16_t)cpuid;
 796         stat = shipit(1, mcpup->cpu_list_ra);
 797         endtick = starttick + xc_tick_limit;
 798         retries = 0;
 799         while (stat != H_EOK) {
 800                 if (stat != H_EWOULDBLOCK) {
 801                         if (panic_quiesce)
 802                                 return;
 803                         if (stat == H_ECPUERROR)
 804                                 cmn_err(CE_PANIC, "send_one_mondo: "
 805                                     "cpuid: 0x%x has been marked in "
 806                                     "error", cpuid);
 807                         else
 808                                 cmn_err(CE_PANIC, "send_one_mondo: "
 809                                     "unexpected hypervisor error 0x%x "
 810                                     "while sending a mondo to cpuid: "
 811                                     "0x%x", stat, cpuid);
 812                 }
 813                 tick = gettick();
 814                 /*
 815                  * If there is a big jump between the current tick
 816                  * count and lasttick, we have probably hit a break
 817                  * point.  Adjust endtick accordingly to avoid panic.
 818                  */
 819                 if (tick > (lasttick + xc_tick_jump_limit))
 820                         endtick += (tick - lasttick);
 821                 lasttick = tick;
 822                 if (tick > endtick) {
 823                         if (panic_quiesce)
 824                                 return;
 825                         cmn_err(CE_PANIC, "send mondo timeout "
 826                             "(target 0x%x) [retries: 0x%x hvstat: 0x%x]",
 827                             cpuid, retries, stat);
 828                 }
 829                 drv_usecwait(1);
 830                 stat = shipit(1, mcpup->cpu_list_ra);
 831                 retries++;
 832         }
 833 #ifdef SEND_MONDO_STATS
 834         {
 835                 uint64_t n = gettick() - starttick;
 836                 if (n < 8192)
 837                         x_one_stimes[n >> 7]++;
 838                 else if (n < 15*8192)
 839                         x_one_ltimes[n >> 13]++;
 840                 else
 841                         x_one_ltimes[0xf]++;
 842         }
 843 #endif
 844 }
 845 
 846 void
 847 send_mondo_set(cpuset_t set)
 848 {
 849         uint64_t starttick, endtick, tick, lasttick;
 850         uint_t largestid, smallestid;
 851         int i, j;
 852         int ncpuids = 0;
 853         int shipped = 0;
 854         int retries = 0;
 855         struct machcpu  *mcpup = &(CPU->cpu_m);
 856 
 857         ASSERT(!CPUSET_ISNULL(set));
 858         CPUSET_BOUNDS(set, smallestid, largestid);
 859         if (smallestid == CPUSET_NOTINSET) {
 860                 return;
 861         }
 862 
 863         starttick = lasttick = gettick();
 864         endtick = starttick + xc_tick_limit;
 865 
 866         /*
 867          * Assemble CPU list for HV argument. We already know
 868          * smallestid and largestid are members of set.
 869          */
 870         mcpup->cpu_list[ncpuids++] = (uint16_t)smallestid;
 871         if (largestid != smallestid) {
 872                 for (i = smallestid+1; i <= largestid-1; i++) {
 873                         if (CPU_IN_SET(set, i)) {
 874                                 mcpup->cpu_list[ncpuids++] = (uint16_t)i;
 875                         }
 876                 }
 877                 mcpup->cpu_list[ncpuids++] = (uint16_t)largestid;
 878         }
 879 
 880         do {
 881                 int stat;
 882 
 883                 stat = shipit(ncpuids, mcpup->cpu_list_ra);
 884                 if (stat == H_EOK) {
 885                         shipped += ncpuids;
 886                         break;
 887                 }
 888 
 889                 /*
 890                  * Either not all CPU mondos were sent, or an
 891                  * error occurred. CPUs that were sent mondos
 892                  * have their CPU IDs overwritten in cpu_list.
 893                  * Reset cpu_list so that it only holds those
 894                  * CPU IDs that still need to be sent.
 895                  */
 896                 for (i = 0, j = 0; i < ncpuids; i++) {
 897                         if (mcpup->cpu_list[i] == HV_SEND_MONDO_ENTRYDONE) {
 898                                 shipped++;
 899                         } else {
 900                                 mcpup->cpu_list[j++] = mcpup->cpu_list[i];
 901                         }
 902                 }
 903                 ncpuids = j;
 904 
 905                 /*
 906                  * Now handle possible errors returned
 907                  * from hypervisor.
 908                  */
 909                 if (stat == H_ECPUERROR) {
 910                         int errorcpus;
 911 
 912                         if (!panic_quiesce)
 913                                 cmn_err(CE_CONT, "send_mondo_set: cpuid(s) ");
 914 
 915                         /*
 916                          * Remove any CPUs in the error state from
 917                          * cpu_list. At this point cpu_list only
 918                          * contains the CPU IDs for mondos not
 919                          * succesfully sent.
 920                          */
 921                         for (i = 0, errorcpus = 0; i < ncpuids; i++) {
 922                                 uint64_t state = CPU_STATE_INVALID;
 923                                 uint16_t id = mcpup->cpu_list[i];
 924 
 925                                 (void) hv_cpu_state(id, &state);
 926                                 if (state == CPU_STATE_ERROR) {
 927                                         if (!panic_quiesce)
 928                                                 cmn_err(CE_CONT, "0x%x ", id);
 929                                         errorcpus++;
 930                                 } else if (errorcpus > 0) {
 931                                         mcpup->cpu_list[i - errorcpus] =
 932                                             mcpup->cpu_list[i];
 933                                 }
 934                         }
 935                         ncpuids -= errorcpus;
 936 
 937                         if (!panic_quiesce) {
 938                                 if (errorcpus == 0) {
 939                                         cmn_err(CE_CONT, "<none> have been "
 940                                             "marked in error\n");
 941                                         cmn_err(CE_PANIC, "send_mondo_set: "
 942                                             "hypervisor returned "
 943                                             "H_ECPUERROR but no CPU in "
 944                                             "cpu_list in error state");
 945                                 } else {
 946                                         cmn_err(CE_CONT, "have been marked in "
 947                                             "error\n");
 948                                         cmn_err(CE_PANIC, "send_mondo_set: "
 949                                             "CPU(s) in error state");
 950                                 }
 951                         }
 952                 } else if (stat != H_EWOULDBLOCK) {
 953                         if (panic_quiesce)
 954                                 return;
 955                         /*
 956                          * For all other errors, panic.
 957                          */
 958                         cmn_err(CE_CONT, "send_mondo_set: unexpected "
 959                             "hypervisor error 0x%x while sending a "
 960                             "mondo to cpuid(s):", stat);
 961                         for (i = 0; i < ncpuids; i++) {
 962                                 cmn_err(CE_CONT, " 0x%x", mcpup->cpu_list[i]);
 963                         }
 964                         cmn_err(CE_CONT, "\n");
 965                         cmn_err(CE_PANIC, "send_mondo_set: unexpected "
 966                             "hypervisor error");
 967                 }
 968 
 969                 tick = gettick();
 970                 /*
 971                  * If there is a big jump between the current tick
 972                  * count and lasttick, we have probably hit a break
 973                  * point.  Adjust endtick accordingly to avoid panic.
 974                  */
 975                 if (tick > (lasttick + xc_tick_jump_limit))
 976                         endtick += (tick - lasttick);
 977                 lasttick = tick;
 978                 if (tick > endtick) {
 979                         if (panic_quiesce)
 980                                 return;
 981                         cmn_err(CE_CONT, "send mondo timeout "
 982                             "[retries: 0x%x]  cpuids: ", retries);
 983                         for (i = 0; i < ncpuids; i++)
 984                                 cmn_err(CE_CONT, " 0x%x", mcpup->cpu_list[i]);
 985                         cmn_err(CE_CONT, "\n");
 986                         cmn_err(CE_PANIC, "send_mondo_set: timeout");
 987                 }
 988 
 989                 while (gettick() < (tick + sys_clock_mhz))
 990                         ;
 991                 retries++;
 992         } while (ncpuids > 0);
 993 
 994         CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
 995 
 996 #ifdef SEND_MONDO_STATS
 997         {
 998                 uint64_t n = gettick() - starttick;
 999                 if (n < 8192)
1000                         x_set_stimes[n >> 7]++;
1001                 else if (n < 15*8192)
1002                         x_set_ltimes[n >> 13]++;
1003                 else
1004                         x_set_ltimes[0xf]++;
1005         }
1006         x_set_cpus[shipped]++;
1007 #endif
1008 }
1009 
/*
 * syncfpu - intentionally empty in this file; the routine performs no
 * work and simply returns.
 */
void
syncfpu(void)
{
        /* Nothing to do. */
}
1014 
/*
 * sticksync_slave - slave side of the stick synchronization entry
 * points; all of the work is delegated to suspend_sync_tick_stick_npt().
 */
void
sticksync_slave(void)
{
        suspend_sync_tick_stick_npt();
}
1020 
/*
 * sticksync_master - master side of the stick synchronization entry
 * points; intentionally empty in this file.
 */
void
sticksync_master(void)
{
        /* Nothing to do. */
}
1024 
1025 void
1026 cpu_init_cache_scrub(void)
1027 {
1028         mach_set_soft_state(SIS_NORMAL, &SOLARIS_SOFT_STATE_RUN_MSG);
1029 }
1030 
1031 int
1032 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1033 {
1034         int ret, watched;
1035 
1036         watched = watch_disable_addr((void *)addr, 4, S_WRITE);
1037         ret = dtrace_blksuword32(addr, data, 0);
1038         if (watched)
1039                 watch_enable_addr((void *)addr, 4, S_WRITE);
1040 
1041         return (ret);
1042 }
1043 
/*
 * Store the 32-bit word *data at 'addr' with suword32().  On success
 * dtrace_flush_sec() is called for the address and 0 is returned.  On
 * failure the store is retried through dtrace_blksuword32_err() when
 * 'tryagain' is non-zero; otherwise -1 is returned.
 */
int
dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
{
        if (suword32((void *)addr, *data) != -1) {
                dtrace_flush_sec(addr);
                return (0);
        }

        if (!tryagain)
                return (-1);

        return (dtrace_blksuword32_err(addr, data));
}
1053 
/*
 * cpu_faulted_enter - intentionally empty in this file; 'cp' is unused.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
        /* Nothing to do. */
}
1059 
/*
 * cpu_faulted_exit - intentionally empty in this file; 'cp' is unused.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
        /* Nothing to do. */
}
1065 
1066 static int
1067 kdi_cpu_ready_iter(int (*cb)(int, void *), void *arg)
1068 {
1069         int rc, i;
1070 
1071         for (rc = 0, i = 0; i < NCPU; i++) {
1072                 if (CPU_IN_SET(cpu_ready_set, i))
1073                         rc += cb(i, arg);
1074         }
1075 
1076         return (rc);
1077 }
1078 
1079 /*
1080  * Sends a cross-call to a specified processor.  The caller assumes
1081  * responsibility for repetition of cross-calls, as appropriate (MARSA for
1082  * debugging).
1083  */
1084 static int
1085 kdi_xc_one(int cpuid, void (*func)(uintptr_t, uintptr_t), uintptr_t arg1,
1086     uintptr_t arg2)
1087 {
1088         int stat;
1089         struct machcpu  *mcpup;
1090         uint64_t cpuaddr_reg = 0, cpuaddr_scr = 0;
1091 
1092         mcpup = &(((cpu_t *)get_cpuaddr(cpuaddr_reg, cpuaddr_scr))->cpu_m);
1093 
1094         /*
1095          * if (idsr_busy())
1096          *      return (KDI_XC_RES_ERR);
1097          */
1098 
1099         init_mondo_nocheck((xcfunc_t *)func, arg1, arg2);
1100 
1101         mcpup->cpu_list[0] = (uint16_t)cpuid;
1102         stat = shipit(1, mcpup->cpu_list_ra);
1103 
1104         if (stat == 0)
1105                 return (KDI_XC_RES_OK);
1106         else
1107                 return (KDI_XC_RES_NACK);
1108 }
1109 
/*
 * Busy-wait until gettick() has advanced by at least 'nticks' from its
 * value on entry.
 */
static void
kdi_tickwait(clock_t nticks)
{
        clock_t deadline = gettick() + nticks;

        for (;;) {
                if (gettick() >= deadline)
                        break;
        }
}
1118 
1119 static void
1120 kdi_cpu_init(int dcache_size, int dcache_linesize, int icache_size,
1121     int icache_linesize)
1122 {
1123         kdi_dcache_size = dcache_size;
1124         kdi_dcache_linesize = dcache_linesize;
1125         kdi_icache_size = icache_size;
1126         kdi_icache_linesize = icache_linesize;
1127 }
1128 
/*
 * kdi_flush_caches - called directly by kdi_read/write_phys, and also
 * installed as a kdi op by cpu_kdi_init().  Flushing is not required on
 * the sun4v architecture, so the routine is empty.
 */
void
kdi_flush_caches(void)
{
        /* Nothing to do. */
}
1135 
/*
 * kdi_get_stick - always returns -1 and never writes through 'stickp'.
 */
/*ARGSUSED*/
int
kdi_get_stick(uint64_t *stickp)
{
        /* No value is produced; report failure unconditionally. */
        return (-1);
}
1142 
1143 void
1144 cpu_kdi_init(kdi_t *kdi)
1145 {
1146         kdi->kdi_flush_caches = kdi_flush_caches;
1147         kdi->mkdi_cpu_init = kdi_cpu_init;
1148         kdi->mkdi_cpu_ready_iter = kdi_cpu_ready_iter;
1149         kdi->mkdi_xc_one = kdi_xc_one;
1150         kdi->mkdi_tickwait = kdi_tickwait;
1151         kdi->mkdi_get_stick = kdi_get_stick;
1152 }
1153 
1154 uint64_t        soft_state_message_ra[SOLARIS_SOFT_STATE_MSG_CNT];
1155 static uint64_t soft_state_saved_state = (uint64_t)-1;
1156 static int      soft_state_initialized = 0;
1157 static uint64_t soft_state_sup_minor;           /* Supported minor number */
1158 static hsvc_info_t soft_state_hsvc = {
1159                         HSVC_REV_1, NULL, HSVC_GROUP_SOFT_STATE, 1, 0, NULL };
1160 
1161 
1162 static void
1163 sun4v_system_claim(void)
1164 {
1165         lbolt_debug_entry();
1166 
1167         watchdog_suspend();
1168         kldc_debug_enter();
1169         /*
1170          * For "mdb -K", set soft state to debugging
1171          */
1172         if (soft_state_saved_state == -1) {
1173                 mach_get_soft_state(&soft_state_saved_state,
1174                     &SOLARIS_SOFT_STATE_SAVED_MSG);
1175         }
1176         /*
1177          * check again as the read above may or may not have worked and if
1178          * it didn't then soft state will still be -1
1179          */
1180         if (soft_state_saved_state != -1) {
1181                 mach_set_soft_state(SIS_TRANSITION,
1182                     &SOLARIS_SOFT_STATE_DEBUG_MSG);
1183         }
1184 }
1185 
1186 static void
1187 sun4v_system_release(void)
1188 {
1189         watchdog_resume();
1190         /*
1191          * For "mdb -K", set soft_state state back to original state on exit
1192          */
1193         if (soft_state_saved_state != -1) {
1194                 mach_set_soft_state(soft_state_saved_state,
1195                     &SOLARIS_SOFT_STATE_SAVED_MSG);
1196                 soft_state_saved_state = -1;
1197         }
1198 
1199         lbolt_debug_return();
1200 }
1201 
1202 void
1203 plat_kdi_init(kdi_t *kdi)
1204 {
1205         kdi->pkdi_system_claim = sun4v_system_claim;
1206         kdi->pkdi_system_release = sun4v_system_release;
1207 }
1208 
/*
 * Interface for returning memory information associated with a
 * physical address and syndrome.  This implementation does not
 * provide it: every argument is ignored, nothing is written through
 * the output pointers, and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
        /* Not supported here. */
        return (ENOTSUP);
}
1221 
1222 /*
1223  * This routine returns the size of the kernel's FRU name buffer.
1224  */
1225 size_t
1226 cpu_get_name_bufsize()
1227 {
1228         return (UNUM_NAMLEN);
1229 }
1230 
/*
 * A more generic interface to cpu_get_mem_unum() that other modules
 * (e.g. mm) may call.  This implementation does not provide it: every
 * argument is ignored, 'buf' and 'lenp' are left untouched, and
 * ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
    char *buf, int buflen, int *lenp)
{
        /* Not supported here. */
        return (ENOTSUP);
}
1242 
/*
 * cpu_get_mem_sid - not provided by this implementation: every
 * argument is ignored and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
        /* Not supported here. */
        return (ENOTSUP);
}
1249 
/*
 * cpu_get_mem_addr - not provided by this implementation: every
 * argument is ignored, 'addrp' is left untouched, and ENOTSUP is
 * always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
        /* Not supported here. */
        return (ENOTSUP);
}
1256 
1257 /*
1258  * xt_sync - wait for previous x-traps to finish
1259  */
1260 void
1261 xt_sync(cpuset_t cpuset)
1262 {
1263         union {
1264                 uint8_t volatile byte[NCPU];
1265                 uint64_t volatile xword[NCPU / 8];
1266         } cpu_sync;
1267         uint64_t starttick, endtick, tick, lasttick, traptrace_id;
1268         uint_t largestid, smallestid;
1269         int i, j;
1270 
1271         kpreempt_disable();
1272         CPUSET_DEL(cpuset, CPU->cpu_id);
1273         CPUSET_AND(cpuset, cpu_ready_set);
1274 
1275         CPUSET_BOUNDS(cpuset, smallestid, largestid);
1276         if (smallestid == CPUSET_NOTINSET)
1277                 goto out;
1278 
1279         /*
1280          * Sun4v uses a queue for receiving mondos. Successful
1281          * transmission of a mondo only indicates that the mondo
1282          * has been written into the queue.
1283          *
1284          * We use an array of bytes to let each cpu to signal back
1285          * to the cross trap sender that the cross trap has been
1286          * executed. Set the byte to 1 before sending the cross trap
1287          * and wait until other cpus reset it to 0.
1288          */
1289         bzero((void *)&cpu_sync, NCPU);
1290         cpu_sync.byte[smallestid] = 1;
1291         if (largestid != smallestid) {
1292                 for (i = (smallestid + 1); i <= (largestid - 1); i++)
1293                         if (CPU_IN_SET(cpuset, i))
1294                                 cpu_sync.byte[i] = 1;
1295                 cpu_sync.byte[largestid] = 1;
1296         }
1297 
1298         /*
1299          * To help debug xt_sync panic, each mondo is uniquely identified
1300          * by passing the tick value, traptrace_id as the second mondo
1301          * argument to xt_some which is logged in CPU's mondo queue,
1302          * traptrace buffer and the panic message.
1303          */
1304         traptrace_id = gettick();
1305         xt_some(cpuset, (xcfunc_t *)xt_sync_tl1,
1306             (uint64_t)cpu_sync.byte, traptrace_id);
1307 
1308         starttick = lasttick = gettick();
1309         endtick = starttick + xc_sync_tick_limit;
1310 
1311         for (i = (smallestid / 8); i <= (largestid / 8); i++) {
1312                 while (cpu_sync.xword[i] != 0) {
1313                         tick = gettick();
1314                         /*
1315                          * If there is a big jump between the current tick
1316                          * count and lasttick, we have probably hit a break
1317                          * point. Adjust endtick accordingly to avoid panic.
1318                          */
1319                         if (tick > (lasttick + xc_tick_jump_limit)) {
1320                                 endtick += (tick - lasttick);
1321                         }
1322                         lasttick = tick;
1323                         if (tick > endtick) {
1324                                 if (panic_quiesce)
1325                                         goto out;
1326                                 cmn_err(CE_CONT, "Cross trap sync timeout:  "
1327                                     "at cpu_sync.xword[%d]: 0x%lx "
1328                                     "cpu_sync.byte: 0x%lx "
1329                                     "starttick: 0x%lx endtick: 0x%lx "
1330                                     "traptrace_id = 0x%lx\n",
1331                                     i, cpu_sync.xword[i],
1332                                     (uint64_t)cpu_sync.byte,
1333                                     starttick, endtick, traptrace_id);
1334                                 cmn_err(CE_CONT, "CPUIDs:");
1335                                 for (j = (i * 8); j <= largestid; j++) {
1336                                         if (cpu_sync.byte[j] != 0)
1337                                                 cmn_err(CE_CONT, " 0x%x", j);
1338                                 }
1339                                 cmn_err(CE_PANIC, "xt_sync: timeout");
1340                         }
1341                 }
1342         }
1343 
1344 out:
1345         kpreempt_enable();
1346 }
1347 
1348 #define QFACTOR         200
1349 /*
1350  * Recalculate the values of the cross-call timeout variables based
1351  * on the value of the 'inter-cpu-latency' property of the platform node.
1352  * The property sets the number of nanosec to wait for a cross-call
1353  * to be acknowledged.  Other timeout variables are derived from it.
1354  *
1355  * N.B. This implementation is aware of the internals of xc_init()
1356  * and updates many of the same variables.
1357  */
1358 void
1359 recalc_xc_timeouts(void)
1360 {
1361         typedef union {
1362                 uint64_t whole;
1363                 struct {
1364                         uint_t high;
1365                         uint_t low;
1366                 } half;
1367         } u_number;
1368 
1369         /* See x_call.c for descriptions of these extern variables. */
1370         extern uint64_t xc_tick_limit_scale;
1371         extern uint64_t xc_mondo_time_limit;
1372         extern uint64_t xc_func_time_limit;
1373         extern uint64_t xc_scale;
1374         extern uint64_t xc_mondo_multiplier;
1375         extern uint_t   nsec_shift;
1376 
1377         /* Temp versions of the target variables */
1378         uint64_t tick_limit;
1379         uint64_t tick_jump_limit;
1380         uint64_t mondo_time_limit;
1381         uint64_t func_time_limit;
1382         uint64_t scale;
1383 
1384         uint64_t latency;       /* nanoseconds */
1385         uint64_t maxfreq;
1386         uint64_t tick_limit_save = xc_tick_limit;
1387         uint64_t sync_tick_limit_save = xc_sync_tick_limit;
1388         uint_t   tick_scale;
1389         uint64_t top;
1390         uint64_t bottom;
1391         u_number tk;
1392 
1393         md_t *mdp;
1394         int nrnode;
1395         mde_cookie_t *platlist;
1396 
1397         /*
1398          * Look up the 'inter-cpu-latency' (optional) property in the
1399          * platform node of the MD.  The units are nanoseconds.
1400          */
1401         if ((mdp = md_get_handle()) == NULL) {
1402                 cmn_err(CE_WARN, "recalc_xc_timeouts: "
1403                     "Unable to initialize machine description");
1404                 return;
1405         }
1406 
1407         nrnode = md_alloc_scan_dag(mdp,
1408             md_root_node(mdp), "platform", "fwd", &platlist);
1409 
1410         ASSERT(nrnode == 1);
1411         if (nrnode < 1) {
1412                 cmn_err(CE_WARN, "recalc_xc_timeouts: platform node missing");
1413                 goto done;
1414         }
1415         if (md_get_prop_val(mdp, platlist[0],
1416             "inter-cpu-latency", &latency) == -1)
1417                 goto done;
1418 
1419         /*
1420          * clock.h defines an assembly-language macro
1421          * (NATIVE_TIME_TO_NSEC_SCALE) to convert from %stick
1422          * units to nanoseconds.  Since the inter-cpu-latency
1423          * units are nanoseconds and the xc_* variables require
1424          * %stick units, we need the inverse of that function.
1425          * The trick is to perform the calculation without
1426          * floating point, but also without integer truncation
1427          * or overflow.  To understand the calculation below,
1428          * please read the discussion of the macro in clock.h.
1429          * Since this new code will be invoked infrequently,
1430          * we can afford to implement it in C.
1431          *
1432          * tick_scale is the reciprocal of nsec_scale which is
1433          * calculated at startup in setcpudelay().  The calc
1434          * of tick_limit parallels that of NATIVE_TIME_TO_NSEC_SCALE
1435          * except we use tick_scale instead of nsec_scale and
1436          * C instead of assembler.
1437          */
1438         tick_scale = (uint_t)(((u_longlong_t)sys_tick_freq
1439             << (32 - nsec_shift)) / NANOSEC);
1440 
1441         tk.whole = latency;
1442         top = ((uint64_t)tk.half.high << 4) * tick_scale;
1443         bottom = (((uint64_t)tk.half.low << 4) * (uint64_t)tick_scale) >> 32;
1444         tick_limit = top + bottom;
1445 
1446         /*
1447          * xc_init() calculated 'maxfreq' by looking at all the cpus,
1448          * and used it to derive some of the timeout variables that we
1449          * recalculate below.  We can back into the original value by
1450          * using the inverse of one of those calculations.
1451          */
1452         maxfreq = xc_mondo_time_limit / xc_scale;
1453 
1454         /*
1455          * Don't allow the new timeout (xc_tick_limit) to fall below
1456          * the system tick frequency (stick).  Allowing the timeout
1457          * to be set more tightly than this empirically determined
1458          * value may cause panics.
1459          */
1460         tick_limit = tick_limit < sys_tick_freq ? sys_tick_freq : tick_limit;
1461 
1462         tick_jump_limit = tick_limit / 32;
1463         tick_limit *= xc_tick_limit_scale;
1464 
1465         /*
1466          * Recalculate xc_scale since it is used in a callback function
1467          * (xc_func_timeout_adj) to adjust two of the timeouts dynamically.
1468          * Make the change in xc_scale proportional to the change in
1469          * xc_tick_limit.
1470          */
1471         scale = (xc_scale * tick_limit + sys_tick_freq / 2) / tick_limit_save;
1472         if (scale == 0)
1473                 scale = 1;
1474 
1475         mondo_time_limit = maxfreq * scale;
1476         func_time_limit = mondo_time_limit * xc_mondo_multiplier;
1477 
1478         /*
1479          * Don't modify the timeouts if nothing has changed.  Else,
1480          * stuff the variables with the freshly calculated (temp)
1481          * variables.  This minimizes the window where the set of
1482          * values could be inconsistent.
1483          */
1484         if (tick_limit != xc_tick_limit) {
1485                 xc_tick_limit = tick_limit;
1486                 xc_tick_jump_limit = tick_jump_limit;
1487                 xc_scale = scale;
1488                 xc_mondo_time_limit = mondo_time_limit;
1489                 xc_func_time_limit = func_time_limit;
1490         }
1491 
1492 done:
1493         /*
1494          * Increase the timeout limit for xt_sync() cross calls.
1495          */
1496         xc_sync_tick_limit = xc_tick_limit * (cpu_q_entries / QFACTOR);
1497         xc_sync_tick_limit = xc_sync_tick_limit < xc_tick_limit ?
1498             xc_tick_limit : xc_sync_tick_limit;
1499 
1500         /*
1501          * Force the new values to be used for future cross calls.
1502          * This is necessary only when we increase the timeouts.
1503          */
1504         if ((xc_tick_limit > tick_limit_save) || (xc_sync_tick_limit >
1505             sync_tick_limit_save)) {
1506                 cpuset_t cpuset = cpu_ready_set;
1507                 xt_sync(cpuset);
1508         }
1509 
1510         if (nrnode > 0)
1511                 md_free_scan_dag(mdp, &platlist);
1512         (void) md_fini_handle(mdp);
1513 }
1514 
1515 void
1516 mach_soft_state_init(void)
1517 {
1518         int             i;
1519         uint64_t        ra;
1520 
1521         /*
1522          * Try to register soft_state api. If it fails, soft_state api has not
1523          * been implemented in the firmware, so do not bother to setup
1524          * soft_state in the kernel.
1525          */
1526         if ((i = hsvc_register(&soft_state_hsvc, &soft_state_sup_minor)) != 0) {
1527                 return;
1528         }
1529         for (i = 0; i < SOLARIS_SOFT_STATE_MSG_CNT; i++) {
1530                 ASSERT(strlen((const char *)(void *)
1531                     soft_state_message_strings + i) < SSM_SIZE);
1532                 if ((ra = va_to_pa(
1533                     (void *)(soft_state_message_strings + i))) == -1ll) {
1534                         return;
1535                 }
1536                 soft_state_message_ra[i] = ra;
1537         }
1538         /*
1539          * Tell OBP that we are supporting Guest State
1540          */
1541         prom_sun4v_soft_state_supported();
1542         soft_state_initialized = 1;
1543 }
1544 
1545 void
1546 mach_set_soft_state(uint64_t state, uint64_t *string_ra)
1547 {
1548         uint64_t        rc;
1549 
1550         if (soft_state_initialized && *string_ra) {
1551                 rc = hv_soft_state_set(state, *string_ra);
1552                 if (rc != H_EOK) {
1553                         cmn_err(CE_WARN,
1554                             "hv_soft_state_set returned %ld\n", rc);
1555                 }
1556         }
1557 }
1558 
1559 void
1560 mach_get_soft_state(uint64_t *state, uint64_t *string_ra)
1561 {
1562         uint64_t        rc;
1563 
1564         if (soft_state_initialized && *string_ra) {
1565                 rc = hv_soft_state_get(*string_ra, state);
1566                 if (rc != H_EOK) {
1567                         cmn_err(CE_WARN,
1568                             "hv_soft_state_get returned %ld\n", rc);
1569                         *state = -1;
1570                 }
1571         }
1572 }