/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* derived from netbsd's xen_machdep.c 1.1.2.1 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

#include <sys/xpv_user.h>

/* XXX 3.3. TODO remove this include */
#include <xen/public/arch-x86/xen-mca.h>

#include <sys/ctype.h>
#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/trap.h>
#include <sys/segments.h>
#include <sys/hypervisor.h>
#include <sys/xen_mmu.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/bootconf.h>
#include <sys/bootinfo.h>
#include <sys/cpr.h>
#include <sys/taskq.h>
#include <sys/uadmin.h>
#include <sys/evtchn_impl.h>
#include <sys/archsystm.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/mach_mmu.h>
#include <vm/hat_i86.h>
#include <sys/gnttab.h>
#include <sys/reboot.h>
#include <sys/stack.h>
#include <sys/clock.h>
#include <sys/bitmap.h>
#include <sys/processor.h>
#include <sys/xen_errno.h>
#include <sys/xpv_panic.h>
#include <sys/smp_impldefs.h>
#include <sys/cpu.h>
#include <sys/balloon_impl.h>
#include <sys/ddi.h>

#ifdef DEBUG
#define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
#else
#define SUSPEND_DEBUG(...)
#endif

int cpr_debug;
cpuset_t cpu_suspend_lost_set;
static int xen_suspend_debug;

uint_t xen_phys_ncpus;
xen_mc_logical_cpu_t *xen_phys_cpus;
int xen_physinfo_debug = 0;

/*
 * Determine helpful version information.
 *
 * (And leave copies in the data segment so we can look at them later
 * with e.g. kmdb.)
 */

typedef enum xen_version {
        XENVER_BOOT_IDX,
        XENVER_CURRENT_IDX
} xen_version_t;

struct xenver {
        ulong_t xv_major;
        ulong_t xv_minor;
        ulong_t xv_revision;
        xen_extraversion_t xv_ver;
        ulong_t xv_is_xvm;
        xen_changeset_info_t xv_chgset;
        xen_compile_info_t xv_build;
        xen_capabilities_info_t xv_caps;
} xenver[2];

#define XENVER_BOOT(m)  (xenver[XENVER_BOOT_IDX].m)
#define XENVER_CURRENT(m)       (xenver[XENVER_CURRENT_IDX].m)

/*
 * Update the xenver data.  We maintain two copies, boot and current.
 * If we are setting the boot version, also set the current version.
 */
static void
xen_set_version(xen_version_t idx)
{
        ulong_t ver;

        bzero(&xenver[idx], sizeof (xenver[idx]));

        ver = HYPERVISOR_xen_version(XENVER_version, 0);

        xenver[idx].xv_major = BITX(ver, 31, 16);
        xenver[idx].xv_minor = BITX(ver, 15, 0);

        (void) HYPERVISOR_xen_version(XENVER_extraversion, &xenver[idx].xv_ver);

        /*
         * The revision is buried in the extraversion information that is
         * maintained by the hypervisor.  For our purposes we expect the
         * revision number to be:
         *      - the second character of the extraversion information
         *      - one character long
         *      - a numeric digit
         * If it isn't, we cannot extract the revision and it stays 0.
         */
        if (strlen(xenver[idx].xv_ver) > 1 && isdigit(xenver[idx].xv_ver[1]))
                xenver[idx].xv_revision = xenver[idx].xv_ver[1] - '0';
        else
                cmn_err(CE_WARN, "Cannot extract revision on this hypervisor "
                    "version: v%s, unexpected version format",
                    xenver[idx].xv_ver);

        xenver[idx].xv_is_xvm = 0;

        if (strstr(xenver[idx].xv_ver, "-xvm") != NULL)
                xenver[idx].xv_is_xvm = 1;

        (void) HYPERVISOR_xen_version(XENVER_changeset,
            &xenver[idx].xv_chgset);

        (void) HYPERVISOR_xen_version(XENVER_compile_info,
            &xenver[idx].xv_build);

        /*
         * Capabilities are a set of space-separated ASCII strings,
         * e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64'.
         */
        (void) HYPERVISOR_xen_version(XENVER_capabilities,
            &xenver[idx].xv_caps);

        cmn_err(CE_CONT, "?v%lu.%lu%s chgset '%s'\n", xenver[idx].xv_major,
            xenver[idx].xv_minor, xenver[idx].xv_ver, xenver[idx].xv_chgset);

        if (idx == XENVER_BOOT_IDX)
                bcopy(&xenver[XENVER_BOOT_IDX], &xenver[XENVER_CURRENT_IDX],
                    sizeof (xenver[XENVER_BOOT_IDX]));
}

typedef enum xen_hypervisor_check {
        XEN_RUN_CHECK,
        XEN_SUSPEND_CHECK
} xen_hypervisor_check_t;

/*
 * To run, the hypervisor must be 3.0.4 or better.  To suspend/resume,
 * we need 3.0.4 or better, and if it is exactly 3.0.4, it must be
 * provided by the Solaris xVM project.
 * Checking can be disabled for testing purposes by setting the
 * xen_suspend_debug variable.
 */
static int
xen_hypervisor_supports_solaris(xen_hypervisor_check_t check)
{
        if (xen_suspend_debug == 1)
                return (1);
        if (XENVER_CURRENT(xv_major) < 3)
                return (0);
        if (XENVER_CURRENT(xv_major) > 3)
                return (1);
        if (XENVER_CURRENT(xv_minor) > 0)
                return (1);
        if (XENVER_CURRENT(xv_revision) < 4)
                return (0);
        if (check == XEN_SUSPEND_CHECK && XENVER_CURRENT(xv_revision) == 4 &&
            !XENVER_CURRENT(xv_is_xvm))
                return (0);

        return (1);
}

/*
 * If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the
 * workaround of marking 64-bit kernel mappings with the user bit
 * (pt_kern = PT_USER).
 */
static void
xen_pte_workaround(void)
{
#if defined(__amd64)
        extern int pt_kern;

        if (XENVER_CURRENT(xv_major) != 3)
                return;
        if (XENVER_CURRENT(xv_minor) > 1)
                return;
        if (XENVER_CURRENT(xv_minor) == 1 &&
            XENVER_CURRENT(xv_revision) > 1)
                return;
        if (XENVER_CURRENT(xv_is_xvm))
                return;

        pt_kern = PT_USER;
#endif
}

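/*
 * Register a hypervisor callback of the given type (event, failsafe,
 * NMI, syscall).  The entry point is a plain address on amd64 and a
 * cs:eip pair on i386.
 */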
void
xen_set_callback(void (*func)(void), uint_t type, uint_t flags)
{
        struct callback_register cb;

        bzero(&cb, sizeof (cb));
#if defined(__amd64)
        cb.address = (ulong_t)func;
#elif defined(__i386)
        cb.address.cs = KCS_SEL;
        cb.address.eip = (ulong_t)func;
#endif
        cb.type = type;
        cb.flags = flags;

        /*
         * XXPV: we always ignore the return value for the NMI callback.
         */
        if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 &&
            type != CALLBACKTYPE_nmi)
                panic("HYPERVISOR_callback_op failed");
}

void
xen_init_callbacks(void)
{
        /*
         * Register the event (interrupt) handler.
         */
        xen_set_callback(xen_callback, CALLBACKTYPE_event, 0);

        /*
         * Register the failsafe handler.
         */
        xen_set_callback(xen_failsafe_callback, CALLBACKTYPE_failsafe,
            CALLBACKF_mask_events);

        /*
         * Register the NMI handler.
         */
        xen_set_callback(nmiint, CALLBACKTYPE_nmi, 0);

        /*
         * Register the system call handler.
         * XXPV: move to init_cpu_syscall?
         */
#if defined(__amd64)
        xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);
#endif  /* __amd64 */
}


/*
 * cmn_err() followed by a 1/4 second delay; this gives the
 * logging service a chance to flush messages and helps avoid
 * intermixing output from prom_printf().
 * XXPV: doesn't exactly help us on UP though.
 */
/*PRINTFLIKE2*/
void
cpr_err(int ce, const char *fmt, ...)
{
        va_list adx;

        va_start(adx, fmt);
        vcmn_err(ce, fmt, adx);
        va_end(adx);
        drv_usecwait(MICROSEC >> 2);
}

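/*
 * Suspend all devices in the device tree; any failure here is fatal,
 * since continuing the domain suspend with live devices is unsafe.
 */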
void
xen_suspend_devices(void)
{
        int rc;

        SUSPEND_DEBUG("xen_suspend_devices\n");

        if ((rc = cpr_suspend_devices(ddi_root_node())) != 0)
                panic("failed to suspend devices: %d", rc);
}

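/*
 * Resume all devices in the device tree; as with suspend, any failure
 * is fatal.
 */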
void
xen_resume_devices(void)
{
        int rc;

        SUSPEND_DEBUG("xen_resume_devices\n");

        if ((rc = cpr_resume_devices(ddi_root_node(), 0)) != 0)
                panic("failed to resume devices: %d", rc);
}

/*
 * The list of mfn pages is out of date.  Recompute it.
 */
static void
rebuild_mfn_list(void)
{
        int i = 0;
        size_t sz;
        size_t off;
        pfn_t pfn;

        SUSPEND_DEBUG("rebuild_mfn_list\n");

        sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;

        for (off = 0; off < sz; off += MMU_PAGESIZE) {
                size_t j = mmu_btop(off);
                if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
                        pfn = hat_getpfnum(kas.a_hat,
                            (caddr_t)&mfn_list_pages[j]);
                        mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
                }

                pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
                mfn_list_pages[j] = pfn_to_mfn(pfn);
        }

        pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
            = pfn_to_mfn(pfn);
}

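/*
 * Quiesce all other CPUs via mp_enter_barrier(), take each vcpu down
 * in the hypervisor (unless it was already recorded as powered off in
 * cpu_suspend_lost_set), and reset its saved context.
 */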
static void
suspend_cpus(void)
{
        int i;

        SUSPEND_DEBUG("suspend_cpus\n");

        mp_enter_barrier();

        for (i = 1; i < ncpus; i++) {
                if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
                        SUSPEND_DEBUG("xen_vcpu_down %d\n", i);
                        (void) xen_vcpu_down(i);
                }

                mach_cpucontext_reset(cpu[i]);
        }
}

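/*
 * Restore the context of each CPU we took down in suspend_cpus(),
 * bring its vcpu back up, and then release everyone from the barrier.
 */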
static void
resume_cpus(void)
{
        int i;

        for (i = 1; i < ncpus; i++) {
                if (cpu[i] == NULL)
                        continue;

                if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
                        SUSPEND_DEBUG("xen_vcpu_up %d\n", i);
                        mach_cpucontext_restore(cpu[i]);
                        (void) xen_vcpu_up(i);
                }
        }

        mp_leave_barrier();
}

/*
 * Top-level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
        extern void rtcsync(void);
        extern hrtime_t hres_last_tick;
        mfn_t start_info_mfn;
        ulong_t flags;
        pfn_t pfn;
        int i;

        /*
         * Check that we are happy to suspend on this hypervisor.
         */
        if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
                cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
                    "version: v%lu.%lu%s, need at least version v3.0.4 or "
                    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
                    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
                return;
        }

        /*
         * XXPV - Are we definitely OK to suspend by the time we've connected
         * the handler?
         */

        cpr_err(CE_NOTE, "Domain suspending for save/migrate");

        SUSPEND_DEBUG("xen_suspend_domain\n");

        /*
         * Suspend interrupts and devices.
         * XXPV - we use suspend/resume both for save/restore of a domain
         * (like Sun cpr) and for migration.  It would be nice to know the
         * difference if possible.  For save/restore, where the down time
         * may be long, we may want to do more of the things that cpr does
         * (e.g. notify user processes, shrink the memory footprint for a
         * faster restore).
         */
        xen_suspend_devices();
        SUSPEND_DEBUG("xenbus_suspend\n");
        xenbus_suspend();

        pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
        start_info_mfn = pfn_to_mfn(pfn);

        /*
         * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
         * wrt xenbus being suspended here?
         */
        mutex_enter(&cpu_lock);

        /*
         * Suspend must be done on vcpu 0, as no context for other CPUs is
         * saved.
         *
         * XXPV - add to taskq API ?
         */
        thread_affinity_set(curthread, 0);
        kpreempt_disable();

        SUSPEND_DEBUG("xen_start_migrate\n");
        xen_start_migrate();
        if (ncpus > 1)
                suspend_cpus();

        /*
         * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
         * any holder would have dropped it to get through suspend_cpus().
         */
        mutex_enter(&ec_lock);

        /*
         * From here on in, we can't take locks.
         */
        SUSPEND_DEBUG("ec_suspend\n");
        ec_suspend();
        SUSPEND_DEBUG("gnttab_suspend\n");
        gnttab_suspend();

        flags = intr_clear();

        xpv_time_suspend();

        /*
         * Currently, the hypervisor incorrectly fails to bring back
         * powered-down VCPUs.  Thus we need to record any powered-down VCPUs
         * to prevent any attempts to operate on them.  But we have to do this
         * *after* the very first time we do ec_suspend().
         */
        for (i = 1; i < ncpus; i++) {
                if (cpu[i] == NULL)
                        continue;

                if (cpu_get_state(cpu[i]) == P_POWEROFF)
                        CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
        }

        /*
         * The dom0 save/migrate code doesn't automatically translate
         * these fields into PFNs, but expects them to be PFNs, so we
         * do the translation here.  We don't use mfn_to_pfn() because
         * so many OS services have been disabled at this point.
         */
        xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
        xen_info->console.domU.mfn =
            mfn_to_pfn_mapping[xen_info->console.domU.mfn];

        if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
                prom_printf("xen_suspend_domain(): "
                    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
                (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
        }

        if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
            0, UVMF_INVLPG)) {
                prom_printf("xen_suspend_domain(): "
                    "HYPERVISOR_update_va_mapping() failed\n");
                (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
        }

        SUSPEND_DEBUG("HYPERVISOR_suspend\n");

        /*
         * At this point we suspend and sometime later resume.
         */
        if (HYPERVISOR_suspend(start_info_mfn)) {
                prom_printf("xen_suspend_domain(): "
                    "HYPERVISOR_suspend() failed\n");
                (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
        }

        /*
         * Point HYPERVISOR_shared_info to its new value.
         */
        if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
            xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
            UVMF_INVLPG))
                (void) HYPERVISOR_shutdown(SHUTDOWN_crash);

        if (xen_info->nr_pages != mfn_count) {
                prom_printf("xen_suspend_domain(): number of pages"
                    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
                    xen_info->nr_pages);
                (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
        }

        xpv_time_resume();

        cached_max_mfn = 0;

        SUSPEND_DEBUG("gnttab_resume\n");
        gnttab_resume();

        /* XXPV: add a note that this must be lockless. */
        SUSPEND_DEBUG("ec_resume\n");
        ec_resume();

        intr_restore(flags);

        if (ncpus > 1)
                resume_cpus();

        mutex_exit(&ec_lock);
        xen_end_migrate();
        mutex_exit(&cpu_lock);

        /*
         * Now we can take locks again.
         */

        /*
         * Force the tick value used for tv_nsec in hres_tick() to be up to
         * date. rtcsync() will reset the hrestime value appropriately.
         */
        hres_last_tick = xpv_gethrtime();

        /*
         * XXPV: we need to have resumed the CPUs since this takes locks, but
         * can remote CPUs see bad state? Presumably yes. Should probably nest
         * taking of todlock inside of cpu_lock, or vice versa, then provide an
         * unlocked version.  Probably need to call clkinitf to reset cpu freq
         * and re-calibrate if we migrated to a different speed cpu.  Also need
         * to make a (re)init_cpu_info call to update processor info structs
         * and device tree info.  That remains to be written at the moment.
         */
        rtcsync();

        rebuild_mfn_list();

        SUSPEND_DEBUG("xenbus_resume\n");
        xenbus_resume();
        SUSPEND_DEBUG("xenbus_resume_devices\n");
        xen_resume_devices();

        thread_affinity_clear(curthread);
        kpreempt_enable();

        SUSPEND_DEBUG("finished xen_suspend_domain\n");

        /*
         * We have restarted our suspended domain, update the hypervisor
         * details. NB: This must be done at the end of this function,
         * since we need the domain to be completely resumed before
         * these functions will work correctly.
         */
        xen_set_version(XENVER_CURRENT_IDX);

        /*
         * We can check and report a warning, but we don't stop the
         * process.
         */
        if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
                cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
                    "but need at least version v3.0.4",
                    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
                    XENVER_CURRENT(xv_ver));

        cmn_err(CE_NOTE, "domain restore/migrate completed");
}

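/*
 * Handler for an externally generated debug event: enter the kernel
 * debugger if one is loaded; otherwise dump the shared-info event
 * channel state to the console.
 */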
/*ARGSUSED*/
int
xen_debug_handler(void *arg)
{
        debug_enter("External debug event received");

        /*
         * If we don't have kmdb loaded, output some state that is
         * difficult to capture from a domain core dump.
         */
        if (!(boothowto & RB_DEBUG)) {
                shared_info_t *si = HYPERVISOR_shared_info;
                int i;

                prom_printf("evtchn_pending [ ");
                for (i = 0; i < 8; i++)
                        prom_printf("%lx ", si->evtchn_pending[i]);
                prom_printf("]\nevtchn_mask [ ");
                for (i = 0; i < 8; i++)
                        prom_printf("%lx ", si->evtchn_mask[i]);
                prom_printf("]\n");

                for (i = 0; i < ncpus; i++) {
                        vcpu_info_t *vcpu = &si->vcpu_info[i];
                        if (cpu[i] == NULL)
                                continue;
                        prom_printf("CPU%d pending %d mask %d sel %lx\n",
                            i, vcpu->evtchn_upcall_pending,
                            vcpu->evtchn_upcall_mask,
                            vcpu->evtchn_pending_sel);
                }
        }

        return (0);
}

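/*
 * Watch handler for the control/sysrq xenstore node.  Reads and then
 * clears the sysrq key within a transaction, retrying on EAGAIN.
 */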
/*ARGSUSED*/
static void
xen_sysrq_handler(struct xenbus_watch *watch, const char **vec,
    unsigned int len)
{
        xenbus_transaction_t xbt;
        char key = '\0';
        int ret;

retry:
        if (xenbus_transaction_start(&xbt)) {
                cmn_err(CE_WARN, "failed to start sysrq transaction");
                return;
        }

        if ((ret = xenbus_scanf(xbt, "control", "sysrq", "%c", &key)) != 0) {
                /*
                 * ENOENT happens in response to our own xenbus_rm.
                 * XXPV - this happens spuriously on boot?
                 */
                if (ret != ENOENT)
                        cmn_err(CE_WARN, "failed to read sysrq: %d", ret);
                goto out;
        }

        if ((ret = xenbus_rm(xbt, "control", "sysrq")) != 0) {
                cmn_err(CE_WARN, "failed to reset sysrq: %d", ret);
                goto out;
        }

        if (xenbus_transaction_end(xbt, 0) == EAGAIN)
                goto retry;

        /*
         * Somewhat arbitrary - on Linux this means 'reboot'. We could just
         * accept any key, but this might increase the risk of sending a
         * harmless sysrq to the wrong domain...
         */
        if (key == 'b')
                (void) xen_debug_handler(NULL);
        else
                cmn_err(CE_WARN, "Ignored sysrq %c", key);
        return;

out:
        (void) xenbus_transaction_end(xbt, 1);
}

taskq_t *xen_shutdown_tq;

#define SHUTDOWN_INVALID        -1
#define SHUTDOWN_POWEROFF       0
#define SHUTDOWN_REBOOT         1
#define SHUTDOWN_SUSPEND        2
#define SHUTDOWN_HALT           3
#define SHUTDOWN_MAX            4

#define SHUTDOWN_TIMEOUT_SECS (60 * 5)

static const char *cmd_strings[SHUTDOWN_MAX] = {
        "poweroff",
        "reboot",
        "suspend",
        "halt"
};

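/*
 * Timeout callback: if the graceful, init(1)-driven shutdown requested
 * in xen_shutdown() hasn't completed within SHUTDOWN_TIMEOUT_SECS,
 * force the issue via kadmin().
 */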
static void
xen_dirty_shutdown(void *arg)
{
        int cmd = (uintptr_t)arg;

        cmn_err(CE_WARN, "Externally requested shutdown failed or "
            "timed out.\nShutting down.\n");

        switch (cmd) {
        case SHUTDOWN_HALT:
        case SHUTDOWN_POWEROFF:
                (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
                break;
        case SHUTDOWN_REBOOT:
                (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
                break;
        }
}

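/*
 * Carry out an externally requested shutdown.  Suspend is handled
 * synchronously; the other requests are forwarded to init(1) via
 * SIGPWR, with xen_dirty_shutdown() scheduled as a fallback in case
 * the graceful shutdown stalls.
 */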
static void
xen_shutdown(void *arg)
{
        int cmd = (uintptr_t)arg;
        proc_t *initpp;

        ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);

        if (cmd == SHUTDOWN_SUSPEND) {
                xen_suspend_domain();
                return;
        }

        switch (cmd) {
        case SHUTDOWN_POWEROFF:
                force_shutdown_method = AD_POWEROFF;
                break;
        case SHUTDOWN_HALT:
                force_shutdown_method = AD_HALT;
                break;
        case SHUTDOWN_REBOOT:
                force_shutdown_method = AD_BOOT;
                break;
        }

        /*
         * If we're still booting and init(1) isn't set up yet, simply halt.
         */
        mutex_enter(&pidlock);
        initpp = prfind(P_INITPID);
        mutex_exit(&pidlock);
        if (initpp == NULL) {
                extern void halt(char *);
                halt("Power off the System");   /* just in case */
        }

        /*
         * Otherwise, do a graceful shutdown with init(1) and inittab
         * getting involved.
         */
        psignal(initpp, SIGPWR);

        (void) timeout(xen_dirty_shutdown, arg,
            SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
}

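/*
 * Watch handler for the control/shutdown xenstore node.  Reads the
 * request in a transaction and clears it by writing the empty string;
 * that write re-fires the watch, which is why an empty value causes an
 * early return.  Valid requests are dispatched to xen_shutdown_tq.
 */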
/*ARGSUSED*/
static void
xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
        unsigned int len)
{
        char *str;
        xenbus_transaction_t xbt;
        int err, shutdown_code = SHUTDOWN_INVALID;
        unsigned int slen;

again:
        err = xenbus_transaction_start(&xbt);
        if (err)
                return;
        if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
                (void) xenbus_transaction_end(xbt, 1);
                return;
        }

        SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);

        /*
         * If this watch fired as a result of our own write below, return
         * early to avoid an infinite loop.
         */
        if (strcmp(str, "") == 0) {
                (void) xenbus_transaction_end(xbt, 0);
                kmem_free(str, slen);
                return;
        } else if (strcmp(str, "poweroff") == 0) {
                shutdown_code = SHUTDOWN_POWEROFF;
        } else if (strcmp(str, "reboot") == 0) {
                shutdown_code = SHUTDOWN_REBOOT;
        } else if (strcmp(str, "suspend") == 0) {
                shutdown_code = SHUTDOWN_SUSPEND;
        } else if (strcmp(str, "halt") == 0) {
                shutdown_code = SHUTDOWN_HALT;
        } else {
                printf("Ignoring shutdown request: %s\n", str);
        }

        /*
         * XXPV Should we check the value of xenbus_write() too, or are all
         *      errors automatically folded into xenbus_transaction_end() ??
         */
        (void) xenbus_write(xbt, "control", "shutdown", "");
        err = xenbus_transaction_end(xbt, 0);
        if (err == EAGAIN) {
                SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
                kmem_free(str, slen);
                goto again;
        }

        kmem_free(str, slen);
        if (shutdown_code != SHUTDOWN_INVALID) {
                (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
                    (void *)(intptr_t)shutdown_code, 0);
        }
}

static struct xenbus_watch shutdown_watch;
static struct xenbus_watch sysrq_watch;

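/*
 * Late startup: for a domU, create the shutdown taskq and register the
 * control/shutdown and control/sysrq watches; in all domains,
 * initialize the balloon subsystem with the current page count.
 */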
void
xen_late_startup(void)
{
        if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
                xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
                    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
                shutdown_watch.node = "control/shutdown";
                shutdown_watch.callback = xen_shutdown_handler;
                if (register_xenbus_watch(&shutdown_watch))
                        cmn_err(CE_WARN, "Failed to set shutdown watcher");

                sysrq_watch.node = "control/sysrq";
                sysrq_watch.callback = xen_sysrq_handler;
                if (register_xenbus_watch(&sysrq_watch))
                        cmn_err(CE_WARN, "Failed to set sysrq watcher");
        }
        balloon_init(xen_info->nr_pages);
}

#ifdef DEBUG
#define XEN_PRINTF_BUFSIZE      1024

char xen_printf_buffer[XEN_PRINTF_BUFSIZE];

/*
 * Printf function that calls the hypervisor directly.  For a domU it
 * only produces output when running on a hypervisor built with
 * debugging enabled.  It is usable in any context, since no I/O ring
 * interaction is needed.
 */
/*PRINTFLIKE1*/
void
xen_printf(const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        (void) vsnprintf(xen_printf_buffer, XEN_PRINTF_BUFSIZE, fmt, ap);
        va_end(ap);

        (void) HYPERVISOR_console_io(CONSOLEIO_write,
            strlen(xen_printf_buffer), xen_printf_buffer);
}
#else
void
xen_printf(const char *fmt, ...)
{
}
#endif  /* DEBUG */

void
startup_xen_version(void)
{
        xen_set_version(XENVER_BOOT_IDX);
        if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0)
                cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
                    "but need at least version v3.0.4",
                    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
                    XENVER_CURRENT(xv_ver));
        xen_pte_workaround();
}

int xen_mca_simulate_mc_physinfo_failure = 0;

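/*
 * On dom0, fetch and cache the physical CPU information needed by the
 * machine check (MCA) code.  Setting the tunable
 * xen_mca_simulate_mc_physinfo_failure exercises the failure path.
 */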
void
startup_xen_mca(void)
{
        if (!DOMAIN_IS_INITDOMAIN(xen_info))
                return;

        xen_phys_ncpus = 0;
        xen_phys_cpus = NULL;

        if (xen_mca_simulate_mc_physinfo_failure ||
            xen_get_mc_physcpuinfo(NULL, &xen_phys_ncpus) != 0) {
                cmn_err(CE_WARN,
                    "%sxen_get_mc_physcpuinfo failure during xen MCA startup: "
                    "there will be no machine check support",
                    xen_mca_simulate_mc_physinfo_failure ? "(simulated) " : "");
                return;
        }

        xen_phys_cpus = kmem_alloc(xen_phys_ncpus *
            sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP);

        if (xen_phys_cpus == NULL) {
                cmn_err(CE_WARN,
                    "xen_get_mc_physcpuinfo failure: can't allocate CPU array");
                return;
        }

        if (xen_get_mc_physcpuinfo(xen_phys_cpus, &xen_phys_ncpus) != 0) {
                cmn_err(CE_WARN, "xen_get_mc_physcpuinfo failure: no "
                    "physical CPU info");
                kmem_free(xen_phys_cpus,
                    xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t));
                xen_phys_ncpus = 0;
                xen_phys_cpus = NULL;
        }

        if (xen_physinfo_debug) {
                xen_mc_logical_cpu_t *xcp;
                unsigned i;

                cmn_err(CE_NOTE, "xvm mca: %u physical cpus:\n",
                    xen_phys_ncpus);
                for (i = 0; i < xen_phys_ncpus; i++) {
                        xcp = &xen_phys_cpus[i];
                        cmn_err(CE_NOTE, "cpu%u: (%u, %u, %u) apid %u",
                            xcp->mc_cpunr, xcp->mc_chipid, xcp->mc_coreid,
                            xcp->mc_threadid, xcp->mc_apicid);
                }
        }
}

/*
 * Miscellaneous hypercall wrappers with slightly more verbose diagnostics.
 */

void
xen_set_gdt(ulong_t *frame_list, int entries)
{
        int err;

        if ((err = HYPERVISOR_set_gdt(frame_list, entries)) != 0) {
                /*
                 * X_EINVAL:    reserved entry or bad frames
                 * X_EFAULT:    bad address
                 */
                panic("xen_set_gdt(%p, %d): error %d",
                    (void *)frame_list, entries, -(int)err);
        }
}

void
xen_set_ldt(user_desc_t *ldt, uint_t nsels)
{
        struct mmuext_op        op;
        long                    err;

        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = (uintptr_t)ldt;
        op.arg2.nr_ents = nsels;

        if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) != 0) {
                panic("xen_set_ldt(%p, %d): error %d",
                    (void *)ldt, nsels, -(int)err);
        }
}

void
xen_stack_switch(ulong_t ss, ulong_t esp)
{
        long err;

        if ((err = HYPERVISOR_stack_switch(ss, esp)) != 0) {
                /*
                 * X_EPERM:     bad selector
                 */
                panic("xen_stack_switch(%lx, %lx): error %d", ss, esp,
                    -(int)err);
        }
}

long
xen_set_trap_table(trap_info_t *table)
{
        long err;

        if ((err = HYPERVISOR_set_trap_table(table)) != 0) {
                /*
                 * X_EFAULT:    bad address
                 * X_EPERM:     bad selector
                 */
                panic("xen_set_trap_table(%p): error %d", (void *)table,
                    -(int)err);
        }
        return (err);
}

#if defined(__amd64)
void
xen_set_segment_base(int reg, ulong_t value)
{
        long err;

        if ((err = HYPERVISOR_set_segment_base(reg, value)) != 0) {
                /*
                 * X_EFAULT:    bad address
                 * X_EINVAL:    bad type
                 */
                panic("xen_set_segment_base(%d, %lx): error %d",
                    reg, value, -(int)err);
        }
}
#endif  /* __amd64 */

/*
 * Translate a hypervisor error code to a Solaris error code.
 */
int
xen_xlate_errcode(int error)
{
        switch (-error) {

        /*
         * Translate hypervisor errno's into native errno's
         */

#define CASE(num)       case X_##num: error = num; break

        CASE(EPERM);    CASE(ENOENT);   CASE(ESRCH);
        CASE(EINTR);    CASE(EIO);      CASE(ENXIO);
        CASE(E2BIG);    CASE(ENOMEM);   CASE(EACCES);
        CASE(EFAULT);   CASE(EBUSY);    CASE(EEXIST);
        CASE(ENODEV);   CASE(EISDIR);   CASE(EINVAL);
        CASE(ENOSPC);   CASE(ESPIPE);   CASE(EROFS);
        CASE(ENOSYS);   CASE(ENOTEMPTY); CASE(EISCONN);
        CASE(ENODATA);  CASE(EAGAIN);

#undef CASE

        default:
                panic("xen_xlate_errcode: unknown error %d", error);
        }

        return (error);
}

/*
 * Raise PS_IOPL on the current vcpu to user level.
 * Caller is responsible for preventing kernel preemption.
 */
void
xen_enable_user_iopl(void)
{
        physdev_set_iopl_t set_iopl;

        set_iopl.iopl = 3;              /* user ring 3 */
        (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}

/*
 * Drop PS_IOPL on the current vcpu to kernel level.
 */
void
xen_disable_user_iopl(void)
{
        physdev_set_iopl_t set_iopl;

        set_iopl.iopl = 1;              /* kernel pseudo ring 1 */
        (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}

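/*
 * Change the protection of the given CPU's GDT page, both in the
 * kernel address space and (on amd64) via xen_kpm_page() for the
 * underlying physical page.
 */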
int
xen_gdt_setprot(cpu_t *cp, uint_t prot)
{
        int err;
#if defined(__amd64)
        int pt_bits = PT_VALID;

        if (prot & PROT_WRITE)
                pt_bits |= PT_WRITABLE;
#endif

        if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt,
            MMU_PAGESIZE, prot)) != 0)
                goto done;

#if defined(__amd64)
        err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits);
#endif

done:
        if (err) {
                cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d",
                    cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only",
                    err);
        }

        return (err);
}

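/*
 * As xen_gdt_setprot(), but for an LDT that may span several pages;
 * on amd64 the underlying physical pages are updated one at a time.
 */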
int
xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
{
        int err;
        caddr_t lva = (caddr_t)ldt;
#if defined(__amd64)
        int pt_bits = PT_VALID;
        pgcnt_t npgs;

        if (prot & PROT_WRITE)
                pt_bits |= PT_WRITABLE;
#endif  /* __amd64 */

        if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
                goto done;

#if defined(__amd64)

        ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
        npgs = mmu_btop(lsize);
        while (npgs--) {
                if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
                    pt_bits)) != 0)
                        break;
                lva += PAGESIZE;
        }
#endif  /* __amd64 */

done:
        if (err) {
                cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
                    (void *)lva,
                    (prot & PROT_WRITE) ? "writable" : "read-only", err);
        }

        return (err);
}

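/*
 * Query the hypervisor for physical CPU information.  On entry *ncpus
 * is the capacity of the log_cpus array (log_cpus may be NULL to size
 * the query); on success *ncpus is updated with the count reported by
 * the hypervisor.
 */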
int
xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t *log_cpus, uint_t *ncpus)
{
        xen_mc_t xmc;
        struct xen_mc_physcpuinfo *cpi = &xmc.u.mc_physcpuinfo;

        cpi->ncpus = *ncpus;
        /*LINTED: constant in conditional context*/
        set_xen_guest_handle(cpi->info, log_cpus);

        if (HYPERVISOR_mca(XEN_MC_physcpuinfo, &xmc) != 0)
                return (-1);

        *ncpus = cpi->ncpus;
        return (0);
}

void
print_panic(const char *str)
{
        xen_printf("%s", str);
}

/*
 * Interfaces to iterate over real cpu information, but only that info
 * which we choose to expose here.  These are of interest to dom0
 * only (and the backing hypercall should not work for domU).
 */

xen_mc_lcpu_cookie_t
xen_physcpu_next(xen_mc_lcpu_cookie_t cookie)
{
        xen_mc_logical_cpu_t *xcp = (xen_mc_logical_cpu_t *)cookie;

        if (!DOMAIN_IS_INITDOMAIN(xen_info))
                return (NULL);

        if (cookie == NULL)
                return ((xen_mc_lcpu_cookie_t)xen_phys_cpus);

        if (xcp == xen_phys_cpus + xen_phys_ncpus - 1)
                return (NULL);
        else
                return ((xen_mc_lcpu_cookie_t)++xcp);
}

#define COOKIE2XCP(c) ((xen_mc_logical_cpu_t *)(c))

const char *
xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie)
{
        xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);

        return ((const char *)&xcp->mc_vendorid[0]);
}

int
xen_physcpu_family(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_family);
}

int
xen_physcpu_model(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_model);
}

int
xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_step);
}

id_t
xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_chipid);
}

id_t
xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_coreid);
}

id_t
xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_threadid);
}

id_t
xen_physcpu_initial_apicid(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_clusterid);
}

id_t
xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_cpunr);
}

boolean_t
xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie)
{
        return (COOKIE2XCP(cookie)->mc_nthreads > 1);
}

uint64_t
xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie)
{
        xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);

        /*
         * Need to #define the indices, or search through the array.
         */
        return (xcp->mc_msrvalues[0].value);
}

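/*
 * Wrapper for the GNTTABOP_map_grant_ref hypercall.  For kernel
 * (non-user) mappings outside of boot, PT_FOREIGN is placed in the
 * guest-available PTE flag bits (via _GNTMAP_guest_avail0), presumably
 * so the hat layer can recognize mappings of foreign MFNs.
 */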
int
xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
    boolean_t uvaddr)
{
        long rc;
        uint_t i;

        ASSERT(cmd == GNTTABOP_map_grant_ref);

#if !defined(_BOOT)
        if (uvaddr == B_FALSE) {
                for (i = 0; i < count; ++i) {
                        mapop[i].flags |= (PT_FOREIGN << _GNTMAP_guest_avail0);
                }
        }
#endif

        rc = HYPERVISOR_grant_table_op(cmd, mapop, count);

        return (rc);
}

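/*
 * Issue a XEN_SYSCTL_physinfo sysctl and copy the result out to the
 * caller.  The cpu_to_node guest handle is explicitly cleared, since
 * we don't use the CPU-to-node map.
 */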
static int
xpv_get_physinfo(xen_sysctl_physinfo_t *pi)
{
        xen_sysctl_t op;
        struct sp { void *p; } *sp = (struct sp *)&op.u.physinfo.cpu_to_node;
        int ret;

        bzero(&op, sizeof (op));
        op.cmd = XEN_SYSCTL_physinfo;
        op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
        /*LINTED: constant in conditional context*/
        set_xen_guest_handle(*sp, NULL);

        ret = HYPERVISOR_sysctl(&op);

        if (ret != 0)
                return (xen_xlate_errcode(ret));

        bcopy(&op.u.physinfo, pi, sizeof (op.u.physinfo));
        return (0);
}

/*
 * On dom0, we can determine the number of physical cpus on the machine.
 * This number is important when figuring out what workarounds are
 * appropriate, so compute it now.
 */
uint_t
xpv_nr_phys_cpus(void)
{
        static uint_t nphyscpus = 0;

        ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

        if (nphyscpus == 0) {
                xen_sysctl_physinfo_t pi;
                int ret;

                if ((ret = xpv_get_physinfo(&pi)) != 0)
                        panic("xpv_get_physinfo() failed: %d\n", ret);
                nphyscpus = pi.nr_cpus;
        }
        return (nphyscpus);
}

pgcnt_t
xpv_nr_phys_pages(void)
{
        xen_sysctl_physinfo_t pi;
        int ret;

        ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

        if ((ret = xpv_get_physinfo(&pi)) != 0)
                panic("xpv_get_physinfo() failed: %d\n", ret);

        return ((pgcnt_t)pi.total_pages);
}

uint64_t
xpv_cpu_khz(void)
{
        xen_sysctl_physinfo_t pi;
        int ret;

        ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

        if ((ret = xpv_get_physinfo(&pi)) != 0)
                panic("xpv_get_physinfo() failed: %d\n", ret);
        return ((uint64_t)pi.cpu_khz);
}