1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/modctl.h>
  27 #include <sys/types.h>
  28 #include <sys/archsystm.h>
  29 #include <sys/machsystm.h>
  30 #include <sys/sunndi.h>
  31 #include <sys/sunddi.h>
  32 #include <sys/ddi_subrdefs.h>
  33 #include <sys/xpv_support.h>
  34 #include <sys/xen_errno.h>
  35 #include <sys/hypervisor.h>
  36 #include <sys/gnttab.h>
  37 #include <sys/xenbus_comms.h>
  38 #include <sys/xenbus_impl.h>
  39 #include <xen/sys/xendev.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/x86_archext.h>
  42 #include <sys/mman.h>
  43 #include <sys/stat.h>
  44 #include <sys/conf.h>
  45 #include <sys/devops.h>
  46 #include <sys/pc_mmu.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/cpr.h>
  49 #include <sys/ddi.h>
  50 #include <vm/seg_kmem.h>
  51 #include <vm/as.h>
  52 #include <vm/hat_pte.h>
  53 #include <vm/hat_i86.h>
  54 
  55 #define XPV_MINOR 0
  56 #define XPV_BUFSIZE 128
  57 
  58 /* virtual addr for the store_mfn page */
  59 caddr_t xb_addr;
  60 
  61 dev_info_t *xpv_dip;
  62 static dev_info_t *xpvd_dip;
  63 
  64 #ifdef DEBUG
  65 int xen_suspend_debug;
  66 
  67 #define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
  68 #else
  69 #define SUSPEND_DEBUG(...)
  70 #endif
  71 
  72 /*
  73  * Forward declarations
  74  */
  75 static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
  76 static int xpv_attach(dev_info_t *, ddi_attach_cmd_t);
  77 static int xpv_detach(dev_info_t *, ddi_detach_cmd_t);
  78 static int xpv_open(dev_t *, int, int, cred_t *);
  79 static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
  80 
  81 static struct cb_ops xpv_cb_ops = {
  82         xpv_open,
  83         nulldev,        /* close */
  84         nodev,          /* strategy */
  85         nodev,          /* print */
  86         nodev,          /* dump */
  87         nodev,          /* read */
  88         nodev,          /* write */
  89         xpv_ioctl,      /* ioctl */
  90         nodev,          /* devmap */
  91         nodev,          /* mmap */
  92         nodev,          /* segmap */
  93         nochpoll,       /* poll */
  94         ddi_prop_op,
  95         NULL,
  96         D_MP,
  97         CB_REV,
  98         NULL,
  99         NULL
 100 };
 101 
 102 static struct dev_ops xpv_dv_ops = {
 103         DEVO_REV,
 104         0,
 105         xpv_getinfo,
 106         nulldev,        /* identify */
 107         nulldev,        /* probe */
 108         xpv_attach,
 109         xpv_detach,
 110         nodev,          /* reset */
 111         &xpv_cb_ops,
 112         NULL,           /* struct bus_ops */
 113         NULL,           /* power */
 114         ddi_quiesce_not_supported,      /* devo_quiesce */
 115 };
 116 
 117 static struct modldrv modldrv = {
 118         &mod_driverops,
 119         "xpv driver",
 120         &xpv_dv_ops
 121 };
 122 
 123 static struct modlinkage modl = {
 124         MODREV_1,
 125         {
 126                 (void *)&modldrv,
 127                 NULL            /* null termination */
 128         }
 129 };
 130 
 131 static ddi_dma_attr_t xpv_dma_attr = {
 132         DMA_ATTR_V0,            /* version of this structure */
 133         0,                      /* lowest usable address */
 134         0xffffffffffffffffULL,  /* highest usable address */
 135         0x7fffffff,             /* maximum DMAable byte count */
 136         MMU_PAGESIZE,           /* alignment in bytes */
 137         0x7ff,                  /* bitmap of burst sizes */
 138         1,                      /* minimum transfer */
 139         0xffffffffU,            /* maximum transfer */
 140         0x7fffffffULL,          /* maximum segment length */
 141         1,                      /* maximum number of segments */
 142         1,                      /* granularity */
 143         0,                      /* flags (reserved) */
 144 };
 145 
 146 static ddi_device_acc_attr_t xpv_accattr = {
 147         DDI_DEVICE_ATTR_V0,
 148         DDI_NEVERSWAP_ACC,
 149         DDI_STRICTORDER_ACC
 150 };
 151 
 152 #define MAX_ALLOCATIONS 10
 153 static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS];
 154 static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS];
 155 static int xen_alloc_cnt = 0;
 156 
 157 void *
 158 xen_alloc_pages(pgcnt_t cnt)
 159 {
 160         size_t len;
 161         int a = xen_alloc_cnt++;
 162         caddr_t addr;
 163 
 164         ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS);
 165         if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0,
 166             &xpv_dma_handle[a]) != DDI_SUCCESS)
 167                 return (NULL);
 168 
 169         if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt,
 170             &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0,
 171             &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) {
 172                 ddi_dma_free_handle(&xpv_dma_handle[a]);
 173                 cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices");
 174                 return (NULL);
 175         }
 176         return (addr);
 177 }
 178 
 179 /*
 180  * This function is invoked twice, first time with reprogram=0 to set up
 181  * the xpvd portion of the device tree. The second time it is ignored.
 182  */
 183 static void
 184 xpv_enumerate(int reprogram)
 185 {
 186         dev_info_t *dip;
 187 
 188         if (reprogram != 0)
 189                 return;
 190 
 191         ndi_devi_alloc_sleep(ddi_root_node(), "xpvd",
 192             (pnode_t)DEVI_SID_NODEID, &dip);
 193 
 194         (void) ndi_devi_bind_driver(dip, 0);
 195 
 196         /*
 197          * Too early to enumerate split device drivers in domU
 198          * since we need to create taskq thread during enumeration.
 199          * So, we only enumerate softdevs and console here.
 200          */
 201         xendev_enum_all(dip, B_TRUE);
 202 }
 203 
 204 /*
 205  * Translate a hypervisor errcode to a Solaris error code.
 206  */
 207 int
 208 xen_xlate_errcode(int error)
 209 {
 210 #define CASE(num)       case X_##num: error = num; break
 211 
 212         switch (-error) {
 213                 CASE(EPERM);    CASE(ENOENT);   CASE(ESRCH);
 214                 CASE(EINTR);    CASE(EIO);      CASE(ENXIO);
 215                 CASE(E2BIG);    CASE(ENOMEM);   CASE(EACCES);
 216                 CASE(EFAULT);   CASE(EBUSY);    CASE(EEXIST);
 217                 CASE(ENODEV);   CASE(EISDIR);   CASE(EINVAL);
 218                 CASE(ENOSPC);   CASE(ESPIPE);   CASE(EROFS);
 219                 CASE(ENOSYS);   CASE(ENOTEMPTY); CASE(EISCONN);
 220                 CASE(ENODATA);
 221                 default:
 222                 panic("xen_xlate_errcode: unknown error %d", error);
 223         }
 224         return (error);
 225 #undef CASE
 226 }
 227 
 228 /*PRINTFLIKE1*/
 229 void
 230 xen_printf(const char *fmt, ...)
 231 {
 232         va_list adx;
 233 
 234         va_start(adx, fmt);
 235         printf(fmt, adx);
 236         va_end(adx);
 237 }
 238 
 239 /*
 240  * Stub functions to get the FE drivers to build, and to catch drivers that
 241  * misbehave in HVM domains.
 242  */
 243 /*ARGSUSED*/
 244 void
 245 xen_release_pfn(pfn_t pfn)
 246 {
 247         panic("xen_release_pfn() is not supported in HVM domains");
 248 }
 249 
 250 /*ARGSUSED*/
 251 void
 252 reassign_pfn(pfn_t pfn, mfn_t mfn)
 253 {
 254         panic("reassign_pfn() is not supported in HVM domains");
 255 }
 256 
 257 /*ARGSUSED*/
 258 long
 259 balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
 260 {
 261         panic("balloon_free_pages() is not supported in HVM domains");
 262         return (0);
 263 }
 264 
 265 /*ARGSUSED*/
 266 void
 267 balloon_drv_added(int64_t delta)
 268 {
 269         panic("balloon_drv_added() is not supported in HVM domains");
 270 }
 271 
 272 /*
 273  * Add a mapping for the machine page at the given virtual address.
 274  */
 275 void
 276 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
 277 {
 278         ASSERT(level == 0);
 279 
 280         hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
 281             mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
 282 }
 283 
 284 /*ARGSUSED*/
 285 int
 286 xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
 287     boolean_t uvaddr)
 288 {
 289         long rc;
 290 
 291         ASSERT(cmd == GNTTABOP_map_grant_ref);
 292         rc = HYPERVISOR_grant_table_op(cmd, mapop, count);
 293 
 294         return (rc);
 295 }
 296 
 297 static struct xenbus_watch shutdown_watch;
 298 taskq_t *xen_shutdown_tq;
 299 
 300 #define SHUTDOWN_INVALID        -1
 301 #define SHUTDOWN_POWEROFF       0
 302 #define SHUTDOWN_REBOOT         1
 303 #define SHUTDOWN_SUSPEND        2
 304 #define SHUTDOWN_HALT           3
 305 #define SHUTDOWN_MAX            4
 306 
 307 #define SHUTDOWN_TIMEOUT_SECS (60 * 5)
 308 
 309 int
 310 xen_suspend_devices(dev_info_t *dip)
 311 {
 312         int error;
 313         char buf[XPV_BUFSIZE];
 314 
 315         SUSPEND_DEBUG("xen_suspend_devices\n");
 316 
 317         for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
 318                 if (xen_suspend_devices(ddi_get_child(dip)))
 319                         return (ENXIO);
 320                 if (ddi_get_driver(dip) == NULL)
 321                         continue;
 322                 SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf));
 323                 ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0);
 324 
 325 
 326                 if (!i_ddi_devi_attached(dip)) {
 327                         error = DDI_FAILURE;
 328                 } else {
 329                         error = devi_detach(dip, DDI_SUSPEND);
 330                 }
 331 
 332                 if (error == DDI_SUCCESS) {
 333                         DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED;
 334                 } else {
 335                         SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n",
 336                             ddi_deviname(dip, buf));
 337                         cmn_err(CE_WARN, "Unable to suspend device %s.",
 338                             ddi_deviname(dip, buf));
 339                         cmn_err(CE_WARN, "Device is busy or does not "
 340                             "support suspend/resume.");
 341                                 return (ENXIO);
 342                 }
 343         }
 344         return (0);
 345 }
 346 
 347 int
 348 xen_resume_devices(dev_info_t *start, int resume_failed)
 349 {
 350         dev_info_t *dip, *next, *last = NULL;
 351         int did_suspend;
 352         int error = resume_failed;
 353         char buf[XPV_BUFSIZE];
 354 
 355         SUSPEND_DEBUG("xen_resume_devices\n");
 356 
 357         while (last != start) {
 358                 dip = start;
 359                 next = ddi_get_next_sibling(dip);
 360                 while (next != last) {
 361                         dip = next;
 362                         next = ddi_get_next_sibling(dip);
 363                 }
 364 
 365                 /*
 366                  * cpr is the only one that uses this field and the device
 367                  * itself hasn't resumed yet, there is no need to use a
 368                  * lock, even though kernel threads are active by now.
 369                  */
 370                 did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED;
 371                 if (did_suspend)
 372                         DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED;
 373 
 374                 /*
 375                  * There may be background attaches happening on devices
 376                  * that were not originally suspended by cpr, so resume
 377                  * only devices that were suspended by cpr. Also, stop
 378                  * resuming after the first resume failure, but traverse
 379                  * the entire tree to clear the suspend flag.
 380                  */
 381                 if (did_suspend && !error) {
 382                         SUSPEND_DEBUG("Resuming device %s\n",
 383                             ddi_deviname(dip, buf));
 384                         /*
 385                          * If a device suspended by cpr gets detached during
 386                          * the resume process (for example, due to hotplugging)
 387                          * before cpr gets around to issuing it a DDI_RESUME,
 388                          * we'll have problems.
 389                          */
 390                         if (!i_ddi_devi_attached(dip)) {
 391                                 cmn_err(CE_WARN, "Skipping %s, device "
 392                                     "not ready for resume",
 393                                     ddi_deviname(dip, buf));
 394                         } else {
 395                                 if (devi_attach(dip, DDI_RESUME) !=
 396                                     DDI_SUCCESS) {
 397                                         error = ENXIO;
 398                                 }
 399                         }
 400                 }
 401 
 402                 if (error == ENXIO) {
 403                         cmn_err(CE_WARN, "Unable to resume device %s",
 404                             ddi_deviname(dip, buf));
 405                 }
 406 
 407                 error = xen_resume_devices(ddi_get_child(dip), error);
 408                 last = dip;
 409         }
 410 
 411         return (error);
 412 }
 413 
 414 /*ARGSUSED*/
 415 static int
 416 check_xpvd(dev_info_t *dip, void *arg)
 417 {
 418         char *name;
 419 
 420         name = ddi_node_name(dip);
 421         if (name == NULL || strcmp(name, "xpvd")) {
 422                 return (DDI_WALK_CONTINUE);
 423         } else {
 424                 xpvd_dip = dip;
 425                 return (DDI_WALK_TERMINATE);
 426         }
 427 }
 428 
 429 /*
 430  * Top level routine to direct suspend/resume of a domain.
 431  */
 432 void
 433 xen_suspend_domain(void)
 434 {
 435         extern void rtcsync(void);
 436         extern void ec_resume(void);
 437         extern kmutex_t ec_lock;
 438         struct xen_add_to_physmap xatp;
 439         ulong_t flags;
 440         int err;
 441 
 442         cmn_err(CE_NOTE, "Domain suspending for save/migrate");
 443 
 444         SUSPEND_DEBUG("xen_suspend_domain\n");
 445 
 446         /*
 447          * We only want to suspend the PV devices, since the emulated devices
 448          * are suspended by saving the emulated device state.  The PV devices
 449          * are all children of the xpvd nexus device.  So we search the
 450          * device tree for the xpvd node to use as the root of the tree to
 451          * be suspended.
 452          */
 453         if (xpvd_dip == NULL)
 454                 ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);
 455 
 456         /*
 457          * suspend interrupts and devices
 458          */
 459         if (xpvd_dip != NULL)
 460                 (void) xen_suspend_devices(ddi_get_child(xpvd_dip));
 461         else
 462                 cmn_err(CE_WARN, "No PV devices found to suspend");
 463         SUSPEND_DEBUG("xenbus_suspend\n");
 464         xenbus_suspend();
 465 
 466         mutex_enter(&cpu_lock);
 467 
 468         /*
 469          * Suspend on vcpu 0
 470          */
 471         thread_affinity_set(curthread, 0);
 472         kpreempt_disable();
 473 
 474         if (ncpus > 1)
 475                 pause_cpus(NULL);
 476         /*
 477          * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
 478          * any holder would have dropped it to get through pause_cpus().
 479          */
 480         mutex_enter(&ec_lock);
 481 
 482         /*
 483          * From here on in, we can't take locks.
 484          */
 485 
 486         flags = intr_clear();
 487 
 488         SUSPEND_DEBUG("HYPERVISOR_suspend\n");
 489         /*
 490          * At this point we suspend and sometime later resume.
 491          * Note that this call may return with an indication of a cancelled
 492          * for now no matter ehat the return we do a full resume of all
 493          * suspended drivers, etc.
 494          */
 495         (void) HYPERVISOR_shutdown(SHUTDOWN_suspend);
 496 
 497         /*
 498          * Point HYPERVISOR_shared_info to the proper place.
 499          */
 500         xatp.domid = DOMID_SELF;
 501         xatp.idx = 0;
 502         xatp.space = XENMAPSPACE_shared_info;
 503         xatp.gpfn = xen_shared_info_frame;
 504         if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
 505                 panic("Could not set shared_info page. error: %d", err);
 506 
 507         SUSPEND_DEBUG("gnttab_resume\n");
 508         gnttab_resume();
 509 
 510         SUSPEND_DEBUG("ec_resume\n");
 511         ec_resume();
 512 
 513         intr_restore(flags);
 514 
 515         if (ncpus > 1)
 516                 start_cpus();
 517 
 518         mutex_exit(&ec_lock);
 519         mutex_exit(&cpu_lock);
 520 
 521         /*
 522          * Now we can take locks again.
 523          */
 524 
 525         rtcsync();
 526 
 527         SUSPEND_DEBUG("xenbus_resume\n");
 528         xenbus_resume();
 529         SUSPEND_DEBUG("xen_resume_devices\n");
 530         if (xpvd_dip != NULL)
 531                 (void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);
 532 
 533         thread_affinity_clear(curthread);
 534         kpreempt_enable();
 535 
 536         SUSPEND_DEBUG("finished xen_suspend_domain\n");
 537 
 538         cmn_err(CE_NOTE, "domain restore/migrate completed");
 539 }
 540 
 541 static void
 542 xen_dirty_shutdown(void *arg)
 543 {
 544         int cmd = (uintptr_t)arg;
 545 
 546         cmn_err(CE_WARN, "Externally requested shutdown failed or "
 547             "timed out.\nShutting down.\n");
 548 
 549         switch (cmd) {
 550         case SHUTDOWN_HALT:
 551         case SHUTDOWN_POWEROFF:
 552                 (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
 553                 break;
 554         case SHUTDOWN_REBOOT:
 555                 (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
 556                 break;
 557         }
 558 }
 559 
 560 static void
 561 xen_shutdown(void *arg)
 562 {
 563         int cmd = (uintptr_t)arg;
 564         proc_t *initpp;
 565 
 566         ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
 567 
 568         if (cmd == SHUTDOWN_SUSPEND) {
 569                 xen_suspend_domain();
 570                 return;
 571         }
 572 
 573         switch (cmd) {
 574         case SHUTDOWN_POWEROFF:
 575                 force_shutdown_method = AD_POWEROFF;
 576                 break;
 577         case SHUTDOWN_HALT:
 578                 force_shutdown_method = AD_HALT;
 579                 break;
 580         case SHUTDOWN_REBOOT:
 581                 force_shutdown_method = AD_BOOT;
 582                 break;
 583         }
 584 
 585 
 586         /*
 587          * If we're still booting and init(1) isn't set up yet, simply halt.
 588          */
 589         mutex_enter(&pidlock);
 590         initpp = prfind(P_INITPID);
 591         mutex_exit(&pidlock);
 592         if (initpp == NULL) {
 593                 extern void halt(char *);
 594                 halt("Power off the System");   /* just in case */
 595         }
 596 
 597         /*
 598          * else, graceful shutdown with inittab and all getting involved
 599          */
 600         psignal(initpp, SIGPWR);
 601 
 602         (void) timeout(xen_dirty_shutdown, arg,
 603             SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
 604 }
 605 
 606 /*ARGSUSED*/
 607 static void
 608 xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
 609         unsigned int len)
 610 {
 611         char *str;
 612         xenbus_transaction_t xbt;
 613         int err, shutdown_code = SHUTDOWN_INVALID;
 614         unsigned int slen;
 615 
 616 again:
 617         err = xenbus_transaction_start(&xbt);
 618         if (err)
 619                 return;
 620         if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
 621                 (void) xenbus_transaction_end(xbt, 1);
 622                 return;
 623         }
 624 
 625         SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
 626 
 627         /*
 628          * If this is a watch fired from our write below, check out early to
 629          * avoid an infinite loop.
 630          */
 631         if (strcmp(str, "") == 0) {
 632                 (void) xenbus_transaction_end(xbt, 0);
 633                 kmem_free(str, slen);
 634                 return;
 635         } else if (strcmp(str, "poweroff") == 0) {
 636                 shutdown_code = SHUTDOWN_POWEROFF;
 637         } else if (strcmp(str, "reboot") == 0) {
 638                 shutdown_code = SHUTDOWN_REBOOT;
 639         } else if (strcmp(str, "suspend") == 0) {
 640                 shutdown_code = SHUTDOWN_SUSPEND;
 641         } else if (strcmp(str, "halt") == 0) {
 642                 shutdown_code = SHUTDOWN_HALT;
 643         } else {
 644                 printf("Ignoring shutdown request: %s\n", str);
 645         }
 646 
 647         (void) xenbus_write(xbt, "control", "shutdown", "");
 648         err = xenbus_transaction_end(xbt, 0);
 649         if (err == EAGAIN) {
 650                 SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
 651                 kmem_free(str, slen);
 652                 goto again;
 653         }
 654 
 655         kmem_free(str, slen);
 656         if (shutdown_code != SHUTDOWN_INVALID) {
 657                 (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
 658                     (void *)(intptr_t)shutdown_code, 0);
 659         }
 660 }
 661 
 662 static int
 663 xpv_drv_init(void)
 664 {
 665         if (xpv_feature(XPVF_HYPERCALLS) < 0 ||
 666             xpv_feature(XPVF_SHARED_INFO) < 0)
 667                 return (-1);
 668 
 669         /* Set up the grant tables.  */
 670         gnttab_init();
 671 
 672         /* Set up event channel support */
 673         if (ec_init() != 0)
 674                 return (-1);
 675 
 676         /* Set up xenbus */
 677         xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
 678         xs_early_init();
 679         xs_domu_init();
 680 
 681         /* Set up for suspend/resume/migrate */
 682         xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
 683             maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
 684         shutdown_watch.node = "control/shutdown";
 685         shutdown_watch.callback = xen_shutdown_handler;
 686         if (register_xenbus_watch(&shutdown_watch))
 687                 cmn_err(CE_WARN, "Failed to set shutdown watcher");
 688 
 689         return (0);
 690 }
 691 
 692 static void
 693 xen_pv_fini()
 694 {
 695         ec_fini();
 696 }
 697 
 698 /*ARGSUSED*/
 699 static int
 700 xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 701 {
 702         if (getminor((dev_t)arg) != XPV_MINOR)
 703                 return (DDI_FAILURE);
 704 
 705         switch (cmd) {
 706         case DDI_INFO_DEVT2DEVINFO:
 707                 *result = xpv_dip;
 708                 break;
 709         case DDI_INFO_DEVT2INSTANCE:
 710                 *result = 0;
 711                 break;
 712         default:
 713                 return (DDI_FAILURE);
 714         }
 715 
 716         return (DDI_SUCCESS);
 717 }
 718 
 719 static int
 720 xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 721 {
 722         if (cmd != DDI_ATTACH)
 723                 return (DDI_FAILURE);
 724 
 725         if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
 726             ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
 727                 return (DDI_FAILURE);
 728 
 729         xpv_dip = dip;
 730 
 731         if (xpv_drv_init() != 0)
 732                 return (DDI_FAILURE);
 733 
 734         ddi_report_dev(dip);
 735 
 736         /*
 737          * If the memscrubber attempts to scrub the pages we hand to Xen,
 738          * the domain will panic.
 739          */
 740         memscrub_disable();
 741 
 742         /*
 743          * Report our version to dom0.
 744          */
 745         if (xenbus_printf(XBT_NULL, "guest/xpv", "version", "%d",
 746             HVMPV_XPV_VERS))
 747                 cmn_err(CE_WARN, "xpv: couldn't write version\n");
 748 
 749         return (DDI_SUCCESS);
 750 }
 751 
 752 /*
 753  * Attempts to reload the PV driver plumbing hang on Intel platforms, so
 754  * we don't want to unload the framework by accident.
 755  */
 756 int xpv_allow_detach = 0;
 757 
 758 static int
 759 xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 760 {
 761         if (cmd != DDI_DETACH || xpv_allow_detach == 0)
 762                 return (DDI_FAILURE);
 763 
 764         if (xpv_dip != NULL) {
 765                 xen_pv_fini();
 766                 ddi_remove_minor_node(dip, NULL);
 767                 xpv_dip = NULL;
 768         }
 769 
 770         return (DDI_SUCCESS);
 771 }
 772 
 773 /*ARGSUSED1*/
 774 static int
 775 xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr)
 776 {
 777         return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO);
 778 }
 779 
 780 /*ARGSUSED*/
 781 static int
 782 xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr,
 783     int *rval_p)
 784 {
 785         return (EINVAL);
 786 }
 787 
 788 int
 789 _init(void)
 790 {
 791         int err;
 792 
 793         if ((err = mod_install(&modl)) != 0)
 794                 return (err);
 795 
 796         impl_bus_add_probe(xpv_enumerate);
 797         return (0);
 798 }
 799 
 800 int
 801 _fini(void)
 802 {
 803         int err;
 804 
 805         if ((err = mod_remove(&modl)) != 0)
 806                 return (err);
 807 
 808         impl_bus_delete_probe(xpv_enumerate);
 809         return (0);
 810 }
 811 
 812 int
 813 _info(struct modinfo *modinfop)
 814 {
 815         return (mod_info(&modl, modinfop));
 816 }