1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
  25  * Copyright 2014 Pluribus Networks, Inc.
  26  */
  27 
  28 /*
  29  * PC specific DDI implementation
  30  */
  31 #include <sys/types.h>
  32 #include <sys/autoconf.h>
  33 #include <sys/avintr.h>
  34 #include <sys/bootconf.h>
  35 #include <sys/conf.h>
  36 #include <sys/cpuvar.h>
  37 #include <sys/ddi_impldefs.h>
  38 #include <sys/ddi_subrdefs.h>
  39 #include <sys/ethernet.h>
  40 #include <sys/fp.h>
  41 #include <sys/instance.h>
  42 #include <sys/kmem.h>
  43 #include <sys/machsystm.h>
  44 #include <sys/modctl.h>
  45 #include <sys/promif.h>
  46 #include <sys/prom_plat.h>
  47 #include <sys/sunndi.h>
  48 #include <sys/ndi_impldefs.h>
  49 #include <sys/ddi_impldefs.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/systeminfo.h>
  52 #include <sys/utsname.h>
  53 #include <sys/atomic.h>
  54 #include <sys/spl.h>
  55 #include <sys/archsystm.h>
  56 #include <vm/seg_kmem.h>
  57 #include <sys/ontrap.h>
  58 #include <sys/fm/protocol.h>
  59 #include <sys/ramdisk.h>
  60 #include <sys/sunndi.h>
  61 #include <sys/vmem.h>
  62 #include <sys/pci_impl.h>
  63 #if defined(__xpv)
  64 #include <sys/hypervisor.h>
  65 #endif
  66 #include <sys/mach_intr.h>
  67 #include <vm/hat_i86.h>
  68 #include <sys/x86_archext.h>
  69 #include <sys/avl.h>
  70 
  71 /*
  72  * DDI Boot Configuration
  73  */
  74 
/*
 * Platform drivers on this platform.
 * The list of module names is terminated by a null pointer.
 */
char *platform_module_list[] = {
        "acpippm",
        "ppm",
        (char *)0
};

/* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/*
 * NOTE(review): presumably the upper bound on a DMA copy buffer; the
 * consumer is not in this part of the file -- confirm.
 */
size_t dma_max_copybuf_size = 0x101000;         /* 1M + 4K */

/*
 * NOTE(review): presumably the extent of the boot ramdisk; these are set
 * and read outside this part of the file -- confirm.
 */
uint64_t ramdisk_start, ramdisk_end;

/*
 * When non-zero, configure() attaches "isa" with i_ddi_attach_pseudo_node()
 * rather than i_ddi_attach_hw_nodes().
 */
int pseudo_isa = 0;
  92 
  93 /*
  94  * Forward declarations
  95  */
  96 static int getlongprop_buf();
  97 static void get_boot_properties(void);
  98 static void impl_bus_initialprobe(void);
  99 static void impl_bus_reprobe(void);
 100 
 101 static int poke_mem(peekpoke_ctlops_t *in_args);
 102 static int peek_mem(peekpoke_ctlops_t *in_args);
 103 
 104 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
 105 
 106 #if defined(__amd64) && !defined(__xpv)
 107 extern void immu_init(void);
 108 #endif
 109 
/*
 * We use an AVL tree to store contiguous address allocations made with the
 * kalloca() routine, so that we can return the size to free with kfreea().
 * Note that in the future it would be vastly faster if we could eliminate
 * this lookup by insisting that all callers keep track of their own sizes,
 * just as for kmem_alloc().
 */
struct ctgas {
        avl_node_t ctg_link;    /* AVL linkage */
        void *ctg_addr;         /* address of the allocation */
        size_t ctg_size;        /* size recorded for the allocation */
};

/* Tree of struct ctgas records described above. */
static avl_tree_t ctgtree;

/*
 * NOTE(review): presumably serializes access to ctgtree; the users of
 * CTGLOCK()/CTGUNLOCK() are not in this part of the file -- confirm.
 */
static kmutex_t         ctgmutex;
#define CTGLOCK()       mutex_enter(&ctgmutex)
#define CTGUNLOCK()     mutex_exit(&ctgmutex)

/*
 * Minimum pfn value of page_t's put on the free list.  This is to simplify
 * support of ddi dma memory requests which specify small, non-zero addr_lo
 * values.
 *
 * The default value of 2, which corresponds to the only known non-zero addr_lo
 * value used, means a single page will be sacrificed (pfn typically starts
 * at 1).  ddiphysmin can be set to 0 to disable. It cannot be set above 0x100
 * otherwise mp startup panics.
 */
pfn_t   ddiphysmin = 2;
 140 
 141 static void
 142 check_driver_disable(void)
 143 {
 144         int proplen = 128;
 145         char *prop_name;
 146         char *drv_name, *propval;
 147         major_t major;
 148 
 149         prop_name = kmem_alloc(proplen, KM_SLEEP);
 150         for (major = 0; major < devcnt; major++) {
 151                 drv_name = ddi_major_to_name(major);
 152                 if (drv_name == NULL)
 153                         continue;
 154                 (void) snprintf(prop_name, proplen, "disable-%s", drv_name);
 155                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
 156                     DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
 157                         if (strcmp(propval, "true") == 0) {
 158                                 devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
 159                                 cmn_err(CE_NOTE, "driver %s disabled",
 160                                     drv_name);
 161                         }
 162                         ddi_prop_free(propval);
 163                 }
 164         }
 165         kmem_free(prop_name, proplen);
 166 }
 167 
 168 
 169 /*
 170  * Configure the hardware on the system.
 171  * Called before the rootfs is mounted
 172  */
 173 void
 174 configure(void)
 175 {
 176         extern void i_ddi_init_root();
 177 
 178 #if defined(__i386)
 179         extern int fpu_pentium_fdivbug;
 180 #endif  /* __i386 */
 181         extern int fpu_ignored;
 182 
 183         /*
 184          * Determine if an FPU is attached
 185          */
 186 
 187         fpu_probe();
 188 
 189 #if defined(__i386)
 190         if (fpu_pentium_fdivbug) {
 191                 printf("\
 192 FP hardware exhibits Pentium floating point divide problem\n");
 193         }
 194 #endif  /* __i386 */
 195 
 196         if (fpu_ignored) {
 197                 printf("FP hardware will not be used\n");
 198         } else if (!fpu_exists) {
 199                 printf("No FPU in configuration\n");
 200         }
 201 
 202         /*
 203          * Initialize devices on the machine.
 204          * Uses configuration tree built by the PROMs to determine what
 205          * is present, and builds a tree of prototype dev_info nodes
 206          * corresponding to the hardware which identified itself.
 207          */
 208 
 209         /*
 210          * Initialize root node.
 211          */
 212         i_ddi_init_root();
 213 
 214         /* reprogram devices not set up by firmware (BIOS) */
 215         impl_bus_reprobe();
 216 
 217 #if defined(__amd64) && !defined(__xpv)
 218         /*
 219          * Setup but don't startup the IOMMU
 220          * Startup happens later via a direct call
 221          * to IOMMU code by boot code.
 222          * At this point, all PCI bus renumbering
 223          * is done, so safe to init the IMMU
 224          * AKA Intel IOMMU.
 225          */
 226         immu_init();
 227 #endif
 228 
 229         /*
 230          * attach the isa nexus to get ACPI resource usage
 231          * isa is "kind of" a pseudo node
 232          */
 233 #if defined(__xpv)
 234         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
 235                 if (pseudo_isa)
 236                         (void) i_ddi_attach_pseudo_node("isa");
 237                 else
 238                         (void) i_ddi_attach_hw_nodes("isa");
 239         }
 240 #else
 241         if (pseudo_isa)
 242                 (void) i_ddi_attach_pseudo_node("isa");
 243         else
 244                 (void) i_ddi_attach_hw_nodes("isa");
 245 #endif
 246 }
 247 
 248 /*
 249  * The "status" property indicates the operational status of a device.
 250  * If this property is present, the value is a string indicating the
 251  * status of the device as follows:
 252  *
 253  *      "okay"          operational.
 254  *      "disabled"      not operational, but might become operational.
 255  *      "fail"          not operational because a fault has been detected,
 256  *                      and it is unlikely that the device will become
 257  *                      operational without repair. no additional details
 258  *                      are available.
 259  *      "fail-xxx"      not operational because a fault has been detected,
 260  *                      and it is unlikely that the device will become
 261  *                      operational without repair. "xxx" is additional
 262  *                      human-readable information about the particular
 263  *                      fault condition that was detected.
 264  *
 265  * The absence of this property means that the operational status is
 266  * unknown or okay.
 267  *
 268  * This routine checks the status property of the specified device node
 269  * and returns 0 if the operational status indicates failure, and 1 otherwise.
 270  *
 271  * The property may exist on plug-in cards the existed before IEEE 1275-1994.
 272  * And, in that case, the property may not even be a string. So we carefully
 273  * check for the value "fail", in the beginning of the string, noting
 274  * the property length.
 275  */
 276 int
 277 status_okay(int id, char *buf, int buflen)
 278 {
 279         char status_buf[OBP_MAXPROPNAME];
 280         char *bufp = buf;
 281         int len = buflen;
 282         int proplen;
 283         static const char *status = "status";
 284         static const char *fail = "fail";
 285         int fail_len = (int)strlen(fail);
 286 
 287         /*
 288          * Get the proplen ... if it's smaller than "fail",
 289          * or doesn't exist ... then we don't care, since
 290          * the value can't begin with the char string "fail".
 291          *
 292          * NB: proplen, if it's a string, includes the NULL in the
 293          * the size of the property, and fail_len does not.
 294          */
 295         proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
 296         if (proplen <= fail_len)     /* nonexistant or uninteresting len */
 297                 return (1);
 298 
 299         /*
 300          * if a buffer was provided, use it
 301          */
 302         if ((buf == (char *)NULL) || (buflen <= 0)) {
 303                 bufp = status_buf;
 304                 len = sizeof (status_buf);
 305         }
 306         *bufp = (char)0;
 307 
 308         /*
 309          * Get the property into the buffer, to the extent of the buffer,
 310          * and in case the buffer is smaller than the property size,
 311          * NULL terminate the buffer. (This handles the case where
 312          * a buffer was passed in and the caller wants to print the
 313          * value, but the buffer was too small).
 314          */
 315         (void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
 316             (caddr_t)bufp, len);
 317         *(bufp + len - 1) = (char)0;
 318 
 319         /*
 320          * If the value begins with the char string "fail",
 321          * then it means the node is failed. We don't care
 322          * about any other values. We assume the node is ok
 323          * although it might be 'disabled'.
 324          */
 325         if (strncmp(bufp, fail, fail_len) == 0)
 326                 return (0);
 327 
 328         return (1);
 329 }
 330 
 331 /*
 332  * Check the status of the device node passed as an argument.
 333  *
 334  *      if ((status is OKAY) || (status is DISABLED))
 335  *              return DDI_SUCCESS
 336  *      else
 337  *              print a warning and return DDI_FAILURE
 338  */
 339 /*ARGSUSED1*/
 340 int
 341 check_status(int id, char *name, dev_info_t *parent)
 342 {
 343         char status_buf[64];
 344         char devtype_buf[OBP_MAXPROPNAME];
 345         int retval = DDI_FAILURE;
 346 
 347         /*
 348          * is the status okay?
 349          */
 350         if (status_okay(id, status_buf, sizeof (status_buf)))
 351                 return (DDI_SUCCESS);
 352 
 353         /*
 354          * a status property indicating bad memory will be associated
 355          * with a node which has a "device_type" property with a value of
 356          * "memory-controller". in this situation, return DDI_SUCCESS
 357          */
 358         if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
 359             sizeof (devtype_buf)) > 0) {
 360                 if (strcmp(devtype_buf, "memory-controller") == 0)
 361                         retval = DDI_SUCCESS;
 362         }
 363 
 364         /*
 365          * print the status property information
 366          */
 367         cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
 368         return (retval);
 369 }
 370 
/*
 * Soft interrupt handler: calls softint() and returns 1.
 * Neither argument is used.
 */
/*ARGSUSED*/
uint_t
softlevel1(caddr_t arg1, caddr_t arg2)
{
        softint();
        return (1);
}
 378 
/*
 * Allow for implementation specific correction of PROM property values.
 *
 * This implementation is a deliberate no-op: none of the arguments is
 * examined or modified.
 */

/*ARGSUSED*/
void
impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    caddr_t buffer)
{
        /*
         * There are no adjustments needed in this implementation.
         */
}
 392 
 393 static int
 394 getlongprop_buf(int id, char *name, char *buf, int maxlen)
 395 {
 396         int size;
 397 
 398         size = prom_getproplen((pnode_t)id, name);
 399         if (size <= 0 || (size > maxlen - 1))
 400                 return (-1);
 401 
 402         if (-1 == prom_getprop((pnode_t)id, name, buf))
 403                 return (-1);
 404 
 405         if (strcmp("name", name) == 0) {
 406                 if (buf[size - 1] != '\0') {
 407                         buf[size] = '\0';
 408                         size += 1;
 409                 }
 410         }
 411 
 412         return (size);
 413 }
 414 
 415 static int
 416 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
 417 {
 418         int ret;
 419 
 420         if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
 421             DDI_PROP_DONTPASS, pname, pval, plen))
 422             == DDI_PROP_SUCCESS) {
 423                 *plen = (*plen) * (sizeof (int));
 424         }
 425         return (ret);
 426 }
 427 
 428 
 429 /*
 430  * Node Configuration
 431  */
 432 
/*
 * One element of the old-form 'intr' property as read by make_ddi_ppd():
 * a priority followed by a vector.
 */
struct prop_ispec {
        uint_t  pri, vec;
};

/*
 * For the x86, we're prepared to claim that the interrupt string
 * is in the form of a list of <ipl,vec> specifications.
 */

/* Inclusive range of vector values accepted by impl_xlate_intrs(). */
#define VEC_MIN 1
#define VEC_MAX 255
 444 
 445 static int
 446 impl_xlate_intrs(dev_info_t *child, int *in,
 447     struct ddi_parent_private_data *pdptr)
 448 {
 449         size_t size;
 450         int n;
 451         struct intrspec *new;
 452         caddr_t got_prop;
 453         int *inpri;
 454         int got_len;
 455         extern int ignore_hardware_nodes;       /* force flag from ddi_impl.c */
 456 
 457         static char bad_intr_fmt[] =
 458             "bad interrupt spec from %s%d - ipl %d, irq %d\n";
 459 
 460         /*
 461          * determine if the driver is expecting the new style "interrupts"
 462          * property which just contains the IRQ, or the old style which
 463          * contains pairs of <IPL,IRQ>.  if it is the new style, we always
 464          * assign IPL 5 unless an "interrupt-priorities" property exists.
 465          * in that case, the "interrupt-priorities" property contains the
 466          * IPL values that match, one for one, the IRQ values in the
 467          * "interrupts" property.
 468          */
 469         inpri = NULL;
 470         if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
 471             "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
 472                 /* the old style "interrupts" property... */
 473 
 474                 /*
 475                  * The list consists of <ipl,vec> elements
 476                  */
 477                 if ((n = (*in++ >> 1)) < 1)
 478                         return (DDI_FAILURE);
 479 
 480                 pdptr->par_nintr = n;
 481                 size = n * sizeof (struct intrspec);
 482                 new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
 483 
 484                 while (n--) {
 485                         int level = *in++;
 486                         int vec = *in++;
 487 
 488                         if (level < 1 || level > MAXIPL ||
 489                             vec < VEC_MIN || vec > VEC_MAX) {
 490                                 cmn_err(CE_CONT, bad_intr_fmt,
 491                                     DEVI(child)->devi_name,
 492                                     DEVI(child)->devi_instance, level, vec);
 493                                 goto broken;
 494                         }
 495                         new->intrspec_pri = level;
 496                         if (vec != 2)
 497                                 new->intrspec_vec = vec;
 498                         else
 499                                 /*
 500                                  * irq 2 on the PC bus is tied to irq 9
 501                                  * on ISA, EISA and MicroChannel
 502                                  */
 503                                 new->intrspec_vec = 9;
 504                         new++;
 505                 }
 506 
 507                 return (DDI_SUCCESS);
 508         } else {
 509                 /* the new style "interrupts" property... */
 510 
 511                 /*
 512                  * The list consists of <vec> elements
 513                  */
 514                 if ((n = (*in++)) < 1)
 515                         return (DDI_FAILURE);
 516 
 517                 pdptr->par_nintr = n;
 518                 size = n * sizeof (struct intrspec);
 519                 new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
 520 
 521                 /* XXX check for "interrupt-priorities" property... */
 522                 if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
 523                     "interrupt-priorities", (caddr_t)&got_prop, &got_len)
 524                     == DDI_PROP_SUCCESS) {
 525                         if (n != (got_len / sizeof (int))) {
 526                                 cmn_err(CE_CONT,
 527                                     "bad interrupt-priorities length"
 528                                     " from %s%d: expected %d, got %d\n",
 529                                     DEVI(child)->devi_name,
 530                                     DEVI(child)->devi_instance, n,
 531                                     (int)(got_len / sizeof (int)));
 532                                 goto broken;
 533                         }
 534                         inpri = (int *)got_prop;
 535                 }
 536 
 537                 while (n--) {
 538                         int level;
 539                         int vec = *in++;
 540 
 541                         if (inpri == NULL)
 542                                 level = 5;
 543                         else
 544                                 level = *inpri++;
 545 
 546                         if (level < 1 || level > MAXIPL ||
 547                             vec < VEC_MIN || vec > VEC_MAX) {
 548                                 cmn_err(CE_CONT, bad_intr_fmt,
 549                                     DEVI(child)->devi_name,
 550                                     DEVI(child)->devi_instance, level, vec);
 551                                 goto broken;
 552                         }
 553                         new->intrspec_pri = level;
 554                         if (vec != 2)
 555                                 new->intrspec_vec = vec;
 556                         else
 557                                 /*
 558                                  * irq 2 on the PC bus is tied to irq 9
 559                                  * on ISA, EISA and MicroChannel
 560                                  */
 561                                 new->intrspec_vec = 9;
 562                         new++;
 563                 }
 564 
 565                 if (inpri != NULL)
 566                         kmem_free(got_prop, got_len);
 567                 return (DDI_SUCCESS);
 568         }
 569 
 570 broken:
 571         kmem_free(pdptr->par_intr, size);
 572         pdptr->par_intr = NULL;
 573         pdptr->par_nintr = 0;
 574         if (inpri != NULL)
 575                 kmem_free(got_prop, got_len);
 576 
 577         return (DDI_FAILURE);
 578 }
 579 
 580 /*
 581  * Create a ddi_parent_private_data structure from the ddi properties of
 582  * the dev_info node.
 583  *
 584  * The "reg" and either an "intr" or "interrupts" properties are required
 585  * if the driver wishes to create mappings or field interrupts on behalf
 586  * of the device.
 587  *
 588  * The "reg" property is assumed to be a list of at least one triple
 589  *
 590  *      <bustype, address, size>*1
 591  *
 592  * The "intr" property is assumed to be a list of at least one duple
 593  *
 594  *      <SPARC ipl, vector#>*1
 595  *
 596  * The "interrupts" property is assumed to be a list of at least one
 597  * n-tuples that describes the interrupt capabilities of the bus the device
 598  * is connected to.  For SBus, this looks like
 599  *
 600  *      <SBus-level>*1
 601  *
 602  * (This property obsoletes the 'intr' property).
 603  *
 604  * The "ranges" property is optional.
 605  */
 606 void
 607 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
 608 {
 609         struct ddi_parent_private_data *pdptr;
 610         int n;
 611         int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
 612         uint_t reg_len, rng_len, intr_len, irupts_len;
 613 
 614         *ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
 615 
 616         /*
 617          * Handle the 'reg' property.
 618          */
 619         if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
 620             DDI_PROP_SUCCESS) && (reg_len != 0)) {
 621                 pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
 622                 pdptr->par_reg = (struct regspec *)reg_prop;
 623         }
 624 
 625         /*
 626          * See if I have a range (adding one where needed - this
 627          * means to add one for sbus node in sun4c, when romvec > 0,
 628          * if no range is already defined in the PROM node.
 629          * (Currently no sun4c PROMS define range properties,
 630          * but they should and may in the future.)  For the SBus
 631          * node, the range is defined by the SBus reg property.
 632          */
 633         if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
 634             == DDI_PROP_SUCCESS) {
 635                 pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
 636                 pdptr->par_rng = (struct rangespec *)rng_prop;
 637         }
 638 
 639         /*
 640          * Handle the 'intr' and 'interrupts' properties
 641          */
 642 
 643         /*
 644          * For backwards compatibility
 645          * we first look for the 'intr' property for the device.
 646          */
 647         if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
 648             != DDI_PROP_SUCCESS) {
 649                 intr_len = 0;
 650         }
 651 
 652         /*
 653          * If we're to support bus adapters and future platforms cleanly,
 654          * we need to support the generalized 'interrupts' property.
 655          */
 656         if (get_prop_int_array(child, "interrupts", &irupts_prop,
 657             &irupts_len) != DDI_PROP_SUCCESS) {
 658                 irupts_len = 0;
 659         } else if (intr_len != 0) {
 660                 /*
 661                  * If both 'intr' and 'interrupts' are defined,
 662                  * then 'interrupts' wins and we toss the 'intr' away.
 663                  */
 664                 ddi_prop_free((void *)intr_prop);
 665                 intr_len = 0;
 666         }
 667 
 668         if (intr_len != 0) {
 669 
 670                 /*
 671                  * Translate the 'intr' property into an array
 672                  * an array of struct intrspec's.  There's not really
 673                  * very much to do here except copy what's out there.
 674                  */
 675 
 676                 struct intrspec *new;
 677                 struct prop_ispec *l;
 678 
 679                 n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
 680                 l = (struct prop_ispec *)intr_prop;
 681                 pdptr->par_intr =
 682                     new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
 683                 while (n--) {
 684                         new->intrspec_pri = l->pri;
 685                         new->intrspec_vec = l->vec;
 686                         new++;
 687                         l++;
 688                 }
 689                 ddi_prop_free((void *)intr_prop);
 690 
 691         } else if ((n = irupts_len) != 0) {
 692                 size_t size;
 693                 int *out;
 694 
 695                 /*
 696                  * Translate the 'interrupts' property into an array
 697                  * of intrspecs for the rest of the DDI framework to
 698                  * toy with.  Only our ancestors really know how to
 699                  * do this, so ask 'em.  We massage the 'interrupts'
 700                  * property so that it is pre-pended by a count of
 701                  * the number of integers in the argument.
 702                  */
 703                 size = sizeof (int) + n;
 704                 out = kmem_alloc(size, KM_SLEEP);
 705                 *out = n / sizeof (int);
 706                 bcopy(irupts_prop, out + 1, (size_t)n);
 707                 ddi_prop_free((void *)irupts_prop);
 708                 if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
 709                         cmn_err(CE_CONT,
 710                             "Unable to translate 'interrupts' for %s%d\n",
 711                             DEVI(child)->devi_binding_name,
 712                             DEVI(child)->devi_instance);
 713                 }
 714                 kmem_free(out, size);
 715         }
 716 }
 717 
 718 /*
 719  * Name a child
 720  */
 721 static int
 722 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
 723 {
 724         /*
 725          * Fill in parent-private data and this function returns to us
 726          * an indication if it used "registers" to fill in the data.
 727          */
 728         if (ddi_get_parent_data(child) == NULL) {
 729                 struct ddi_parent_private_data *pdptr;
 730                 make_ddi_ppd(child, &pdptr);
 731                 ddi_set_parent_data(child, pdptr);
 732         }
 733 
 734         name[0] = '\0';
 735         if (sparc_pd_getnreg(child) > 0) {
 736                 (void) snprintf(name, namelen, "%x,%x",
 737                     (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
 738                     (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
 739         }
 740 
 741         return (DDI_SUCCESS);
 742 }
 743 
 744 /*
 745  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
 746  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
 747  * the children of sun busses based on the reg spec.
 748  *
 749  * Handles the following properties (in make_ddi_ppd):
 750  *      Property                value
 751  *        Name                  type
 752  *      reg             register spec
 753  *      intr            old-form interrupt spec
 754  *      interrupts      new (bus-oriented) interrupt spec
 755  *      ranges          range spec
 756  */
 757 int
 758 impl_ddi_sunbus_initchild(dev_info_t *child)
 759 {
 760         char name[MAXNAMELEN];
 761         void impl_ddi_sunbus_removechild(dev_info_t *);
 762 
 763         /*
 764          * Name the child, also makes parent private data
 765          */
 766         (void) impl_sunbus_name_child(child, name, MAXNAMELEN);
 767         ddi_set_name_addr(child, name);
 768 
 769         /*
 770          * Attempt to merge a .conf node; if successful, remove the
 771          * .conf node.
 772          */
 773         if ((ndi_dev_is_persistent_node(child) == 0) &&
 774             (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
 775                 /*
 776                  * Return failure to remove node
 777                  */
 778                 impl_ddi_sunbus_removechild(child);
 779                 return (DDI_FAILURE);
 780         }
 781         return (DDI_SUCCESS);
 782 }
 783 
 784 void
 785 impl_free_ddi_ppd(dev_info_t *dip)
 786 {
 787         struct ddi_parent_private_data *pdptr;
 788         size_t n;
 789 
 790         if ((pdptr = ddi_get_parent_data(dip)) == NULL)
 791                 return;
 792 
 793         if ((n = (size_t)pdptr->par_nintr) != 0)
 794                 /*
 795                  * Note that kmem_free is used here (instead of
 796                  * ddi_prop_free) because the contents of the
 797                  * property were placed into a separate buffer and
 798                  * mucked with a bit before being stored in par_intr.
 799                  * The actual return value from the prop lookup
 800                  * was freed with ddi_prop_free previously.
 801                  */
 802                 kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
 803 
 804         if ((n = (size_t)pdptr->par_nrng) != 0)
 805                 ddi_prop_free((void *)pdptr->par_rng);
 806 
 807         if ((n = pdptr->par_nreg) != 0)
 808                 ddi_prop_free((void *)pdptr->par_reg);
 809 
 810         kmem_free(pdptr, sizeof (*pdptr));
 811         ddi_set_parent_data(dip, NULL);
 812 }
 813 
/*
 * Undo impl_ddi_sunbus_initchild(): free the parent private data, clear
 * the unit address and remove the node's properties.
 */
void
impl_ddi_sunbus_removechild(dev_info_t *dip)
{
        impl_free_ddi_ppd(dip);
        ddi_set_name_addr(dip, NULL);
        /*
         * Strip the node to properly convert it back to prototype form
         */
        impl_rem_dev_props(dip);
}
 824 
 825 /*
 826  * DDI Interrupt
 827  */
 828 
/*
 * turn this on to force isa, eisa, and mca device to ignore the new
 * hardware nodes in the device tree (normally turned on only for
 * drivers that need it by setting the property "ignore-hardware-nodes"
 * in their driver.conf file).
 *
 * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
 *              as safety valve.
 *
 * impl_xlate_intrs() treats a non-zero value exactly like a per-node
 * "ignore-hardware-nodes" property.
 */
int ignore_hardware_nodes = 0;

/*
 * Local data
 */
/* NOTE(review): not referenced in this part of the file -- confirm use. */
static struct impl_bus_promops *impl_busp;
 844 
 845 
 846 /*
 847  * New DDI interrupt framework
 848  */
 849 
 850 /*
 851  * i_ddi_intr_ops:
 852  *
 853  * This is the interrupt operator function wrapper for the bus function
 854  * bus_intr_op.
 855  */
 856 int
 857 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
 858     ddi_intr_handle_impl_t *hdlp, void * result)
 859 {
 860         dev_info_t      *pdip = (dev_info_t *)DEVI(dip)->devi_parent;
 861         int             ret = DDI_FAILURE;
 862 
 863         /* request parent to process this interrupt op */
 864         if (NEXUS_HAS_INTR_OP(pdip))
 865                 ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
 866                     pdip, rdip, op, hdlp, result);
 867         else
 868                 cmn_err(CE_WARN, "Failed to process interrupt "
 869                     "for %s%d due to down-rev nexus driver %s%d",
 870                     ddi_get_name(rdip), ddi_get_instance(rdip),
 871                     ddi_get_name(pdip), ddi_get_instance(pdip));
 872         return (ret);
 873 }
 874 
 875 /*
 876  * i_ddi_add_softint - allocate and add a soft interrupt to the system
 877  */
 878 int
 879 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
 880 {
 881         int ret;
 882 
 883         /* add soft interrupt handler */
 884         ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
 885             DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
 886         return (ret ? DDI_SUCCESS : DDI_FAILURE);
 887 }
 888 
 889 
 890 void
 891 i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
 892 {
 893         (void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
 894 }
 895 
 896 
 897 extern void (*setsoftint)(int, struct av_softinfo *);
 898 extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
 899 
 900 int
 901 i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
 902 {
 903         if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
 904                 return (DDI_EPENDING);
 905 
 906         update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);
 907 
 908         (*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
 909         return (DDI_SUCCESS);
 910 }
 911 
/*
 * i_ddi_set_softint_pri:
 *
 * The way this works is that it first tries to add a softint vector
 * at the new priority in hdlp. If that succeeds, it then removes the
 * existing softint vector at the old priority.
 */
 919 int
 920 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
 921 {
 922         int ret;
 923 
 924         /*
 925          * If a softint is pending at the old priority then fail the request.
 926          */
 927         if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
 928                 return (DDI_FAILURE);
 929 
 930         ret = av_softint_movepri((void *)hdlp, old_pri);
 931         return (ret ? DDI_SUCCESS : DDI_FAILURE);
 932 }
 933 
 934 void
 935 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
 936 {
 937         hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
 938 }
 939 
 940 void
 941 i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
 942 {
 943         kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
 944         hdlp->ih_private = NULL;
 945 }
 946 
 947 int
 948 i_ddi_get_intx_nintrs(dev_info_t *dip)
 949 {
 950         struct ddi_parent_private_data *pdp;
 951 
 952         if ((pdp = ddi_get_parent_data(dip)) == NULL)
 953                 return (0);
 954 
 955         return (pdp->par_nintr);
 956 }
 957 
 958 /*
 959  * DDI Memory/DMA
 960  */
 961 
 962 /*
 963  * Support for allocating DMAable memory to implement
 964  * ddi_dma_mem_alloc(9F) interface.
 965  */
 966 
 967 #define KA_ALIGN_SHIFT  7
 968 #define KA_ALIGN        (1 << KA_ALIGN_SHIFT)
 969 #define KA_NCACHE       (PAGESHIFT + 1 - KA_ALIGN_SHIFT)
 970 
 971 /*
 972  * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
 973  * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
 974  */
 975 
static ddi_dma_attr_t kmem_io_attr = {
        DMA_ATTR_V0,                    /* dma_attr_version */
        0x0000000000000000ULL,          /* dma_attr_addr_lo */
        0x0000000000000000ULL,          /* dma_attr_addr_hi */
        0x00ffffff,                     /* dma_attr_count_max */
        0x1000,                         /* dma_attr_align */
        1,                              /* dma_attr_burstsizes */
        1,                              /* dma_attr_minxfer */
        0xffffffffULL,                  /* dma_attr_maxxfer */
        0xffffffffULL,                  /* dma_attr_seg */
        0x1,                            /* dma_attr_sgllen */
        1,                              /* dma_attr_granular */
        0                               /* dma_attr_flags */
};
 984 
 985 /* kmem io memory ranges and indices */
 986 enum {
 987         IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
 988         IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
 989 };
 990 
 991 static struct {
 992         vmem_t          *kmem_io_arena;
 993         kmem_cache_t    *kmem_io_cache[KA_NCACHE];
 994         ddi_dma_attr_t  kmem_io_attr;
 995 } kmem_io[MAX_MEM_RANGES];
 996 
 997 static int kmem_io_idx;         /* index of first populated kmem_io[] */
 998 
 999 static page_t *
1000 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
1001 {
1002         extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1003             uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1004 
1005         return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
1006             PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
1007 }
1008 
1009 #ifdef __xpv
1010 static void
1011 segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
1012 {
1013         extern void page_destroy_io(page_t *);
1014         segkmem_xfree(vmp, ptr, size, page_destroy_io);
1015 }
1016 #endif
1017 
1018 static void *
1019 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1020 {
1021         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1022             page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1023 }
1024 
1025 static void *
1026 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1027 {
1028         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1029             page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1030 }
1031 
1032 static void *
1033 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1034 {
1035         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1036             page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1037 }
1038 
1039 static void *
1040 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1041 {
1042         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1043             page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1044 }
1045 
1046 static void *
1047 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1048 {
1049         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1050             page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1051 }
1052 
1053 static void *
1054 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1055 {
1056         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1057             page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1058 }
1059 
1060 static void *
1061 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1062 {
1063         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1064             page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1065 }
1066 
1067 static void *
1068 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1069 {
1070         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1071             page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1072 }
1073 
1074 static void *
1075 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1076 {
1077         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1078             page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1079 }
1080 
1081 static void *
1082 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1083 {
1084         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1085             page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1086 }
1087 
1088 static void *
1089 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1090 {
1091         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1092             page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1093 }
1094 
/*
 * Per-range parameters for the kmem_io arenas, indexed by the IO_*
 * range constants.  Entries are listed from the largest address limit
 * to the smallest.
 */
struct {
        uint64_t        io_limit;       /* copied to dma_attr_addr_hi by ka_init */
        char            *io_name;       /* arena name given to vmem_create */
        void            *(*io_alloc)(vmem_t *, size_t, int); /* arena allocator */
        int             io_initial;     /* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
        {0x000fffffffffffffULL, "kmem_io_4P",   segkmem_alloc_io_4P,    1},
        {0x0000000fffffffffULL, "kmem_io_64G",  segkmem_alloc_io_64G,   0},
        {0x00000000ffffffffULL, "kmem_io_4G",   segkmem_alloc_io_4G,    1},
        {0x000000007fffffffULL, "kmem_io_2G",   segkmem_alloc_io_2G,    1},
        {0x000000003fffffffULL, "kmem_io_1G",   segkmem_alloc_io_1G,    0},
        {0x000000001fffffffULL, "kmem_io_512M", segkmem_alloc_io_512M,  0},
        {0x000000000fffffffULL, "kmem_io_256M", segkmem_alloc_io_256M,  0},
        {0x0000000007ffffffULL, "kmem_io_128M", segkmem_alloc_io_128M,  0},
        {0x0000000003ffffffULL, "kmem_io_64M",  segkmem_alloc_io_64M,   0},
        {0x0000000001ffffffULL, "kmem_io_32M",  segkmem_alloc_io_32M,   0},
        {0x0000000000ffffffULL, "kmem_io_16M",  segkmem_alloc_io_16M,   1}
};
1113 
1114 void
1115 kmem_io_init(int a)
1116 {
1117         int     c;
1118         char name[40];
1119 
1120         kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1121             NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1122 #ifdef __xpv
1123             segkmem_free_io,
1124 #else
1125             segkmem_free,
1126 #endif
1127             heap_arena, 0, VM_SLEEP);
1128 
1129         for (c = 0; c < KA_NCACHE; c++) {
1130                 size_t size = KA_ALIGN << c;
1131                 (void) sprintf(name, "%s_%lu",
1132                     io_arena_params[a].io_name, size);
1133                 kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1134                     size, size, NULL, NULL, NULL, NULL,
1135                     kmem_io[a].kmem_io_arena, 0);
1136         }
1137 }
1138 
1139 /*
1140  * Return the index of the highest memory range for addr.
1141  */
1142 static int
1143 kmem_io_index(uint64_t addr)
1144 {
1145         int n;
1146 
1147         for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1148                 if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1149                         if (kmem_io[n].kmem_io_arena == NULL)
1150                                 kmem_io_init(n);
1151                         return (n);
1152                 }
1153         }
1154         panic("kmem_io_index: invalid addr - must be at least 16m");
1155 
1156         /*NOTREACHED*/
1157 }
1158 
1159 /*
1160  * Return the index of the next kmem_io populated memory range
1161  * after curindex.
1162  */
1163 static int
1164 kmem_io_index_next(int curindex)
1165 {
1166         int n;
1167 
1168         for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1169                 if (kmem_io[n].kmem_io_arena)
1170                         return (n);
1171         }
1172         return (-1);
1173 }
1174 
1175 /*
1176  * allow kmem to be mapped in with different PTE cache attribute settings.
1177  * Used by i_ddi_mem_alloc()
1178  */
1179 int
1180 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1181 {
1182         uint_t hat_flags;
1183         caddr_t kva_end;
1184         uint_t hat_attr;
1185         pfn_t pfn;
1186 
1187         if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1188                 return (-1);
1189         }
1190 
1191         hat_attr &= ~HAT_ORDER_MASK;
1192         hat_attr |= order | HAT_NOSYNC;
1193         hat_flags = HAT_LOAD_LOCK;
1194 
1195         kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1196             (uintptr_t)PAGEMASK);
1197         kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1198 
1199         while (kva < kva_end) {
1200                 pfn = hat_getpfnum(kas.a_hat, kva);
1201                 hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1202                 hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1203                 kva += MMU_PAGESIZE;
1204         }
1205 
1206         return (0);
1207 }
1208 
1209 static int
1210 ctgcompare(const void *a1, const void *a2)
1211 {
1212         /* we just want to compare virtual addresses */
1213         a1 = ((struct ctgas *)a1)->ctg_addr;
1214         a2 = ((struct ctgas *)a2)->ctg_addr;
1215         return (a1 == a2 ? 0 : (a1 < a2 ? -1 : 1));
1216 }
1217 
1218 void
1219 ka_init(void)
1220 {
1221         int a;
1222         paddr_t maxphysaddr;
1223 #if !defined(__xpv)
1224         extern pfn_t physmax;
1225 
1226         maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1227 #else
1228         maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1229             XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1230 #endif
1231 
1232         ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1233 
1234         for (a = 0; a < MAX_MEM_RANGES; a++) {
1235                 if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1236                         if (maxphysaddr > io_arena_params[a + 1].io_limit)
1237                                 io_arena_params[a].io_limit = maxphysaddr;
1238                         else
1239                                 a++;
1240                         break;
1241                 }
1242         }
1243         kmem_io_idx = a;
1244 
1245         for (; a < MAX_MEM_RANGES; a++) {
1246                 kmem_io[a].kmem_io_attr = kmem_io_attr;
1247                 kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1248                     io_arena_params[a].io_limit;
1249                 /*
1250                  * initialize kmem_io[] arena/cache corresponding to
1251                  * maxphysaddr and to the "common" io memory ranges that
1252                  * have io_initial set to a non-zero value.
1253                  */
1254                 if (io_arena_params[a].io_initial || a == kmem_io_idx)
1255                         kmem_io_init(a);
1256         }
1257 
1258         /* initialize ctgtree */
1259         avl_create(&ctgtree, ctgcompare, sizeof (struct ctgas),
1260             offsetof(struct ctgas, ctg_link));
1261 }
1262 
1263 /*
1264  * put contig address/size
1265  */
1266 static void *
1267 putctgas(void *addr, size_t size)
1268 {
1269         struct ctgas    *ctgp;
1270         if ((ctgp = kmem_zalloc(sizeof (*ctgp), KM_NOSLEEP)) != NULL) {
1271                 ctgp->ctg_addr = addr;
1272                 ctgp->ctg_size = size;
1273                 CTGLOCK();
1274                 avl_add(&ctgtree, ctgp);
1275                 CTGUNLOCK();
1276         }
1277         return (ctgp);
1278 }
1279 
1280 /*
1281  * get contig size by addr
1282  */
1283 static size_t
1284 getctgsz(void *addr)
1285 {
1286         struct ctgas    *ctgp;
1287         struct ctgas    find;
1288         size_t          sz = 0;
1289 
1290         find.ctg_addr = addr;
1291         CTGLOCK();
1292         if ((ctgp = avl_find(&ctgtree, &find, NULL)) != NULL) {
1293                 avl_remove(&ctgtree, ctgp);
1294         }
1295         CTGUNLOCK();
1296 
1297         if (ctgp != NULL) {
1298                 sz = ctgp->ctg_size;
1299                 kmem_free(ctgp, sizeof (*ctgp));
1300         }
1301 
1302         return (sz);
1303 }
1304 
1305 /*
1306  * contig_alloc:
1307  *
1308  *      allocates contiguous memory to satisfy the 'size' and dma attributes
1309  *      specified in 'attr'.
1310  *
1311  *      Not all of memory need to be physically contiguous if the
1312  *      scatter-gather list length is greater than 1.
1313  */
1314 
1315 /*ARGSUSED*/
1316 void *
1317 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1318 {
1319         pgcnt_t         pgcnt = btopr(size);
1320         size_t          asize = pgcnt * PAGESIZE;
1321         page_t          *ppl;
1322         int             pflag;
1323         void            *addr;
1324 
1325         extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1326             uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1327 
1328         /* segkmem_xalloc */
1329 
1330         if (align <= PAGESIZE)
1331                 addr = vmem_alloc(heap_arena, asize,
1332                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1333         else
1334                 addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1335                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1336         if (addr) {
1337                 ASSERT(!((uintptr_t)addr & (align - 1)));
1338 
1339                 if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1340                         vmem_free(heap_arena, addr, asize);
1341                         return (NULL);
1342                 }
1343                 pflag = PG_EXCL;
1344 
1345                 if (cansleep)
1346                         pflag |= PG_WAIT;
1347 
1348                 /* 4k req gets from freelists rather than pfn search */
1349                 if (pgcnt > 1 || align > PAGESIZE)
1350                         pflag |= PG_PHYSCONTIG;
1351 
1352                 ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1353                     asize, pflag, &kas, (caddr_t)addr, attr);
1354 
1355                 if (!ppl) {
1356                         vmem_free(heap_arena, addr, asize);
1357                         page_unresv(pgcnt);
1358                         return (NULL);
1359                 }
1360 
1361                 while (ppl != NULL) {
1362                         page_t  *pp = ppl;
1363                         page_sub(&ppl, pp);
1364                         ASSERT(page_iolock_assert(pp));
1365                         page_io_unlock(pp);
1366                         page_downgrade(pp);
1367                         hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1368                             pp, (PROT_ALL & ~PROT_USER) |
1369                             HAT_NOSYNC, HAT_LOAD_LOCK);
1370                 }
1371         }
1372         return (addr);
1373 }
1374 
1375 void
1376 contig_free(void *addr, size_t size)
1377 {
1378         pgcnt_t pgcnt = btopr(size);
1379         size_t  asize = pgcnt * PAGESIZE;
1380         caddr_t a, ea;
1381         page_t  *pp;
1382 
1383         hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1384 
1385         for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1386                 pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1387                 if (!pp)
1388                         panic("contig_free: contig pp not found");
1389 
1390                 if (!page_tryupgrade(pp)) {
1391                         page_unlock(pp);
1392                         pp = page_lookup(&kvp,
1393                             (u_offset_t)(uintptr_t)a, SE_EXCL);
1394                         if (pp == NULL)
1395                                 panic("contig_free: page freed");
1396                 }
1397                 page_destroy(pp, 0);
1398         }
1399 
1400         page_unresv(pgcnt);
1401         vmem_free(heap_arena, addr, asize);
1402 }
1403 
1404 /*
1405  * Allocate from the system, aligned on a specific boundary.
1406  * The alignment, if non-zero, must be a power of 2.
1407  */
1408 static void *
1409 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1410         ddi_dma_attr_t *attr)
1411 {
1412         size_t *addr, *raddr, rsize;
1413         size_t hdrsize = 4 * sizeof (size_t);   /* must be power of 2 */
1414         int a, i, c;
1415         vmem_t *vmp;
1416         kmem_cache_t *cp = NULL;
1417 
1418         if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1419                 return (NULL);
1420 
1421         align = MAX(align, hdrsize);
1422         ASSERT((align & (align - 1)) == 0);
1423 
1424         /*
1425          * All of our allocators guarantee 16-byte alignment, so we don't
1426          * need to reserve additional space for the header.
1427          * To simplify picking the correct kmem_io_cache, we round up to
1428          * a multiple of KA_ALIGN.
1429          */
1430         rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1431 
1432         if (physcontig && rsize > PAGESIZE) {
1433                 if (addr = contig_alloc(size, attr, align, cansleep)) {
1434                         if (!putctgas(addr, size))
1435                                 contig_free(addr, size);
1436                         else
1437                                 return (addr);
1438                 }
1439                 return (NULL);
1440         }
1441 
1442         a = kmem_io_index(attr->dma_attr_addr_hi);
1443 
1444         if (rsize > PAGESIZE) {
1445                 vmp = kmem_io[a].kmem_io_arena;
1446                 raddr = vmem_alloc(vmp, rsize,
1447                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1448         } else {
1449                 c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1450                 cp = kmem_io[a].kmem_io_cache[c];
1451                 raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1452                     KM_NOSLEEP);
1453         }
1454 
1455         if (raddr == NULL) {
1456                 int     na;
1457 
1458                 ASSERT(cansleep == 0);
1459                 if (rsize > PAGESIZE)
1460                         return (NULL);
1461                 /*
1462                  * System does not have memory in the requested range.
1463                  * Try smaller kmem io ranges and larger cache sizes
1464                  * to see if there might be memory available in
1465                  * these other caches.
1466                  */
1467 
1468                 for (na = kmem_io_index_next(a); na >= 0;
1469                     na = kmem_io_index_next(na)) {
1470                         ASSERT(kmem_io[na].kmem_io_arena);
1471                         cp = kmem_io[na].kmem_io_cache[c];
1472                         raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1473                         if (raddr)
1474                                 goto kallocdone;
1475                 }
1476                 /* now try the larger kmem io cache sizes */
1477                 for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1478                         for (i = c + 1; i < KA_NCACHE; i++) {
1479                                 cp = kmem_io[na].kmem_io_cache[i];
1480                                 raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1481                                 if (raddr)
1482                                         goto kallocdone;
1483                         }
1484                 }
1485                 return (NULL);
1486         }
1487 
1488 kallocdone:
1489         ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1490             rsize > PAGESIZE);
1491 
1492         addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1493         ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1494 
1495         addr[-4] = (size_t)cp;
1496         addr[-3] = (size_t)vmp;
1497         addr[-2] = (size_t)raddr;
1498         addr[-1] = rsize;
1499 
1500         return (addr);
1501 }
1502 
1503 static void
1504 kfreea(void *addr)
1505 {
1506         size_t          size;
1507 
1508         if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1509                 contig_free(addr, size);
1510         } else {
1511                 size_t  *saddr = addr;
1512                 if (saddr[-4] == 0)
1513                         vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1514                             saddr[-1]);
1515                 else
1516                         kmem_cache_free((kmem_cache_t *)saddr[-4],
1517                             (void *)saddr[-2]);
1518         }
1519 }
1520 
/*
 * Intentionally empty in this implementation: both arguments are
 * ignored and *hataccp is left untouched.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
1526 
1527 /*
1528  * Check if the specified cache attribute is supported on the platform.
1529  * This function must be called before i_ddi_cacheattr_to_hatacc().
1530  */
1531 boolean_t
1532 i_ddi_check_cache_attr(uint_t flags)
1533 {
1534         /*
1535          * The cache attributes are mutually exclusive. Any combination of
1536          * the attributes leads to a failure.
1537          */
1538         uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1539         if ((cache_attr != 0) && ((cache_attr & (cache_attr - 1)) != 0))
1540                 return (B_FALSE);
1541 
1542         /* All cache attributes are supported on X86/X64 */
1543         if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1544             IOMEM_DATA_UC_WR_COMBINE))
1545                 return (B_TRUE);
1546 
1547         /* undefined attributes */
1548         return (B_FALSE);
1549 }
1550 
1551 /* set HAT cache attributes from the cache attributes */
1552 void
1553 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1554 {
1555         uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1556         static char *fname = "i_ddi_cacheattr_to_hatacc";
1557 
1558         /*
1559          * If write-combining is not supported, then it falls back
1560          * to uncacheable.
1561          */
1562         if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1563             !is_x86_feature(x86_featureset, X86FSET_PAT))
1564                 cache_attr = IOMEM_DATA_UNCACHED;
1565 
1566         /*
1567          * set HAT attrs according to the cache attrs.
1568          */
1569         switch (cache_attr) {
1570         case IOMEM_DATA_UNCACHED:
1571                 *hataccp &= ~HAT_ORDER_MASK;
1572                 *hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1573                 break;
1574         case IOMEM_DATA_UC_WR_COMBINE:
1575                 *hataccp &= ~HAT_ORDER_MASK;
1576                 *hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1577                 break;
1578         case IOMEM_DATA_CACHED:
1579                 *hataccp &= ~HAT_ORDER_MASK;
1580                 *hataccp |= HAT_UNORDERED_OK;
1581                 break;
1582         /*
1583          * This case must not occur because the cache attribute is scrutinized
1584          * before this function is called.
1585          */
1586         default:
1587                 /*
1588                  * set cacheable to hat attrs.
1589                  */
1590                 *hataccp &= ~HAT_ORDER_MASK;
1591                 *hataccp |= HAT_UNORDERED_OK;
1592                 cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1593                     fname, cache_attr);
1594         }
1595 }
1596 
1597 /*
1598  * This should actually be called i_ddi_dma_mem_alloc. There should
1599  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1600  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1601  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1602  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1603  * so far which is used for both, DMA and PIO, we have to use the DMA
1604  * ctl ops to make everybody happy.
1605  */
1606 /*ARGSUSED*/
1607 int
1608 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1609         size_t length, int cansleep, int flags,
1610         ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1611         size_t *real_length, ddi_acc_hdl_t *ap)
1612 {
1613         caddr_t a;
1614         int iomin;
1615         ddi_acc_impl_t *iap;
1616         int physcontig = 0;
1617         pgcnt_t npages;
1618         pgcnt_t minctg;
1619         uint_t order;
1620         int e;
1621 
1622         /*
1623          * Check legality of arguments
1624          */
1625         if (length == 0 || kaddrp == NULL || attr == NULL) {
1626                 return (DDI_FAILURE);
1627         }
1628 
1629         if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1630             (attr->dma_attr_align & (attr->dma_attr_align - 1)) ||
1631             (attr->dma_attr_minxfer & (attr->dma_attr_minxfer - 1))) {
1632                         return (DDI_FAILURE);
1633         }
1634 
1635         /*
1636          * figure out most restrictive alignment requirement
1637          */
1638         iomin = attr->dma_attr_minxfer;
1639         iomin = maxbit(iomin, attr->dma_attr_align);
1640         if (iomin == 0)
1641                 return (DDI_FAILURE);
1642 
1643         ASSERT((iomin & (iomin - 1)) == 0);
1644 
1645         /*
1646          * if we allocate memory with IOMEM_DATA_UNCACHED or
1647          * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1648          * memory that ends on a page boundry.
1649          * Don't want to have to different cache mappings to the same
1650          * physical page.
1651          */
1652         if (OVERRIDE_CACHE_ATTR(flags)) {
1653                 iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1654                 length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1655         }
1656 
1657         /*
1658          * Determine if we need to satisfy the request for physically
1659          * contiguous memory or alignments larger than pagesize.
1660          */
1661         npages = btopr(length + attr->dma_attr_align);
1662         minctg = howmany(npages, attr->dma_attr_sgllen);
1663 
1664         if (minctg > 1) {
1665                 uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1666                 /*
1667                  * verify that the minimum contig requirement for the
1668                  * actual length does not cross segment boundary.
1669                  */
1670                 length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1671                     size_t);
1672                 npages = btopr(length);
1673                 minctg = howmany(npages, attr->dma_attr_sgllen);
1674                 if (minctg > pfnseg + 1)
1675                         return (DDI_FAILURE);
1676                 physcontig = 1;
1677         } else {
1678                 length = P2ROUNDUP_TYPED(length, iomin, size_t);
1679         }
1680 
1681         /*
1682          * Allocate the requested amount from the system.
1683          */
1684         a = kalloca(length, iomin, cansleep, physcontig, attr);
1685 
1686         if ((*kaddrp = a) == NULL)
1687                 return (DDI_FAILURE);
1688 
1689         /*
1690          * if we to modify the cache attributes, go back and muck with the
1691          * mappings.
1692          */
1693         if (OVERRIDE_CACHE_ATTR(flags)) {
1694                 order = 0;
1695                 i_ddi_cacheattr_to_hatacc(flags, &order);
1696                 e = kmem_override_cache_attrs(a, length, order);
1697                 if (e != 0) {
1698                         kfreea(a);
1699                         return (DDI_FAILURE);
1700                 }
1701         }
1702 
1703         if (real_length) {
1704                 *real_length = length;
1705         }
1706         if (ap) {
1707                 /*
1708                  * initialize access handle
1709                  */
1710                 iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1711                 iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1712                 impl_acc_hdl_init(ap);
1713         }
1714 
1715         return (DDI_SUCCESS);
1716 }
1717 
1718 /* ARGSUSED */
1719 void
1720 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1721 {
1722         if (ap != NULL) {
1723                 /*
1724                  * if we modified the cache attributes on alloc, go back and
1725                  * fix them since this memory could be returned to the
1726                  * general pool.
1727                  */
1728                 if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1729                         uint_t order = 0;
1730                         int e;
1731                         i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1732                         e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1733                         if (e != 0) {
1734                                 cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1735                                     "override cache attrs, memory leaked\n");
1736                                 return;
1737                         }
1738                 }
1739         }
1740         kfreea(kaddr);
1741 }
1742 
1743 /*
1744  * Access Barriers
1745  *
1746  */
/*
 * Always returns DDI_FAILURE; the handle argument is ignored.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
        return (DDI_FAILURE);
}
1753 
1754 /*ARGSUSED*/
1755 void
1756 i_ddi_notrap(ddi_acc_handle_t hp)
1757 {
1758 }
1759 
1760 
1761 /*
1762  * Misc Functions
1763  */
1764 
1765 /*
1766  * Implementation instance override functions
1767  *
1768  * No override on i86pc
1769  */
1770 /*ARGSUSED*/
1771 uint_t
1772 impl_assign_instance(dev_info_t *dip)
1773 {
1774         return ((uint_t)-1);
1775 }
1776 
1777 /*ARGSUSED*/
1778 int
1779 impl_keep_instance(dev_info_t *dip)
1780 {
1781 
1782 #if defined(__xpv)
1783         /*
1784          * Do not persist instance numbers assigned to devices in dom0
1785          */
1786         dev_info_t *pdip;
1787         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1788                 if (((pdip = ddi_get_parent(dip)) != NULL) &&
1789                     (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1790                         return (DDI_SUCCESS);
1791         }
1792 #endif
1793         return (DDI_FAILURE);
1794 }
1795 
1796 /*ARGSUSED*/
1797 int
1798 impl_free_instance(dev_info_t *dip)
1799 {
1800         return (DDI_FAILURE);
1801 }
1802 
1803 /*ARGSUSED*/
1804 int
1805 impl_check_cpu(dev_info_t *devi)
1806 {
1807         return (DDI_SUCCESS);
1808 }
1809 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this is intentionally a no-op.
 *
 * Takes no arguments and returns nothing.
 */
void
arch_power_down(void)
{
}
1817 
1818 /*
1819  * Copy name to property_name, since name
1820  * is in the low address range below kernelbase.
1821  */
1822 static void
1823 copy_boot_str(const char *boot_str, char *kern_str, int len)
1824 {
1825         int i = 0;
1826 
1827         while (i < len - 1 && boot_str[i] != '\0') {
1828                 kern_str[i] = boot_str[i];
1829                 i++;
1830         }
1831 
1832         kern_str[i] = 0;        /* null terminate */
1833         if (boot_str[i] != '\0')
1834                 cmn_err(CE_WARN,
1835                     "boot property string is truncated to %s", kern_str);
1836 }
1837 
1838 static void
1839 get_boot_properties(void)
1840 {
1841         extern char hw_provider[];
1842         dev_info_t *devi;
1843         char *name;
1844         int length;
1845         char property_name[50], property_val[50];
1846         void *bop_staging_area;
1847 
1848         bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1849 
1850         /*
1851          * Import "root" properties from the boot.
1852          *
1853          * We do this by invoking BOP_NEXTPROP until the list
1854          * is completely copied in.
1855          */
1856 
1857         devi = ddi_root_node();
1858         for (name = BOP_NEXTPROP(bootops, "");          /* get first */
1859             name;                                       /* NULL => DONE */
1860             name = BOP_NEXTPROP(bootops, name)) {       /* get next */
1861 
1862                 /* copy string to memory above kernelbase */
1863                 copy_boot_str(name, property_name, 50);
1864 
1865                 /*
1866                  * Skip vga properties. They will be picked up later
1867                  * by get_vga_properties.
1868                  */
1869                 if (strcmp(property_name, "display-edif-block") == 0 ||
1870                     strcmp(property_name, "display-edif-id") == 0) {
1871                         continue;
1872                 }
1873 
1874                 length = BOP_GETPROPLEN(bootops, property_name);
1875                 if (length == 0)
1876                         continue;
1877                 if (length > MMU_PAGESIZE) {
1878                         cmn_err(CE_NOTE,
1879                             "boot property %s longer than 0x%x, ignored\n",
1880                             property_name, MMU_PAGESIZE);
1881                         continue;
1882                 }
1883                 BOP_GETPROP(bootops, property_name, bop_staging_area);
1884 
1885                 /*
1886                  * special properties:
1887                  * si-machine, si-hw-provider
1888                  *      goes to kernel data structures.
1889                  * bios-boot-device and stdout
1890                  *      goes to hardware property list so it may show up
1891                  *      in the prtconf -vp output. This is needed by
1892                  *      Install/Upgrade. Once we fix install upgrade,
1893                  *      this can be taken out.
1894                  */
1895                 if (strcmp(name, "si-machine") == 0) {
1896                         (void) strncpy(utsname.machine, bop_staging_area,
1897                             SYS_NMLN);
1898                         utsname.machine[SYS_NMLN - 1] = (char)NULL;
1899                 } else if (strcmp(name, "si-hw-provider") == 0) {
1900                         (void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1901                         hw_provider[SYS_NMLN - 1] = (char)NULL;
1902                 } else if (strcmp(name, "bios-boot-device") == 0) {
1903                         copy_boot_str(bop_staging_area, property_val, 50);
1904                         (void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1905                             property_name, property_val);
1906                 } else if (strcmp(name, "stdout") == 0) {
1907                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1908                             property_name, *((int *)bop_staging_area));
1909                 } else {
1910                         /* Property type unknown, use old prop interface */
1911                         (void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1912                             DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1913                             length);
1914                 }
1915         }
1916 
1917         kmem_free(bop_staging_area, MMU_PAGESIZE);
1918 }
1919 
1920 static void
1921 get_vga_properties(void)
1922 {
1923         dev_info_t *devi;
1924         major_t major;
1925         char *name;
1926         int length;
1927         char property_val[50];
1928         void *bop_staging_area;
1929 
1930         /*
1931          * XXXX Hack Allert!
1932          * There really needs to be a better way for identifying various
1933          * console framebuffers and their related issues.  Till then,
1934          * check for this one as a replacement to vgatext.
1935          */
1936         major = ddi_name_to_major("ragexl");
1937         if (major == (major_t)-1) {
1938                 major = ddi_name_to_major("vgatext");
1939                 if (major == (major_t)-1)
1940                         return;
1941         }
1942         devi = devnamesp[major].dn_head;
1943         if (devi == NULL)
1944                 return;
1945 
1946         bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1947 
1948         /*
1949          * Import "vga" properties from the boot.
1950          */
1951         name = "display-edif-block";
1952         length = BOP_GETPROPLEN(bootops, name);
1953         if (length > 0 && length < MMU_PAGESIZE) {
1954                 BOP_GETPROP(bootops, name, bop_staging_area);
1955                 (void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
1956                     devi, name, bop_staging_area, length);
1957         }
1958 
1959         /*
1960          * kdmconfig is also looking for display-type and
1961          * video-adapter-type. We default to color and svga.
1962          *
1963          * Could it be "monochrome", "vga"?
1964          * Nah, you've got to come to the 21st century...
1965          * And you can set monitor type manually in kdmconfig
1966          * if you are really an old junky.
1967          */
1968         (void) ndi_prop_update_string(DDI_DEV_T_NONE,
1969             devi, "display-type", "color");
1970         (void) ndi_prop_update_string(DDI_DEV_T_NONE,
1971             devi, "video-adapter-type", "svga");
1972 
1973         name = "display-edif-id";
1974         length = BOP_GETPROPLEN(bootops, name);
1975         if (length > 0 && length < MMU_PAGESIZE) {
1976                 BOP_GETPROP(bootops, name, bop_staging_area);
1977                 copy_boot_str(bop_staging_area, property_val, length);
1978                 (void) ndi_prop_update_string(DDI_DEV_T_NONE,
1979                     devi, name, property_val);
1980         }
1981 
1982         kmem_free(bop_staging_area, MMU_PAGESIZE);
1983 }
1984 
1985 
1986 /*
1987  * This is temporary, but absolutely necessary.  If we are being
1988  * booted with a device tree created by the DevConf project's bootconf
1989  * program, then we have device information nodes that reflect
1990  * reality.  At this point in time in the Solaris release schedule, the
1991  * kernel drivers aren't prepared for reality.  They still depend on their
1992  * own ad-hoc interpretations of the properties created when their .conf
1993  * files were interpreted. These drivers use an "ignore-hardware-nodes"
1994  * property to prevent them from using the nodes passed up from the bootconf
1995  * device tree.
1996  *
1997  * Trying to assemble root file system drivers as we are booting from
1998  * devconf will fail if the kernel driver is basing its name_addr's on the
1999  * psuedo-node device info while the bootpath passed up from bootconf is using
2000  * reality-based name_addrs.  We help the boot along in this case by
2001  * looking at the pre-bootconf bootpath and determining if we would have
2002  * successfully matched if that had been the bootpath we had chosen.
2003  *
2004  * Note that we only even perform this extra check if we've booted
2005  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2006  * we're trying to match the name_addr specified in the 1275 bootpath.
2007  */
2008 
2009 #define MAXCOMPONENTLEN 32
2010 
2011 int
2012 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2013 {
2014         /*
2015          *  There are multiple criteria to be met before we can even
2016          *  consider allowing a name_addr match here.
2017          *
2018          *  1) We must have been booted such that the bootconf program
2019          *      created device tree nodes and properties.  This can be
2020          *      determined by examining the 'bootpath' property.  This
2021          *      property will be a non-null string iff bootconf was
2022          *      involved in the boot.
2023          *
2024          *  2) The module that we want to match must be the boot device.
2025          *
2026          *  3) The instance of the module we are thinking of letting be
2027          *      our match must be ignoring hardware nodes.
2028          *
2029          *  4) The name_addr we want to match must be the name_addr
2030          *      specified in the 1275 bootpath.
2031          */
2032         static char bootdev_module[MAXCOMPONENTLEN];
2033         static char bootdev_oldmod[MAXCOMPONENTLEN];
2034         static char bootdev_newaddr[MAXCOMPONENTLEN];
2035         static char bootdev_oldaddr[MAXCOMPONENTLEN];
2036         static int  quickexit;
2037 
2038         char *daddr;
2039         int dlen;
2040 
2041         char    *lkupname;
2042         int     rv = DDI_FAILURE;
2043 
2044         if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2045             "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2046             (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2047             "ignore-hardware-nodes", -1) != -1)) {
2048                 if (strcmp(daddr, caddr) == 0) {
2049                         return (DDI_SUCCESS);
2050                 }
2051         }
2052 
2053         if (quickexit)
2054                 return (rv);
2055 
2056         if (bootdev_module[0] == '\0') {
2057                 char *addrp, *eoaddrp;
2058                 char *busp, *modp, *atp;
2059                 char *bp1275, *bp;
2060                 int  bp1275len, bplen;
2061 
2062                 bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2063 
2064                 if (ddi_getlongprop(DDI_DEV_T_ANY,
2065                     ddi_root_node(), 0, "bootpath",
2066                     (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2067                     bp1275len <= 1) {
2068                         /*
2069                          * We didn't boot from bootconf so we never need to
2070                          * do any special matches.
2071                          */
2072                         quickexit = 1;
2073                         if (bp1275)
2074                                 kmem_free(bp1275, bp1275len);
2075                         return (rv);
2076                 }
2077 
2078                 if (ddi_getlongprop(DDI_DEV_T_ANY,
2079                     ddi_root_node(), 0, "boot-path",
2080                     (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2081                         /*
2082                          * No fallback position for matching. This is
2083                          * certainly unexpected, but we'll handle it
2084                          * just in case.
2085                          */
2086                         quickexit = 1;
2087                         kmem_free(bp1275, bp1275len);
2088                         if (bp)
2089                                 kmem_free(bp, bplen);
2090                         return (rv);
2091                 }
2092 
2093                 /*
2094                  *  Determine boot device module and 1275 name_addr
2095                  *
2096                  *  bootpath assumed to be of the form /bus/module@name_addr
2097                  */
2098                 if (busp = strchr(bp1275, '/')) {
2099                         if (modp = strchr(busp + 1, '/')) {
2100                                 if (atp = strchr(modp + 1, '@')) {
2101                                         *atp = '\0';
2102                                         addrp = atp + 1;
2103                                         if (eoaddrp = strchr(addrp, '/'))
2104                                                 *eoaddrp = '\0';
2105                                 }
2106                         }
2107                 }
2108 
2109                 if (modp && addrp) {
2110                         (void) strncpy(bootdev_module, modp + 1,
2111                             MAXCOMPONENTLEN);
2112                         bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2113 
2114                         (void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2115                         bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2116                 } else {
2117                         quickexit = 1;
2118                         kmem_free(bp1275, bp1275len);
2119                         kmem_free(bp, bplen);
2120                         return (rv);
2121                 }
2122 
2123                 /*
2124                  *  Determine fallback name_addr
2125                  *
2126                  *  10/3/96 - Also save fallback module name because it
2127                  *  might actually be different than the current module
2128                  *  name.  E.G., ISA pnp drivers have new names.
2129                  *
2130                  *  bootpath assumed to be of the form /bus/module@name_addr
2131                  */
2132                 addrp = NULL;
2133                 if (busp = strchr(bp, '/')) {
2134                         if (modp = strchr(busp + 1, '/')) {
2135                                 if (atp = strchr(modp + 1, '@')) {
2136                                         *atp = '\0';
2137                                         addrp = atp + 1;
2138                                         if (eoaddrp = strchr(addrp, '/'))
2139                                                 *eoaddrp = '\0';
2140                                 }
2141                         }
2142                 }
2143 
2144                 if (modp && addrp) {
2145                         (void) strncpy(bootdev_oldmod, modp + 1,
2146                             MAXCOMPONENTLEN);
2147                         bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2148 
2149                         (void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2150                         bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2151                 }
2152 
2153                 /* Free up the bootpath storage now that we're done with it. */
2154                 kmem_free(bp1275, bp1275len);
2155                 kmem_free(bp, bplen);
2156 
2157                 if (bootdev_oldaddr[0] == '\0') {
2158                         quickexit = 1;
2159                         return (rv);
2160                 }
2161         }
2162 
2163         if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2164             (strcmp(bootdev_module, lkupname) == 0 ||
2165             strcmp(bootdev_oldmod, lkupname) == 0) &&
2166             ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2167             "ignore-hardware-nodes", -1) != -1) ||
2168             ignore_hardware_nodes) &&
2169             strcmp(bootdev_newaddr, caddr) == 0 &&
2170             strcmp(bootdev_oldaddr, naddr) == 0) {
2171                 rv = DDI_SUCCESS;
2172         }
2173 
2174         return (rv);
2175 }
2176 
2177 /*
2178  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2179  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2180  */
2181 /*ARGSUSED*/
2182 int
2183 e_ddi_copyfromdev(dev_info_t *devi,
2184     off_t off, const void *devaddr, void *kaddr, size_t len)
2185 {
2186         bcopy(devaddr, kaddr, len);
2187         return (0);
2188 }
2189 
2190 /*
2191  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2192  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2193  */
2194 /*ARGSUSED*/
2195 int
2196 e_ddi_copytodev(dev_info_t *devi,
2197     off_t off, const void *kaddr, void *devaddr, size_t len)
2198 {
2199         bcopy(kaddr, devaddr, len);
2200         return (0);
2201 }
2202 
2203 
2204 static int
2205 poke_mem(peekpoke_ctlops_t *in_args)
2206 {
2207         int err = DDI_SUCCESS;
2208         on_trap_data_t otd;
2209 
2210         /* Set up protected environment. */
2211         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2212                 switch (in_args->size) {
2213                 case sizeof (uint8_t):
2214                         *(uint8_t *)(in_args->dev_addr) =
2215                             *(uint8_t *)in_args->host_addr;
2216                         break;
2217 
2218                 case sizeof (uint16_t):
2219                         *(uint16_t *)(in_args->dev_addr) =
2220                             *(uint16_t *)in_args->host_addr;
2221                         break;
2222 
2223                 case sizeof (uint32_t):
2224                         *(uint32_t *)(in_args->dev_addr) =
2225                             *(uint32_t *)in_args->host_addr;
2226                         break;
2227 
2228                 case sizeof (uint64_t):
2229                         *(uint64_t *)(in_args->dev_addr) =
2230                             *(uint64_t *)in_args->host_addr;
2231                         break;
2232 
2233                 default:
2234                         err = DDI_FAILURE;
2235                         break;
2236                 }
2237         } else
2238                 err = DDI_FAILURE;
2239 
2240         /* Take down protected environment. */
2241         no_trap();
2242 
2243         return (err);
2244 }
2245 
2246 
2247 static int
2248 peek_mem(peekpoke_ctlops_t *in_args)
2249 {
2250         int err = DDI_SUCCESS;
2251         on_trap_data_t otd;
2252 
2253         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2254                 switch (in_args->size) {
2255                 case sizeof (uint8_t):
2256                         *(uint8_t *)in_args->host_addr =
2257                             *(uint8_t *)in_args->dev_addr;
2258                         break;
2259 
2260                 case sizeof (uint16_t):
2261                         *(uint16_t *)in_args->host_addr =
2262                             *(uint16_t *)in_args->dev_addr;
2263                         break;
2264 
2265                 case sizeof (uint32_t):
2266                         *(uint32_t *)in_args->host_addr =
2267                             *(uint32_t *)in_args->dev_addr;
2268                         break;
2269 
2270                 case sizeof (uint64_t):
2271                         *(uint64_t *)in_args->host_addr =
2272                             *(uint64_t *)in_args->dev_addr;
2273                         break;
2274 
2275                 default:
2276                         err = DDI_FAILURE;
2277                         break;
2278                 }
2279         } else
2280                 err = DDI_FAILURE;
2281 
2282         no_trap();
2283         return (err);
2284 }
2285 
2286 
2287 /*
2288  * This is called only to process peek/poke when the DIP is NULL.
2289  * Assume that this is for memory, as nexi take care of device safe accesses.
2290  */
2291 int
2292 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2293 {
2294         return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2295 }
2296 
2297 /*
2298  * we've just done a cautious put/get. Check if it was successful by
2299  * calling pci_ereport_post() on all puts and for any gets that return -1
2300  */
2301 static int
2302 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2303     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2304 {
2305         int     rval = DDI_SUCCESS;
2306         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2307         ddi_fm_error_t de;
2308         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2309         ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2310         int check_err = 0;
2311         int repcount = in_args->repcount;
2312 
2313         if (ctlop == DDI_CTLOPS_POKE &&
2314             hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2315                 return (DDI_SUCCESS);
2316 
2317         if (ctlop == DDI_CTLOPS_PEEK &&
2318             hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2319                 for (; repcount; repcount--) {
2320                         switch (in_args->size) {
2321                         case sizeof (uint8_t):
2322                                 if (*(uint8_t *)in_args->host_addr == 0xff)
2323                                         check_err = 1;
2324                                 break;
2325                         case sizeof (uint16_t):
2326                                 if (*(uint16_t *)in_args->host_addr == 0xffff)
2327                                         check_err = 1;
2328                                 break;
2329                         case sizeof (uint32_t):
2330                                 if (*(uint32_t *)in_args->host_addr ==
2331                                     0xffffffff)
2332                                         check_err = 1;
2333                                 break;
2334                         case sizeof (uint64_t):
2335                                 if (*(uint64_t *)in_args->host_addr ==
2336                                     0xffffffffffffffff)
2337                                         check_err = 1;
2338                                 break;
2339                         }
2340                 }
2341                 if (check_err == 0)
2342                         return (DDI_SUCCESS);
2343         }
2344         /*
2345          * for a cautious put or get or a non-cautious get that returned -1 call
2346          * io framework to see if there really was an error
2347          */
2348         bzero(&de, sizeof (ddi_fm_error_t));
2349         de.fme_version = DDI_FME_VERSION;
2350         de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2351         if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2352                 de.fme_flag = DDI_FM_ERR_EXPECTED;
2353                 de.fme_acc_handle = in_args->handle;
2354         } else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2355                 /*
2356                  * We only get here with DDI_DEFAULT_ACC for config space gets.
2357                  * Non-hardened drivers may be probing the hardware and
2358                  * expecting -1 returned. So need to treat errors on
2359                  * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2360                  */
2361                 de.fme_flag = DDI_FM_ERR_EXPECTED;
2362                 de.fme_acc_handle = in_args->handle;
2363         } else {
2364                 /*
2365                  * Hardened driver doing protected accesses shouldn't
2366                  * get errors unless there's a hardware problem. Treat
2367                  * as nonfatal if there's an error, but set UNEXPECTED
2368                  * so we raise ereports on any errors and potentially
2369                  * fault the device
2370                  */
2371                 de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2372         }
2373         (void) scan(dip, &de);
2374         if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2375             de.fme_status != DDI_FM_OK) {
2376                 ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2377                 rval = DDI_FAILURE;
2378                 errp->err_ena = de.fme_ena;
2379                 errp->err_expected = de.fme_flag;
2380                 errp->err_status = DDI_FM_NONFATAL;
2381         }
2382         return (rval);
2383 }
2384 
2385 /*
2386  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2387  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2388  * recurse, so assume all puts are OK and gets have failed if they return -1
2389  */
2390 static int
2391 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2392 {
2393         int rval = DDI_SUCCESS;
2394         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2395         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2396         ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2397         int repcount = in_args->repcount;
2398 
2399         if (ctlop == DDI_CTLOPS_POKE)
2400                 return (rval);
2401 
2402         for (; repcount; repcount--) {
2403                 switch (in_args->size) {
2404                 case sizeof (uint8_t):
2405                         if (*(uint8_t *)in_args->host_addr == 0xff)
2406                                 rval = DDI_FAILURE;
2407                         break;
2408                 case sizeof (uint16_t):
2409                         if (*(uint16_t *)in_args->host_addr == 0xffff)
2410                                 rval = DDI_FAILURE;
2411                         break;
2412                 case sizeof (uint32_t):
2413                         if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2414                                 rval = DDI_FAILURE;
2415                         break;
2416                 case sizeof (uint64_t):
2417                         if (*(uint64_t *)in_args->host_addr ==
2418                             0xffffffffffffffff)
2419                                 rval = DDI_FAILURE;
2420                         break;
2421                 }
2422         }
2423         if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2424             rval == DDI_FAILURE) {
2425                 ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2426                 errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2427                 errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2428                 errp->err_status = DDI_FM_NONFATAL;
2429         }
2430         return (rval);
2431 }
2432 
2433 int
2434 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2435         ddi_ctl_enum_t ctlop, void *arg, void *result,
2436         int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2437         void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2438         void (*scan)(dev_info_t *, ddi_fm_error_t *))
2439 {
2440         int rval;
2441         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2442         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2443 
2444         /*
2445          * this function only supports cautious accesses, not peeks/pokes
2446          * which don't have a handle
2447          */
2448         if (hp == NULL)
2449                 return (DDI_FAILURE);
2450 
2451         if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2452                 if (!mutex_tryenter(err_mutexp)) {
2453                         /*
2454                          * As this may be a recursive call from within
2455                          * pci_ereport_post() we can't wait for the mutexes.
2456                          * Fortunately we know someone is already calling
2457                          * pci_ereport_post() which will handle the error bits
2458                          * for us, and as this is a config space access we can
2459                          * just do the access and check return value for -1
2460                          * using pci_peekpoke_check_nofma().
2461                          */
2462                         rval = handler(dip, rdip, ctlop, arg, result);
2463                         if (rval == DDI_SUCCESS)
2464                                 rval = pci_peekpoke_check_nofma(arg, ctlop);
2465                         return (rval);
2466                 }
2467                 /*
2468                  * This can't be a recursive call. Drop the err_mutex and get
2469                  * both mutexes in the right order. If an error hasn't already
2470                  * been detected by the ontrap code, use pci_peekpoke_check_fma
2471                  * which will call pci_ereport_post() to check error status.
2472                  */
2473                 mutex_exit(err_mutexp);
2474         }
2475         mutex_enter(peek_poke_mutexp);
2476         rval = handler(dip, rdip, ctlop, arg, result);
2477         if (rval == DDI_SUCCESS) {
2478                 mutex_enter(err_mutexp);
2479                 rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2480                 mutex_exit(err_mutexp);
2481         }
2482         mutex_exit(peek_poke_mutexp);
2483         return (rval);
2484 }
2485 
2486 void
2487 impl_setup_ddi(void)
2488 {
2489 #if !defined(__xpv)
2490         extern void startup_bios_disk(void);
2491         extern int post_fastreboot;
2492 #endif
2493         dev_info_t *xdip, *isa_dip;
2494         rd_existing_t rd_mem_prop;
2495         int err;
2496 
2497         ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2498             (pnode_t)DEVI_SID_NODEID, &xdip);
2499 
2500         (void) BOP_GETPROP(bootops,
2501             "ramdisk_start", (void *)&ramdisk_start);
2502         (void) BOP_GETPROP(bootops,
2503             "ramdisk_end", (void *)&ramdisk_end);
2504 
2505 #ifdef __xpv
2506         ramdisk_start -= ONE_GIG;
2507         ramdisk_end -= ONE_GIG;
2508 #endif
2509         rd_mem_prop.phys = ramdisk_start;
2510         rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2511 
2512         (void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2513             RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2514             sizeof (rd_mem_prop));
2515         err = ndi_devi_bind_driver(xdip, 0);
2516         ASSERT(err == 0);
2517 
2518         /* isa node */
2519         if (pseudo_isa) {
2520                 ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2521                     (pnode_t)DEVI_SID_NODEID, &isa_dip);
2522                 (void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2523                     "device_type", "isa");
2524                 (void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2525                     "bus-type", "isa");
2526                 (void) ndi_devi_bind_driver(isa_dip, 0);
2527         }
2528 
2529         /*
2530          * Read in the properties from the boot.
2531          */
2532         get_boot_properties();
2533 
2534         /* not framebuffer should be enumerated, if present */
2535         get_vga_properties();
2536 
2537         /*
2538          * Check for administratively disabled drivers.
2539          */
2540         check_driver_disable();
2541 
2542 #if !defined(__xpv)
2543         if (!post_fastreboot)
2544                 startup_bios_disk();
2545 #endif
2546         /* do bus dependent probes. */
2547         impl_bus_initialprobe();
2548 }
2549 
2550 dev_t
2551 getrootdev(void)
2552 {
2553         /*
2554          * Precedence given to rootdev if set in /etc/system
2555          */
2556         if (root_is_svm == B_TRUE) {
2557                 return (ddi_pathname_to_dev_t(svm_bootpath));
2558         }
2559 
2560         /*
2561          * Usually rootfs.bo_name is initialized by the
2562          * the bootpath property from bootenv.rc, but
2563          * defaults to "/ramdisk:a" otherwise.
2564          */
2565         return (ddi_pathname_to_dev_t(rootfs.bo_name));
2566 }
2567 
/*
 * Singly linked list of registered bus probe callbacks.  bus_probes
 * points at the first entry and is NULL while the list is empty.
 */
struct bus_probe {
        struct bus_probe *next;         /* following entry, or NULL */
        void (*probe)(int);             /* callback held by this entry */
};

static struct bus_probe *bus_probes;
2572 
2573 void
2574 impl_bus_add_probe(void (*func)(int))
2575 {
2576         struct bus_probe *probe;
2577         struct bus_probe *lastprobe = NULL;
2578 
2579         probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2580         probe->probe = func;
2581         probe->next = NULL;
2582 
2583         if (!bus_probes) {
2584                 bus_probes = probe;
2585                 return;
2586         }
2587 
2588         lastprobe = bus_probes;
2589         while (lastprobe->next)
2590                 lastprobe = lastprobe->next;
2591         lastprobe->next = probe;
2592 }
2593 
2594 /*ARGSUSED*/
2595 void
2596 impl_bus_delete_probe(void (*func)(int))
2597 {
2598         struct bus_probe *prev = NULL;
2599         struct bus_probe *probe = bus_probes;
2600 
2601         while (probe) {
2602                 if (probe->probe == func)
2603                         break;
2604                 prev = probe;
2605                 probe = probe->next;
2606         }
2607 
2608         if (probe == NULL)
2609                 return;
2610 
2611         if (prev)
2612                 prev->next = probe->next;
2613         else
2614                 bus_probes = probe->next;
2615 
2616         kmem_free(probe, sizeof (struct bus_probe));
2617 }
2618 
2619 /*
2620  * impl_bus_initialprobe
2621  *      Modload the prom simulator, then let it probe to verify existence
2622  *      and type of PCI support.
2623  */
2624 static void
2625 impl_bus_initialprobe(void)
2626 {
2627         struct bus_probe *probe;
2628 
2629         /* load modules to install bus probes */
2630 #if defined(__xpv)
2631         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2632                 if (modload("misc", "pci_autoconfig") < 0) {
2633                         panic("failed to load misc/pci_autoconfig");
2634                 }
2635 
2636                 if (modload("drv", "isa") < 0)
2637                         panic("failed to load drv/isa");
2638         }
2639 
2640         (void) modload("misc", "xpv_autoconfig");
2641 #else
2642         if (modload("misc", "pci_autoconfig") < 0) {
2643                 panic("failed to load misc/pci_autoconfig");
2644         }
2645 
2646         (void) modload("misc", "acpidev");
2647 
2648         if (modload("drv", "isa") < 0)
2649                 panic("failed to load drv/isa");
2650 #endif
2651 
2652         probe = bus_probes;
2653         while (probe) {
2654                 /* run the probe functions */
2655                 (*probe->probe)(0);
2656                 probe = probe->next;
2657         }
2658 }
2659 
2660 /*
2661  * impl_bus_reprobe
2662  *      Reprogram devices not set up by firmware.
2663  */
2664 static void
2665 impl_bus_reprobe(void)
2666 {
2667         struct bus_probe *probe;
2668 
2669         probe = bus_probes;
2670         while (probe) {
2671                 /* run the probe function */
2672                 (*probe->probe)(1);
2673                 probe = probe->next;
2674         }
2675 }
2676 
2677 
2678 /*
2679  * The following functions ready a cautious request to go up to the nexus
2680  * driver.  It is up to the nexus driver to decide how to process the request.
2681  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2682  * differently.
2683  */
2684 
2685 static void
2686 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2687     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2688     ddi_ctl_enum_t cmd)
2689 {
2690         peekpoke_ctlops_t       cautacc_ctlops_arg;
2691 
2692         cautacc_ctlops_arg.size = size;
2693         cautacc_ctlops_arg.dev_addr = dev_addr;
2694         cautacc_ctlops_arg.host_addr = host_addr;
2695         cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2696         cautacc_ctlops_arg.repcount = repcount;
2697         cautacc_ctlops_arg.flags = flags;
2698 
2699         (void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2700             &cautacc_ctlops_arg, NULL);
2701 }
2702 
2703 uint8_t
2704 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2705 {
2706         uint8_t value;
2707         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2708             sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2709 
2710         return (value);
2711 }
2712 
2713 uint16_t
2714 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2715 {
2716         uint16_t value;
2717         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2718             sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2719 
2720         return (value);
2721 }
2722 
2723 uint32_t
2724 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2725 {
2726         uint32_t value;
2727         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2728             sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2729 
2730         return (value);
2731 }
2732 
2733 uint64_t
2734 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2735 {
2736         uint64_t value;
2737         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2738             sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2739 
2740         return (value);
2741 }
2742 
2743 void
2744 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2745 {
2746         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2747             sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2748 }
2749 
2750 void
2751 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2752 {
2753         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2754             sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2755 }
2756 
2757 void
2758 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2759 {
2760         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2761             sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2762 }
2763 
2764 void
2765 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2766 {
2767         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2768             sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2769 }
2770 
2771 void
2772 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2773         size_t repcount, uint_t flags)
2774 {
2775         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2776             sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2777 }
2778 
2779 void
2780 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2781     uint16_t *dev_addr, size_t repcount, uint_t flags)
2782 {
2783         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2784             sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2785 }
2786 
2787 void
2788 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2789     uint32_t *dev_addr, size_t repcount, uint_t flags)
2790 {
2791         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2792             sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2793 }
2794 
2795 void
2796 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2797     uint64_t *dev_addr, size_t repcount, uint_t flags)
2798 {
2799         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2800             sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2801 }
2802 
2803 void
2804 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2805         size_t repcount, uint_t flags)
2806 {
2807         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2808             sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2809 }
2810 
2811 void
2812 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2813     uint16_t *dev_addr, size_t repcount, uint_t flags)
2814 {
2815         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2816             sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2817 }
2818 
2819 void
2820 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2821     uint32_t *dev_addr, size_t repcount, uint_t flags)
2822 {
2823         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2824             sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2825 }
2826 
2827 void
2828 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2829     uint64_t *dev_addr, size_t repcount, uint_t flags)
2830 {
2831         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2832             sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2833 }
2834 
2835 boolean_t
2836 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2837 {
2838         uint64_t hi_pa;
2839 
2840         hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2841         if (attrp->dma_attr_addr_hi < hi_pa) {
2842                 return (B_TRUE);
2843         }
2844 
2845         return (B_FALSE);
2846 }
2847 
2848 size_t
2849 i_ddi_copybuf_size()
2850 {
2851         return (dma_max_copybuf_size);
2852 }
2853 
2854 /*
2855  * i_ddi_dma_max()
2856  *    returns the maximum DMA size which can be performed in a single DMA
2857  *    window taking into account the devices DMA contraints (attrp), the
2858  *    maximum copy buffer size (if applicable), and the worse case buffer
2859  *    fragmentation.
2860  */
2861 /*ARGSUSED*/
2862 uint32_t
2863 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2864 {
2865         uint64_t maxxfer;
2866 
2867 
2868         /*
2869          * take the min of maxxfer and the the worse case fragementation
2870          * (e.g. every cookie <= 1 page)
2871          */
2872         maxxfer = MIN(attrp->dma_attr_maxxfer,
2873             ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2874 
2875         /*
2876          * If the DMA engine can't reach all off memory, we also need to take
2877          * the max size of the copybuf into consideration.
2878          */
2879         if (i_ddi_copybuf_required(attrp)) {
2880                 maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2881         }
2882 
2883         /*
2884          * we only return a 32-bit value. Make sure it's not -1. Round to a
2885          * page so it won't be mistaken for an error value during debug.
2886          */
2887         if (maxxfer >= 0xFFFFFFFF) {
2888                 maxxfer = 0xFFFFF000;
2889         }
2890 
2891         /*
2892          * make sure the value we return is a whole multiple of the
2893          * granlarity.
2894          */
2895         if (attrp->dma_attr_granular > 1) {
2896                 maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2897         }
2898 
2899         return ((uint32_t)maxxfer);
2900 }
2901 
2902 /*ARGSUSED*/
2903 void
2904 translate_devid(dev_info_t *dip)
2905 {
2906 }
2907 
2908 pfn_t
2909 i_ddi_paddr_to_pfn(paddr_t paddr)
2910 {
2911         pfn_t pfn;
2912 
2913 #ifdef __xpv
2914         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2915                 pfn = xen_assign_pfn(mmu_btop(paddr));
2916         } else {
2917                 pfn = mmu_btop(paddr);
2918         }
2919 #else
2920         pfn = mmu_btop(paddr);
2921 #endif
2922 
2923         return (pfn);
2924 }