/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Memory special file: implements the /dev/mem, /dev/kmem, /dev/allkmem,
 * /dev/null, and /dev/zero character devices.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif  /* __sparc */

/*
 * Turn a byte length into a page count.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this macro handles 64-bit sizes
 * for 32-bit machines with large physical memory.
 */
#define BTOP(x) ((pgcnt_t)((x) >> _pageshift))

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;      /* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        int i;
        struct mem_minor {
                char *name;
                minor_t minor;
                int privonly;
                const char *rdpriv;
                const char *wrpriv;
                mode_t priv_mode;
        } mm[] = {
                { "mem",        M_MEM,          0,      NULL,   "all",  0640 },
                { "kmem",       M_KMEM,         0,      NULL,   "all",  0640 },
                { "allkmem",    M_ALLKMEM,      0,      "all",  "all",  0600 },
                { "null",       M_NULL, PRIVONLY_DEV,   NULL,   NULL,   0666 },
                { "zero",       M_ZERO, PRIVONLY_DEV,   NULL,   NULL,   0666 },
        };
        kstat_t *ksp;

        mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
        mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

        for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
                if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
                    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
                    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
                    DDI_FAILURE) {
                        ddi_remove_minor_node(devi, NULL);
                        return (DDI_FAILURE);
                }
        }

        mm_dip = devi;

        ksp = kstat_create("mm", 0, "phys_installed", "misc",
            KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
        if (ksp != NULL) {
                ksp->ks_update = mm_kstat_update;
                ksp->ks_snapshot = mm_kstat_snapshot;
                ksp->ks_lock = &mm_lock; /* XXX - not really needed */
                kstat_install(ksp);
        }

        mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
            "kmem_io_access", 0);

        return (DDI_SUCCESS);
}
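
/*
 * Note: mm_kmem_io_access is read from the "kmem_io_access" driver
 * property (settable in the driver's .conf file, e.g. a line like
 * "kmem_io_access=1;").  When nonzero it lets /dev/kmem perform I/O
 * to non-memory (device) pages, which is otherwise reserved for
 * /dev/allkmem; see the allowio argument to mmio() below.
 */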

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        int error;

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *result = (void *)mm_dip;
                error = DDI_SUCCESS;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)0;
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
        }
        return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
        switch (getminor(*devp)) {
        case M_NULL:
        case M_ZERO:
        case M_MEM:
        case M_KMEM:
        case M_ALLKMEM:
                /* standard devices */
                break;

        default:
                /* unsupported or unknown minor */
                return (EINVAL);
        }
        /* must be a character device */
        if (typ != OTYP_CHR)
                return (EINVAL);
        return (0);
}

struct pollhead mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
        switch (getminor(dev)) {
        case M_NULL:
        case M_ZERO:
        case M_MEM:
        case M_KMEM:
        case M_ALLKMEM:
                *reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
                    POLLWRNORM | POLLRDBAND | POLLWRBAND);
                /*
                 * A non-NULL pollhead pointer must be returned in case
                 * the user polls for 0 events.
                 */
                *phpp = !anyyet && !*reventsp ?
                    &mm_pollhd : (struct pollhead *)NULL;
                return (0);
        default:
                /* no other devices currently support polling */
                return (ENXIO);
        }
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
        /*
         * Report a zero size to reduce overhead: this avoids two
         * failing property lookups per stat().
         */
        return (ddi_prop_op_size(dev, dip, prop_op,
            flags, name, valuep, lengthp, 0));
}

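/*
 * mmio() transfers at most one page per call.  When the pfn is
 * ordinary memory and kpm is enabled, the page is accessed through
 * its kpm mapping; otherwise a transient mapping is loaded at mm_map
 * (under mm_lock) with hat_devload() and unloaded afterwards.
 * Non-memory (device) pfns are touched only when allowio permits
 * device I/O, and then via ddi_peekpokeio().
 */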
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
        int error = 0;
        int devload = 0;
        int is_memory = pf_is_memory(pfn);
        size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
            (size_t)uio->uio_iov->iov_len);
        caddr_t va = NULL;

        mutex_enter(&mm_lock);

        if (is_memory && kpm_enable) {
                if (pp)
                        va = hat_kpm_mapin(pp, NULL);
                else
                        va = hat_kpm_mapin_pfn(pfn);
        }

        if (va == NULL) {
                hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
                    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
                    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
                va = mm_map;
                devload = 1;
        }

        if (!is_memory) {
                if (allowio) {
                        size_t c = uio->uio_iov->iov_len;

                        if (ddi_peekpokeio(NULL, uio, rw,
                            (caddr_t)(uintptr_t)uio->uio_loffset, c,
                            sizeof (int32_t)) != DDI_SUCCESS)
                                error = EFAULT;
                } else
                        error = EIO;
        } else
                error = uiomove(va + pageoff, nbytes, rw, uio);

        if (devload)
                hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
        else if (pp)
                hat_kpm_mapout(pp, NULL, va);
        else
                hat_kpm_mapout_pfn(pfn);

        mutex_exit(&mm_lock);
        return (error);
}

static int
mmpagelock(struct as *as, caddr_t va)
{
        struct seg *seg;
        int i;

        AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
        seg = as_segat(as, va);
        i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
        AS_LOCK_EXIT(as, &as->a_lock);

        return (i);
}

#ifdef  __sparc

#define NEED_LOCK_KVADDR(kva)   mmpagelock(&kas, kva)

#else   /* __i386, __amd64 */

#define NEED_LOCK_KVADDR(va)    0

#endif  /* __sparc */

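/*
 * mmrw() implements read(2)/write(2) for all five minors: /dev/mem
 * offsets are validated against the phys_install memlist, /dev/kmem
 * and /dev/allkmem addresses are translated with hat_getpfnum(),
 * reads of /dev/zero zero-fill the user buffer, and writes to
 * /dev/zero and /dev/null are discarded.  As with a short transfer
 * on a regular file, the return value reports success whenever any
 * bytes moved before an error.
 */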
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
        pfn_t v;
        struct iovec *iov;
        int error = 0;
        size_t c;
        ssize_t oresid = uio->uio_resid;
        minor_t minor = getminor(dev);

        while (uio->uio_resid > 0 && error == 0) {
                iov = uio->uio_iov;
                if (iov->iov_len == 0) {
                        uio->uio_iov++;
                        uio->uio_iovcnt--;
                        if (uio->uio_iovcnt < 0)
                                panic("mmrw");
                        continue;
                }
                switch (minor) {

                case M_MEM:
                        memlist_read_lock();
                        if (!address_in_memlist(phys_install,
                            (uint64_t)uio->uio_loffset, 1)) {
                                memlist_read_unlock();
                                error = EFAULT;
                                break;
                        }
                        memlist_read_unlock();

                        v = BTOP((u_offset_t)uio->uio_loffset);
                        error = mmio(uio, rw, v,
                            uio->uio_loffset & PAGEOFFSET, 0, NULL);
                        break;

                case M_KMEM:
                case M_ALLKMEM:
                        {
                        page_t **ppp = NULL;
                        caddr_t vaddr = (caddr_t)uio->uio_offset;
                        int try_lock = NEED_LOCK_KVADDR(vaddr);
                        int locked = 0;

                        if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
                                break;

                        /*
                         * If vaddr does not map a valid page, as_pagelock()
                         * will fail, so we can't simply check its return
                         * value and return EFAULT here as we'd like.
                         * seg_kp and seg_kpm do not properly support
                         * as_pagelock() for this context, so we avoid it
                         * via the try_lock check above.  Some day, when
                         * kernel page locking gets redesigned, all this
                         * muck can be cleaned up.
                         */
                        if (try_lock)
                                locked = (as_pagelock(&kas, &ppp, vaddr,
                                    PAGESIZE, S_WRITE) == 0);

                        v = hat_getpfnum(kas.a_hat,
                            (caddr_t)(uintptr_t)uio->uio_loffset);
                        if (v == PFN_INVALID) {
                                if (locked)
                                        as_pageunlock(&kas, ppp, vaddr,
                                            PAGESIZE, S_WRITE);
                                error = EFAULT;
                                break;
                        }

                        error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
                            minor == M_ALLKMEM || mm_kmem_io_access,
                            (locked && ppp) ? *ppp : NULL);
                        if (locked)
                                as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
                                    S_WRITE);
                        }

                        break;

                case M_ZERO:
                        if (rw == UIO_READ) {
                                label_t ljb;

                                if (on_fault(&ljb)) {
                                        no_fault();
                                        error = EFAULT;
                                        break;
                                }
                                uzero(iov->iov_base, iov->iov_len);
                                no_fault();
                                uio->uio_resid -= iov->iov_len;
                                uio->uio_loffset += iov->iov_len;
                                break;
                        }
                        /* else it's a write; fall through to the null case */
                        /*FALLTHROUGH*/

                case M_NULL:
                        if (rw == UIO_READ)
                                return (0);
                        c = iov->iov_len;
                        iov->iov_base += c;
                        iov->iov_len -= c;
                        uio->uio_loffset += c;
                        uio->uio_resid -= c;
                        break;

                }
        }
        return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
        return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
        return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
        mem_vtop32_t vtop32;
#endif
        mem_vtop_t mem_vtop;
        proc_t *p;
        pfn_t pfn = (pfn_t)PFN_INVALID;
        pid_t pid = 0;
        struct as *as;
        struct seg *seg;

        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
                        return (EFAULT);
        }
#ifdef _SYSCALL32
        else {
                if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
                        return (EFAULT);
                mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
                mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

                if (mem_vtop.m_as != NULL)
                        return (EINVAL);
        }
#endif

        if (mem_vtop.m_as == &kas) {
                pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
        } else {
                if (mem_vtop.m_as == NULL) {
                        /*
                         * Assume the calling process's address space if the
                         * caller didn't specify one.
                         */
                        p = curthread->t_procp;
                        if (p == NULL)
                                return (EIO);
                        mem_vtop.m_as = p->p_as;
                }

                mutex_enter(&pidlock);
                for (p = practive; p != NULL; p = p->p_next) {
                        if (p->p_as == mem_vtop.m_as) {
                                pid = p->p_pid;
                                break;
                        }
                }
                mutex_exit(&pidlock);
                if (p == NULL)
                        return (EIO);
                p = sprlock(pid);
                if (p == NULL)
                        return (EIO);
                as = p->p_as;
                if (as == mem_vtop.m_as) {
                        mutex_exit(&p->p_lock);
                        AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
                        for (seg = AS_SEGFIRST(as); seg != NULL;
                            seg = AS_SEGNEXT(as, seg))
                                if ((uintptr_t)mem_vtop.m_va -
                                    (uintptr_t)seg->s_base < seg->s_size)
                                        break;
                        if (seg != NULL)
                                pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
                        AS_LOCK_EXIT(as, &as->a_lock);
                        mutex_enter(&p->p_lock);
                }
                sprunlock(p);
        }
        mem_vtop.m_pfn = pfn;
        if (pfn == PFN_INVALID)
                return (EIO);

        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
                        return (EFAULT);
        }
#ifdef _SYSCALL32
        else {
                vtop32.m_pfn = mem_vtop.m_pfn;
                if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
                        return (EFAULT);
        }
#endif

        return (0);
}
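
/*
 * Usage sketch (userland; a hypothetical caller, assuming <sys/mem.h>
 * and a descriptor kfd opened on /dev/kmem, since mmioctl() below
 * accepts MEM_VTOP only on the kmem minor).  A NULL m_as means the
 * calling process's address space:
 *
 *      mem_vtop_t vtop;
 *      vtop.m_as = NULL;
 *      vtop.m_va = addr;
 *      if (ioctl(kfd, MEM_VTOP, &vtop) == 0)
 *              pa = ptob(vtop.m_pfn) + ((uintptr_t)addr & PAGEOFFSET);
 */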

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
        extern int page_retire_test(void);
        uint64_t pa;

        if (copyin((void *)data, &pa, sizeof (uint64_t))) {
                return (EFAULT);
        }

        switch (cmd) {
        case MEM_PAGE_ISRETIRED:
                return (page_retire_check(pa, NULL));

        case MEM_PAGE_UNRETIRE:
                return (page_unretire(pa));

        case MEM_PAGE_RETIRE:
                return (page_retire(pa, PR_FMA));

        case MEM_PAGE_RETIRE_MCE:
                return (page_retire(pa, PR_MCE));

        case MEM_PAGE_RETIRE_UE:
                return (page_retire(pa, PR_UE));

        case MEM_PAGE_GETERRORS:
                {
                        uint64_t page_errors;
                        int rc = page_retire_check(pa, &page_errors);
                        if (copyout(&page_errors, (void *)data,
                            sizeof (uint64_t))) {
                                return (EFAULT);
                        }
                        return (rc);
                }

        case MEM_PAGE_RETIRE_TEST:
                return (page_retire_test());

        }

        return (EINVAL);
}
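
/*
 * Usage sketch (userland; a hypothetical FMA-style caller, assuming
 * <sys/mem.h> and a descriptor mfd opened on /dev/mem, since
 * mmioctl() below accepts the page retire commands only on the mem
 * minor).  Each command takes a physical address by reference:
 *
 *      uint64_t pa = ...;
 *      err = ioctl(mfd, MEM_PAGE_RETIRE, &pa);
 *      err = ioctl(mfd, MEM_PAGE_ISRETIRED, &pa);
 */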

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
        mem_name_t mem_name;
        void *buf;
        size_t bufsize;
        int len, err;

        if ((bufsize = cpu_get_name_bufsize()) == 0)
                return (ENOTSUP);

        if ((err = mm_read_mem_name(data, &mem_name)) != 0)
                return (err);

        buf = kmem_alloc(bufsize, KM_SLEEP);

        /*
         * Call into cpu specific code to do the lookup.
         */
        if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
            mem_name.m_addr, buf, bufsize, &len)) != 0) {
                kmem_free(buf, bufsize);
                return (err);
        }

        if (len >= mem_name.m_namelen) {
                kmem_free(buf, bufsize);
                return (ENOSPC);
        }

        if (copyoutstr(buf, (char *)mem_name.m_name,
            mem_name.m_namelen, NULL) != 0) {
                kmem_free(buf, bufsize);
                return (EFAULT);
        }

        kmem_free(buf, bufsize);
        return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
        mem_info_t mem_info;
        int err;

        if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
                return (EFAULT);

        if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
            &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
            &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
                return (err);

        if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
                return (EFAULT);

        return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
        mem_name_t mem_name;
        void *buf;
        void *name;
        size_t  name_len;
        size_t bufsize;
        int len, err;

        if ((bufsize = cpu_get_name_bufsize()) == 0)
                return (ENOTSUP);

        if ((err = mm_read_mem_name(data, &mem_name)) != 0)
                return (err);

        buf = kmem_alloc(bufsize, KM_SLEEP);

        if (mem_name.m_namelen > 1024)
                mem_name.m_namelen = 1024; /* cap at 1024 bytes */

        name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

        if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
            mem_name.m_namelen, &name_len)) != 0) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (err);
        }

        /*
         * Call into cpu specific code to do the lookup.
         */
        if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (err);
        }

        if (len > mem_name.m_sidlen) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (ENAMETOOLONG);
        }

        if (copyoutstr(buf, (char *)mem_name.m_sid,
            mem_name.m_sidlen, NULL) != 0) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (EFAULT);
        }

        kmem_free(buf, bufsize);
        kmem_free(name, mem_name.m_namelen);
        return (0);
}
#endif  /* __sparc */

/*
 * Private ioctls for
 *      libkvm to support kvm_physaddr().
 *      FMA support for page_retire() and memory attribute information.
 *
 * MEM_VTOP is accepted only on the kmem minor; all other commands are
 * accepted only on the mem minor.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
        if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
            (cmd != MEM_VTOP && getminor(dev) != M_MEM))
                return (ENXIO);

        switch (cmd) {
        case MEM_VTOP:
                return (mmioctl_vtop(data));

        case MEM_PAGE_RETIRE:
        case MEM_PAGE_ISRETIRED:
        case MEM_PAGE_UNRETIRE:
        case MEM_PAGE_RETIRE_MCE:
        case MEM_PAGE_RETIRE_UE:
        case MEM_PAGE_GETERRORS:
        case MEM_PAGE_RETIRE_TEST:
                return (mmioctl_page_retire(cmd, data));

#ifdef __sparc
        case MEM_NAME:
                return (mmioctl_get_mem_name(data));

        case MEM_INFO:
                return (mmioctl_get_mem_info(data));

        case MEM_SID:
                return (mmioctl_get_mem_sid(data));
#else
        case MEM_NAME:
        case MEM_INFO:
        case MEM_SID:
                return (ENOTSUP);
#endif  /* __sparc */
        }
        return (ENXIO);
}

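/*
 * mmmmap() validates a single-page mapping request and returns the
 * mapping cookie: for /dev/mem the offset must fall within the
 * phys_install memlist.  /dev/kmem and /dev/allkmem are no longer
 * mappable, and /dev/zero requests are converted to anonymous seg_vn
 * mappings by mmsegmap() before reaching this point, so every other
 * case returns -1.
 */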
/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
        pfn_t pf;
        struct memlist *pmem;
        minor_t minor = getminor(dev);

        switch (minor) {
        case M_MEM:
                pf = btop(off);
                memlist_read_lock();
                for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
                        if (pf >= BTOP(pmem->ml_address) &&
                            pf < BTOP(pmem->ml_address + pmem->ml_size)) {
                                memlist_read_unlock();
                                return (impl_obmem_pfnum(pf));
                        }
                }
                memlist_read_unlock();
                break;

        case M_KMEM:
        case M_ALLKMEM:
                /* no longer supported with KPR */
                return (-1);

        case M_ZERO:
                /*
                 * We shouldn't be mmap'ing to /dev/zero here as
                 * mmsegmap() should have already converted
                 * a mapping request for this device to a mapping
                 * using seg_vn for anonymous memory.
                 */
                break;

        }
        return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
        struct segvn_crargs vn_a;
        struct segdev_crargs dev_a;
        int error;
        minor_t minor;
        off_t i;

        minor = getminor(dev);

        as_rangelock(as);
        /*
         * No need to worry about vac alignment on /dev/zero
         * since this is a "clone" object that doesn't yet exist.
         */
        error = choose_addr(as, addrp, len, off,
            (minor == M_MEM) || (minor == M_KMEM), flags);
        if (error != 0) {
                as_rangeunlock(as);
                return (error);
        }

        switch (minor) {
        case M_MEM:
                /* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
                if ((flags & MAP_TYPE) != MAP_SHARED) {
                        as_rangeunlock(as);
                        return (EINVAL);
                }

                /*
                 * Check to ensure that the entire range is
                 * legal and we are not trying to map in
                 * more than the device will let us.
                 */
                for (i = 0; i < len; i += PAGESIZE) {
                        if (mmmmap(dev, off + i, maxprot) == -1) {
                                as_rangeunlock(as);
                                return (ENXIO);
                        }
                }

                /*
                 * Use seg_dev segment driver for /dev/mem mapping.
                 */
                dev_a.mapfunc = mmmmap;
                dev_a.dev = dev;
                dev_a.offset = off;
                dev_a.type = (flags & MAP_TYPE);
                dev_a.prot = (uchar_t)prot;
                dev_a.maxprot = (uchar_t)maxprot;
                dev_a.hat_attr = 0;

                /*
                 * Make /dev/mem mappings non-consistent since we can't
                 * alias pages that don't have page structs behind them,
                 * such as kernel stack pages. If someone mmap()s a kernel
                 * stack page and we give him a tte with cv, a line from
                 * that page can get into both pages of the spitfire d$.
                 * But a snoop from another processor will only invalidate
                 * the first page, which once caused the kernel
                 * (xc_attention) to go into an infinite loop at pil 13
                 * with no interrupts able to come in. See bug 1203630.
                 */
                dev_a.hat_flags = HAT_LOAD_NOCONSIST;
                dev_a.devmap_data = NULL;

                error = as_map(as, *addrp, len, segdev_create, &dev_a);
                break;

        case M_ZERO:
                /*
                 * Use seg_vn segment driver for /dev/zero mapping.
                 * Passing in a NULL amp gives us the "cloning" effect.
                 */
                vn_a.vp = NULL;
                vn_a.offset = 0;
                vn_a.type = (flags & MAP_TYPE);
                vn_a.prot = prot;
                vn_a.maxprot = maxprot;
                vn_a.flags = flags & ~MAP_TYPE;
                vn_a.cred = cred;
                vn_a.amp = NULL;
                vn_a.szc = 0;
                vn_a.lgrp_mem_policy_flags = 0;
                error = as_map(as, *addrp, len, segvn_create, &vn_a);
                break;

        case M_KMEM:
        case M_ALLKMEM:
                /* No longer supported with KPR. */
                error = ENXIO;
                break;

        case M_NULL:
                /*
                 * Use seg_dev segment driver for /dev/null mapping.
                 */
                dev_a.mapfunc = mmmmap;
                dev_a.dev = dev;
                dev_a.offset = off;
                dev_a.type = 0;         /* neither PRIVATE nor SHARED */
                dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
                dev_a.hat_attr = 0;
                dev_a.hat_flags = 0;
                error = as_map(as, *addrp, len, segdev_create, &dev_a);
                break;

        default:
                error = ENXIO;
        }

        as_rangeunlock(as);
        return (error);
}
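
/*
 * Usage sketch (userland): a private mapping of /dev/zero yields
 * demand-zero anonymous memory, the traditional alternative to
 * MAP_ANON:
 *
 *      int fd = open("/dev/zero", O_RDWR);
 *      void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *          MAP_PRIVATE, fd, 0);
 */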

static struct cb_ops mm_cb_ops = {
        mmopen,                 /* open */
        nulldev,                /* close */
        nodev,                  /* strategy */
        nodev,                  /* print */
        nodev,                  /* dump */
        mmread,                 /* read */
        mmwrite,                /* write */
        mmioctl,                /* ioctl */
        nodev,                  /* devmap */
        mmmmap,                 /* mmap */
        mmsegmap,               /* segmap */
        mmchpoll,               /* poll */
        mmpropop,               /* prop_op */
        0,                      /* streamtab */
        D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        mm_info,                /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        mm_attach,              /* attach */
        nodev,                  /* detach */
        nodev,                  /* reset */
        &mm_cb_ops,             /* driver operations */
        (struct bus_ops *)0,    /* bus operations */
        NULL,                   /* power */
        ddi_quiesce_not_needed, /* quiesce */
};

static struct modldrv modldrv = {
        &mod_driverops, "memory driver", &mm_ops,
};

static struct modlinkage modlinkage = {
        MODREV_1, &modldrv, NULL
};

int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}

static int
mm_kstat_update(kstat_t *ksp, int rw)
{
        struct memlist *pmem;
        uint_t count;

        if (rw == KSTAT_WRITE)
                return (EACCES);

        count = 0;
        memlist_read_lock();
        for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
                count++;
        }
        memlist_read_unlock();

        ksp->ks_ndata = count;
        ksp->ks_data_size = count * 2 * sizeof (uint64_t);

        return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
        struct memlist *pmem;
        struct memunit {
                uint64_t address;
                uint64_t size;
        } *kspmem;

        if (rw == KSTAT_WRITE)
                return (EACCES);

        ksp->ks_snaptime = gethrtime();

        kspmem = (struct memunit *)buf;
        memlist_read_lock();
        for (pmem = phys_install; pmem != NULL;
            pmem = pmem->ml_next, kspmem++) {
                if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
                        break;
                kspmem->address = pmem->ml_address;
                kspmem->size = pmem->ml_size;
        }
        memlist_read_unlock();

        return (0);
}
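
/*
 * The raw kstat installed by mm_attach() (module "mm", instance 0,
 * name "phys_installed") exposes one (address, size) pair of
 * uint64_ts per phys_install memlist entry.  A userland sketch using
 * libkstat:
 *
 *      kstat_ctl_t *kc = kstat_open();
 *      kstat_t *ksp = kstat_lookup(kc, "mm", 0, "phys_installed");
 *      if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1)
 *              ... ksp->ks_data holds ks_ndata address/size pairs ...
 */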

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.  Returns 0 on success or
 * EFAULT if the copyin fails.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
                        return (EFAULT);
        }
#ifdef  _SYSCALL32
        else {
                mem_name32_t mem_name32;

                if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
                        return (EFAULT);
                mem_name->m_addr = mem_name32.m_addr;
                mem_name->m_synd = mem_name32.m_synd;
                mem_name->m_type[0] = mem_name32.m_type[0];
                mem_name->m_type[1] = mem_name32.m_type[1];
                mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
                mem_name->m_namelen = (size_t)mem_name32.m_namelen;
                mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
                mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
        }
#endif  /* _SYSCALL32 */

        return (0);
}