1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/xpv_user.h>
  28 
  29 #include <sys/types.h>
  30 #include <sys/file.h>
  31 #include <sys/errno.h>
  32 #include <sys/open.h>
  33 #include <sys/cred.h>
  34 #include <sys/conf.h>
  35 #include <sys/stat.h>
  36 #include <sys/modctl.h>
  37 #include <sys/ddi.h>
  38 #include <sys/sunddi.h>
  39 #include <sys/vmsystm.h>
  40 #include <sys/sdt.h>
  41 #include <sys/hypervisor.h>
  42 #include <sys/xen_errno.h>
  43 #include <sys/policy.h>
  44 
  45 #include <vm/hat_i86.h>
  46 #include <vm/hat_pte.h>
  47 #include <vm/seg_mf.h>
  48 
  49 #include <xen/sys/privcmd.h>
  50 #include <sys/privcmd_impl.h>
  51 
  52 static dev_info_t *privcmd_devi;
  53 
  54 /*ARGSUSED*/
  55 static int
  56 privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
  57 {
  58         switch (cmd) {
  59         case DDI_INFO_DEVT2DEVINFO:
  60         case DDI_INFO_DEVT2INSTANCE:
  61                 break;
  62         default:
  63                 return (DDI_FAILURE);
  64         }
  65 
  66         switch (getminor((dev_t)arg)) {
  67         case PRIVCMD_MINOR:
  68                 break;
  69         default:
  70                 return (DDI_FAILURE);
  71         }
  72 
  73         if (cmd == DDI_INFO_DEVT2INSTANCE)
  74                 *result = 0;
  75         else
  76                 *result = privcmd_devi;
  77         return (DDI_SUCCESS);
  78 }
  79 
  80 static int
  81 privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
  82 {
  83         if (cmd != DDI_ATTACH)
  84                 return (DDI_FAILURE);
  85 
  86         if (ddi_create_minor_node(devi, PRIVCMD_NODE,
  87             S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
  88                 return (DDI_FAILURE);
  89 
  90         privcmd_devi = devi;
  91         ddi_report_dev(devi);
  92         return (DDI_SUCCESS);
  93 }
  94 
  95 static int
  96 privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
  97 {
  98         if (cmd != DDI_DETACH)
  99                 return (DDI_FAILURE);
 100         ddi_remove_minor_node(devi, NULL);
 101         privcmd_devi = NULL;
 102         return (DDI_SUCCESS);
 103 }
 104 
 105 /*ARGSUSED1*/
 106 static int
 107 privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
 108 {
 109         return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
 110 }
 111 
 112 /*
 113  * Map a contiguous set of machine frames in a foreign domain.
 114  * Used in the following way:
 115  *
 116  *      privcmd_mmap_t p;
 117  *      privcmd_mmap_entry_t e;
 118  *
 119  *      addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
 120  *      p.num = number of privcmd_mmap_entry_t's
 121  *      p.dom = domid;
 122  *      p.entry = &e;
 123  *      e.va = addr;
 124  *      e.mfn = mfn;
 125  *      e.npages = btopr(size);
 126  *      ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
 127  */
 128 /*ARGSUSED2*/
 129 int
 130 do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
 131 {
 132         privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
 133         privcmd_mmap_entry_t *umme;
 134         struct as *as = curproc->p_as;
 135         struct seg *seg;
 136         int i, error = 0;
 137 
 138         if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
 139                 return (EFAULT);
 140 
 141         DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
 142             privcmd_mmap_entry_t *, mmc->entry);
 143 
 144         if (mmc->dom == DOMID_SELF) {
 145                 error = ENOTSUP;        /* Too paranoid? */
 146                 goto done;
 147         }
 148 
 149         for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
 150                 privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
 151                 caddr_t addr;
 152 
 153                 if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
 154                         error = EFAULT;
 155                         break;
 156                 }
 157 
 158                 DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
 159                     ulong_t, mme->npages);
 160 
 161                 if (mme->mfn == MFN_INVALID) {
 162                         error = EINVAL;
 163                         break;
 164                 }
 165 
 166                 addr = (caddr_t)mme->va;
 167 
 168                 /*
 169                  * Find the segment we want to mess with, then add
 170                  * the mfn range to the segment.
 171                  */
 172                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 173                 if ((seg = as_findseg(as, addr, 0)) == NULL ||
 174                     addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
 175                         error = EINVAL;
 176                 else
 177                         error = segmf_add_mfns(seg, addr,
 178                             mme->mfn, mme->npages, mmc->dom);
 179                 AS_LOCK_EXIT(as, &as->a_lock);
 180 
 181                 if (error != 0)
 182                         break;
 183         }
 184 
 185 done:
 186         DTRACE_XPV1(mmap__end, int, error);
 187 
 188         return (error);
 189 }
 190 
 191 /*
 192  * Set up the address range to map to an array of mfns in
 193  * a foreign domain.  Used in the following way:
 194  *
 195  *      privcmd_mmap_batch_t p;
 196  *
 197  *      addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
 198  *      p.num = number of pages
 199  *      p.dom = domid
 200  *      p.addr = addr;
 201  *      p.arr = array of mfns, indexed 0 .. p.num - 1
 202  *      ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
 203  */
 204 /*ARGSUSED2*/
 205 static int
 206 do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
 207 {
 208         privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
 209         struct as *as = curproc->p_as;
 210         struct seg *seg;
 211         int i, error = 0;
 212         caddr_t addr;
 213         ulong_t *ulp;
 214 
 215         if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
 216                 return (EFAULT);
 217 
 218         DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
 219             caddr_t, mmb->addr);
 220 
 221         addr = (caddr_t)mmb->addr;
 222         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 223         if ((seg = as_findseg(as, addr, 0)) == NULL ||
 224             addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
 225                 error = EINVAL;
 226                 goto done;
 227         }
 228 
 229         for (i = 0, ulp = mmb->arr;
 230             i < mmb->num; i++, addr += PAGESIZE, ulp++) {
 231                 mfn_t mfn;
 232 
 233                 if (fulword(ulp, &mfn) != 0) {
 234                         error = EFAULT;
 235                         break;
 236                 }
 237 
 238                 if (mfn == MFN_INVALID) {
 239                         /*
 240                          * This mfn is invalid and should not be added to
 241                          * segmf, as we'd only cause an immediate EFAULT when
 242                          * we tried to fault it in.
 243                          */
 244                         mfn |= XEN_DOMCTL_PFINFO_XTAB;
 245                         continue;
 246                 }
 247 
 248                 if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
 249                         continue;
 250 
 251                 /*
 252                  * Tell the process that this MFN could not be mapped, so it
 253                  * won't later try to access it.
 254                  */
 255                 mfn |= XEN_DOMCTL_PFINFO_XTAB;
 256                 if (sulword(ulp, mfn) != 0) {
 257                         error = EFAULT;
 258                         break;
 259                 }
 260         }
 261 
 262 done:
 263         AS_LOCK_EXIT(as, &as->a_lock);
 264 
 265         DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
 266             mmb->addr);
 267 
 268         return (error);
 269 }
 270 
 271 /*ARGSUSED*/
 272 static int
 273 privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
 274 {
 275         if (secpolicy_xvm_control(cr))
 276                 return (EPERM);
 277 
 278         /*
 279          * Everything is a -native- data type.
 280          */
 281         if ((mode & FMODELS) != FNATIVE)
 282                 return (EOVERFLOW);
 283 
 284         switch (cmd) {
 285         case IOCTL_PRIVCMD_HYPERCALL:
 286                 return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
 287         case IOCTL_PRIVCMD_MMAP:
 288                 if (DOMAIN_IS_PRIVILEGED(xen_info))
 289                         return (do_privcmd_mmap((void *)arg, mode, cr));
 290                 break;
 291         case IOCTL_PRIVCMD_MMAPBATCH:
 292                 if (DOMAIN_IS_PRIVILEGED(xen_info))
 293                         return (do_privcmd_mmapbatch((void *)arg, mode, cr));
 294                 break;
 295         default:
 296                 break;
 297         }
 298         return (EINVAL);
 299 }
 300 
 301 /*
 302  * The real magic happens in the segmf segment driver.
 303  */
 304 /*ARGSUSED8*/
 305 static int
 306 privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
 307     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
 308 {
 309         struct segmf_crargs a;
 310         int error;
 311 
 312         if (secpolicy_xvm_control(cr))
 313                 return (EPERM);
 314 
 315         as_rangelock(as);
 316         if ((flags & MAP_FIXED) == 0) {
 317                 map_addr(addrp, len, (offset_t)off, 0, flags);
 318                 if (*addrp == NULL) {
 319                         error = ENOMEM;
 320                         goto rangeunlock;
 321                 }
 322         } else {
 323                 /*
 324                  * User specified address
 325                  */
 326                 (void) as_unmap(as, *addrp, len);
 327         }
 328 
 329         /*
 330          * The mapping *must* be MAP_SHARED at offset 0.
 331          *
 332          * (Foreign pages are treated like device memory; the
 333          * ioctl interface allows the backing objects to be
 334          * arbitrarily redefined to point at any machine frame.)
 335          */
 336         if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
 337                 error = EINVAL;
 338                 goto rangeunlock;
 339         }
 340 
 341         a.dev = dev;
 342         a.prot = (uchar_t)prot;
 343         a.maxprot = (uchar_t)maxprot;
 344         error = as_map(as, *addrp, len, segmf_create, &a);
 345 
 346 rangeunlock:
 347         as_rangeunlock(as);
 348         return (error);
 349 }
 350 
/*
 * Character/block entry points for the driver.  Only open, ioctl and
 * segmap are implemented here; the remaining slots are filled with the
 * stock nodev/nulldev/nochpoll/ddi_prop_op routines.  The initializer is
 * positional, so the order must follow the struct cb_ops declaration.
 */
static struct cb_ops privcmd_cb_ops = {
        privcmd_open,   /* open */
        nulldev,        /* close */
        nodev,          /* strategy */
        nodev,          /* print */
        nodev,          /* dump */
        nodev,          /* read */
        nodev,          /* write */
        privcmd_ioctl,  /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        privcmd_segmap, /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* prop_op */
        NULL,           /* streamtab; see cb_ops(9S) */
        D_64BIT | D_NEW | D_MP  /* driver compatibility flags */
};
 368 
/*
 * Device operations vector handed to the module framework through
 * modldrv below.  The initializer is positional, so the order must follow
 * the struct dev_ops declaration.
 */
static struct dev_ops privcmd_dv_ops = {
        DEVO_REV,               /* revision */
        0,                      /* reference count; see dev_ops(9S) */
        privcmd_getinfo,        /* getinfo */
        nulldev,                /* identify */
        nulldev,                /* probe */
        privcmd_attach,         /* attach */
        privcmd_detach,         /* detach */
        nodev,                  /* reset */
        &privcmd_cb_ops,        /* cb_ops */
        0,                      /* struct bus_ops */
        NULL,                   /* power */
        ddi_quiesce_not_needed,         /* quiesce */
};
 383 
/*
 * Driver linkage: ties the dev_ops vector above to the module framework.
 */
static struct modldrv modldrv = {
        &mod_driverops,         /* module operations for a driver */
        "privcmd driver",       /* description string */
        &privcmd_dv_ops         /* driver operations */
};
 389 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().
 * Only the single driver linkage is listed; members not named in the
 * initializer are implicitly zero.
 */
static struct modlinkage modl = {
        MODREV_1,       /* revision */
        &modldrv        /* the one linkage structure of this module */
};
 394 
 395 int
 396 _init(void)
 397 {
 398         return (mod_install(&modl));
 399 }
 400 
 401 int
 402 _fini(void)
 403 {
 404         return (mod_remove(&modl));
 405 }
 406 
 407 int
 408 _info(struct modinfo *modinfo)
 409 {
 410         return (mod_info(&modl, modinfo));
 411 }