1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27 /*        All Rights Reserved   */
  28 
  29 #include <sys/atomic.h>
  30 #include <sys/errno.h>
  31 #include <sys/stat.h>
  32 #include <sys/modctl.h>
  33 #include <sys/conf.h>
  34 #include <sys/systm.h>
  35 #include <sys/ddi.h>
  36 #include <sys/sunddi.h>
  37 #include <sys/cpuvar.h>
  38 #include <sys/kmem.h>
  39 #include <sys/strsubr.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/frame.h>
  42 #include <sys/stack.h>
  43 #include <sys/proc.h>
  44 #include <sys/priv.h>
  45 #include <sys/policy.h>
  46 #include <sys/ontrap.h>
  47 #include <sys/vmsystm.h>
  48 #include <sys/prsystm.h>
  49 
  50 #include <vm/as.h>
  51 #include <vm/seg.h>
  52 #include <vm/seg_dev.h>
  53 #include <vm/seg_vn.h>
  54 #include <vm/seg_spt.h>
  55 #include <vm/seg_kmem.h>
  56 
  57 extern struct seg_ops segdev_ops;       /* needs a header file */
  58 extern struct seg_ops segspt_shmops;    /* needs a header file */
  59 
  60 static int
  61 page_valid(struct seg *seg, caddr_t addr)
  62 {
  63         struct segvn_data *svd;
  64         vnode_t *vp;
  65         vattr_t vattr;
  66 
  67         /*
  68          * Fail if the page doesn't map to a page in the underlying
  69          * mapped file, if an underlying mapped file exists.
  70          */
  71         vattr.va_mask = AT_SIZE;
  72         if (seg->s_ops == &segvn_ops &&
  73             segop_getvp(seg, addr, &vp) == 0 &&
  74             vp != NULL && vp->v_type == VREG &&
  75             VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
  76                 u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
  77                 u_offset_t offset = segop_getoffset(seg, addr);
  78 
  79                 if (offset >= size)
  80                         return (0);
  81         }
  82 
  83         /*
  84          * Fail if this is an ISM shared segment and the address is
  85          * not within the real size of the spt segment that backs it.
  86          */
  87         if (seg->s_ops == &segspt_shmops &&
  88             addr >= seg->s_base + spt_realsize(seg))
  89                 return (0);
  90 
  91         /*
  92          * Fail if the segment is mapped from /dev/null.
  93          * The key is that the mapping comes from segdev and the
  94          * type is neither MAP_SHARED nor MAP_PRIVATE.
  95          */
  96         if (seg->s_ops == &segdev_ops &&
  97             ((segop_gettype(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
  98                 return (0);
  99 
 100         /*
 101          * Fail if the page is a MAP_NORESERVE page that has
 102          * not actually materialized.
 103          * We cheat by knowing that segvn is the only segment
 104          * driver that supports MAP_NORESERVE.
 105          */
 106         if (seg->s_ops == &segvn_ops &&
 107             (svd = (struct segvn_data *)seg->s_data) != NULL &&
 108             (svd->vp == NULL || svd->vp->v_type != VREG) &&
 109             (svd->flags & MAP_NORESERVE)) {
 110                 /*
 111                  * Guilty knowledge here.  We know that
 112                  * segvn_incore returns more than just the
 113                  * low-order bit that indicates the page is
 114                  * actually in memory.  If any bits are set,
 115                  * then there is backing store for the page.
 116                  */
 117                 char incore = 0;
 118                 (void) segop_incore(seg, addr, PAGESIZE, &incore);
 119                 if (incore == 0)
 120                         return (0);
 121         }
 122         return (1);
 123 }
 124 
 125 /*
 126  * Map address "addr" in address space "as" into a kernel virtual address.
 127  * The memory is guaranteed to be resident and locked down.
 128  */
 129 static caddr_t
 130 mapin(struct as *as, caddr_t addr, int writing)
 131 {
 132         page_t *pp;
 133         caddr_t kaddr;
 134         pfn_t pfnum;
 135 
 136         /*
 137          * NB: Because of past mistakes, we have bits being returned
 138          * by getpfnum that are actually the page type bits of the pte.
 139          * When the object we are trying to map is a memory page with
 140          * a page structure everything is ok and we can use the optimal
 141          * method, ppmapin.  Otherwise, we have to do something special.
 142          */
 143         pfnum = hat_getpfnum(as->a_hat, addr);
 144         if (pf_is_memory(pfnum)) {
 145                 pp = page_numtopp_nolock(pfnum);
 146                 if (pp != NULL) {
 147                         ASSERT(PAGE_LOCKED(pp));
 148                         kaddr = ppmapin(pp, writing ?
 149                                 (PROT_READ | PROT_WRITE) : PROT_READ,
 150                                 (caddr_t)-1);
 151                         return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
 152                 }
 153         }
 154 
 155         /*
 156          * Oh well, we didn't have a page struct for the object we were
 157          * trying to map in; ppmapin doesn't handle devices, but allocating a
 158          * heap address allows ppmapout to free virutal space when done.
 159          */
 160         kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
 161 
 162         hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
 163                 writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);
 164 
 165         return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
 166 }
 167 
 168 /*ARGSUSED*/
 169 static void
 170 mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
 171 {
 172         vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
 173         ppmapout(vaddr);
 174 }
 175 
 176 /*
 177  * Perform I/O to a given process. This will return EIO if we detect
 178  * corrupt memory and ENXIO if there is no such mapped address in the
 179  * user process's address space.
 180  */
 181 static int
 182 urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
 183 {
 184         caddr_t addr = (caddr_t)a;
 185         caddr_t page;
 186         caddr_t vaddr;
 187         struct seg *seg;
 188         int error = 0;
 189         int err = 0;
 190         uint_t prot;
 191         uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
 192         int protchanged;
 193         on_trap_data_t otd;
 194         int retrycnt;
 195         struct as *as = p->p_as;
 196         enum seg_rw rw;
 197 
 198         /*
 199          * Locate segment containing address of interest.
 200          */
 201         page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
 202         retrycnt = 0;
 203         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 204 retry:
 205         if ((seg = as_segat(as, page)) == NULL ||
 206             !page_valid(seg, page)) {
 207                 AS_LOCK_EXIT(as, &as->a_lock);
 208                 return (ENXIO);
 209         }
 210         (void) segop_getprot(seg, page, 0, &prot);
 211 
 212         protchanged = 0;
 213         if ((prot & prot_rw) == 0) {
 214                 protchanged = 1;
 215                 err = segop_setprot(seg, page, PAGESIZE, prot | prot_rw);
 216 
 217                 if (err == IE_RETRY) {
 218                         protchanged = 0;
 219                         ASSERT(retrycnt == 0);
 220                         retrycnt++;
 221                         goto retry;
 222                 }
 223 
 224                 if (err != 0) {
 225                         AS_LOCK_EXIT(as, &as->a_lock);
 226                         return (ENXIO);
 227                 }
 228         }
 229 
 230         /*
 231          * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
 232          * sharing to avoid a copy on write of a softlocked page by another
 233          * thread. But since we locked the address space as a writer no other
 234          * thread can cause a copy on write. S_READ_NOCOW is passed as the
 235          * access type to tell segvn that it's ok not to do a copy-on-write
 236          * for this SOFTLOCK fault.
 237          */
 238         if (writing)
 239                 rw = S_WRITE;
 240         else if (seg->s_ops == &segvn_ops)
 241                 rw = S_READ_NOCOW;
 242         else
 243                 rw = S_READ;
 244 
 245         if (segop_fault(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
 246                 if (protchanged)
 247                         (void) segop_setprot(seg, page, PAGESIZE, prot);
 248                 AS_LOCK_EXIT(as, &as->a_lock);
 249                 return (ENXIO);
 250         }
 251         CPU_STATS_ADD_K(vm, softlock, 1);
 252 
 253         /*
 254          * Make sure we're not trying to read or write off the end of the page.
 255          */
 256         ASSERT(len <= page + PAGESIZE - addr);
 257 
 258         /*
 259          * Map in the locked page, copy to our local buffer,
 260          * then map the page out and unlock it.
 261          */
 262         vaddr = mapin(as, addr, writing);
 263 
 264         /*
 265          * Since we are copying memory on behalf of the user process,
 266          * protect against memory error correction faults.
 267          */
 268         if (!on_trap(&otd, OT_DATA_EC)) {
 269                 if (seg->s_ops == &segdev_ops) {
 270                         /*
 271                          * Device memory can behave strangely; invoke
 272                          * a segdev-specific copy operation instead.
 273                          */
 274                         if (writing) {
 275                                 if (segdev_copyto(seg, addr, buf, vaddr, len))
 276                                         error = ENXIO;
 277                         } else {
 278                                 if (segdev_copyfrom(seg, addr, vaddr, buf, len))
 279                                         error = ENXIO;
 280                         }
 281                 } else {
 282                         if (writing)
 283                                 bcopy(buf, vaddr, len);
 284                         else
 285                                 bcopy(vaddr, buf, len);
 286                 }
 287         } else {
 288                 error = EIO;
 289         }
 290         no_trap();
 291 
 292         /*
 293          * If we're writing to an executable page, we may need to sychronize
 294          * the I$ with the modifications we made through the D$.
 295          */
 296         if (writing && (prot & PROT_EXEC))
 297                 sync_icache(vaddr, (uint_t)len);
 298 
 299         mapout(as, addr, vaddr, writing);
 300 
 301         if (rw == S_READ_NOCOW)
 302                 rw = S_READ;
 303 
 304         (void) segop_fault(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);
 305 
 306         if (protchanged)
 307                 (void) segop_setprot(seg, page, PAGESIZE, prot);
 308 
 309         AS_LOCK_EXIT(as, &as->a_lock);
 310 
 311         return (error);
 312 }
 313 
 314 int
 315 uread(proc_t *p, void *buf, size_t len, uintptr_t a)
 316 {
 317         return (urw(p, 0, buf, len, a));
 318 }
 319 
 320 int
 321 uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
 322 {
 323         return (urw(p, 1, buf, len, a));
 324 }