1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27 /* All Rights Reserved */
  28 
  29 /*
  30  * Portions of this source code were derived from Berkeley 4.3 BSD
  31  * under license from the Regents of the University of California.
  32  */
  33 
  34 /*
  35  * VM - segment for non-faulting loads.
  36  */
  37 
  38 #include <sys/types.h>
  39 #include <sys/t_lock.h>
  40 #include <sys/param.h>
  41 #include <sys/mman.h>
  42 #include <sys/errno.h>
  43 #include <sys/kmem.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/vnode.h>
  46 #include <sys/proc.h>
  47 #include <sys/conf.h>
  48 #include <sys/debug.h>
  49 #include <sys/archsystm.h>
  50 #include <sys/lgrp.h>
  51 
  52 #include <vm/page.h>
  53 #include <vm/hat.h>
  54 #include <vm/as.h>
  55 #include <vm/seg.h>
  56 #include <vm/vpage.h>
  57 
  58 /*
  59  * Private seg op routines.
  60  */
  61 static int      segnf_dup(struct seg *seg, struct seg *newseg);
  62 static int      segnf_unmap(struct seg *seg, caddr_t addr, size_t len);
  63 static void     segnf_free(struct seg *seg);
  64 static faultcode_t segnf_nomap(void);
  65 static int      segnf_setprot(struct seg *seg, caddr_t addr,
  66                     size_t len, uint_t prot);
  67 static int      segnf_checkprot(struct seg *seg, caddr_t addr,
  68                     size_t len, uint_t prot);
  69 static int      segnf_nop(void);
  70 static int      segnf_getprot(struct seg *seg, caddr_t addr,
  71                     size_t len, uint_t *protv);
  72 static u_offset_t segnf_getoffset(struct seg *seg, caddr_t addr);
  73 static int      segnf_gettype(struct seg *seg, caddr_t addr);
  74 static int      segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
  75 static void     segnf_dump(struct seg *seg);
  76 static int      segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
  77                     struct page ***ppp, enum lock_type type, enum seg_rw rw);
  78 static int      segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
  79                     uint_t szc);
  80 static int      segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  81 static lgrp_mem_policy_info_t   *segnf_getpolicy(struct seg *seg,
  82     caddr_t addr);
  83 
  84 
  85 struct seg_ops segnf_ops = {
  86         .dup            = segnf_dup,
  87         .unmap          = segnf_unmap,
  88         .free           = segnf_free,
  89         .fault          = (faultcode_t (*)(struct hat *, struct seg *, caddr_t,
  90             size_t, enum fault_type, enum seg_rw))segnf_nomap,
  91         .faulta         = (faultcode_t (*)(struct seg *, caddr_t)) segnf_nomap,
  92         .setprot        = segnf_setprot,
  93         .checkprot      = segnf_checkprot,
  94         .sync           = (int (*)(struct seg *, caddr_t, size_t, int, uint_t))
  95                 segnf_nop,
  96         .incore         = (size_t (*)(struct seg *, caddr_t, size_t, char *))
  97                 segnf_nop,
  98         .lockop         = (int (*)(struct seg *, caddr_t, size_t, int, int,
  99             ulong_t *, size_t))segnf_nop,
 100         .getprot        = segnf_getprot,
 101         .getoffset      = segnf_getoffset,
 102         .gettype        = segnf_gettype,
 103         .getvp          = segnf_getvp,
 104         .advise         = (int (*)(struct seg *, caddr_t, size_t, uint_t))
 105                 segnf_nop,
 106         .dump           = segnf_dump,
 107         .pagelock       = segnf_pagelock,
 108         .setpagesize    = segnf_setpagesize,
 109         .getmemid       = segnf_getmemid,
 110         .getpolicy      = segnf_getpolicy,
 111 };
 112 
 113 /*
 114  * vnode and page for the page of zeros we use for the nf mappings.
 115  */
 116 static kmutex_t segnf_lock;
 117 static struct vnode nfvp;
 118 static struct page **nfpp;
 119 
 120 #define addr_to_vcolor(addr)                                            \
 121         (shm_alignment) ?                                               \
 122         ((int)(((uintptr_t)(addr) & (shm_alignment - 1)) >> PAGESHIFT)) : 0
 123 
 124 /*
 125  * We try to limit the number of Non-fault segments created.
 126  * Non fault segments are created to optimize sparc V9 code which uses
 127  * the sparc nonfaulting load ASI (ASI_PRIMARY_NOFAULT).
 128  *
 129  * There are several reasons why creating too many non-fault segments
 130  * could cause problems.
 131  *
 132  *      First, excessive allocation of kernel resources for the seg
 133  *      structures and the HAT data to map the zero pages.
 134  *
 135  *      Secondly, creating nofault segments actually uses up user virtual
 136  *      address space. This makes it unavailable for subsequent mmap(0, ...)
 137  *      calls which use as_gap() to find empty va regions.  Creation of too
 138  *      many nofault segments could thus interfere with the ability of the
 139  *      runtime linker to load a shared object.
 140  */
 141 #define MAXSEGFORNF     (10000)
 142 #define MAXNFSEARCH     (5)
 143 
 144 
 145 /*
 146  * Must be called from startup()
 147  */
 148 void
 149 segnf_init()
 150 {
 151         mutex_init(&segnf_lock, NULL, MUTEX_DEFAULT, NULL);
 152 }
 153 
 154 
 155 /*
 156  * Create a no-fault segment.
 157  *
 158  * The no-fault segment is not technically necessary, as the code in
 159  * nfload() in trap.c will emulate the SPARC instruction and load
 160  * a value of zero in the destination register.
 161  *
 162  * However, this code tries to put a page of zero's at the nofault address
 163  * so that subsequent non-faulting loads to the same page will not
 164  * trap with a tlb miss.
 165  *
 166  * In order to help limit the number of segments we merge adjacent nofault
 167  * segments into a single segment.  If we get a large number of segments
 168  * we'll also try to delete a random other nf segment.
 169  */
 170 /* ARGSUSED */
 171 int
 172 segnf_create(struct seg *seg, void *argsp)
 173 {
 174         uint_t prot;
 175         pgcnt_t vacpgs;
 176         u_offset_t off = 0;
 177         caddr_t vaddr = NULL;
 178         int i, color;
 179         struct seg *s1;
 180         struct seg *s2;
 181         size_t size;
 182         struct as *as = seg->s_as;
 183 
 184         ASSERT(as && AS_WRITE_HELD(as, &as->a_lock));
 185 
 186         /*
 187          * Need a page per virtual color or just 1 if no vac.
 188          */
 189         mutex_enter(&segnf_lock);
 190         if (nfpp == NULL) {
 191                 struct seg kseg;
 192 
 193                 vacpgs = 1;
 194                 if (shm_alignment > PAGESIZE) {
 195                         vacpgs = shm_alignment >> PAGESHIFT;
 196                 }
 197 
 198                 nfpp = kmem_alloc(sizeof (*nfpp) * vacpgs, KM_SLEEP);
 199 
 200                 kseg.s_as = &kas;
 201                 for (i = 0; i < vacpgs; i++, off += PAGESIZE,
 202                     vaddr += PAGESIZE) {
 203                         nfpp[i] = page_create_va(&nfvp, off, PAGESIZE,
 204                             PG_WAIT | PG_NORELOC, &kseg, vaddr);
 205                         page_io_unlock(nfpp[i]);
 206                         page_downgrade(nfpp[i]);
 207                         pagezero(nfpp[i], 0, PAGESIZE);
 208                 }
 209         }
 210         mutex_exit(&segnf_lock);
 211 
 212         hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
 213 
 214         /*
 215          * s_data can't be NULL because of ASSERTS in the common vm code.
 216          */
 217         seg->s_ops = &segnf_ops;
 218         seg->s_data = seg;
 219         seg->s_flags |= S_PURGE;
 220 
 221         mutex_enter(&as->a_contents);
 222         as->a_flags |= AS_NEEDSPURGE;
 223         mutex_exit(&as->a_contents);
 224 
 225         prot = PROT_READ;
 226         color = addr_to_vcolor(seg->s_base);
 227         if (as != &kas)
 228                 prot |= PROT_USER;
 229         hat_memload(as->a_hat, seg->s_base, nfpp[color],
 230             prot | HAT_NOFAULT, HAT_LOAD);
 231 
 232         /*
 233          * At this point see if we can concatenate a segment to
 234          * a non-fault segment immediately before and/or after it.
 235          */
 236         if ((s1 = AS_SEGPREV(as, seg)) != NULL &&
 237             s1->s_ops == &segnf_ops &&
 238             s1->s_base + s1->s_size == seg->s_base) {
 239                 size = s1->s_size;
 240                 seg_free(s1);
 241                 seg->s_base -= size;
 242                 seg->s_size += size;
 243         }
 244 
 245         if ((s2 = AS_SEGNEXT(as, seg)) != NULL &&
 246             s2->s_ops == &segnf_ops &&
 247             seg->s_base + seg->s_size == s2->s_base) {
 248                 size = s2->s_size;
 249                 seg_free(s2);
 250                 seg->s_size += size;
 251         }
 252 
 253         /*
 254          * if we already have a lot of segments, try to delete some other
 255          * nofault segment to reduce the probability of uncontrolled segment
 256          * creation.
 257          *
 258          * the code looks around quickly (no more than MAXNFSEARCH segments
 259          * each way) for another NF segment and then deletes it.
 260          */
 261         if (avl_numnodes(&as->a_segtree) > MAXSEGFORNF) {
 262                 size = 0;
 263                 s2 = NULL;
 264                 s1 = AS_SEGPREV(as, seg);
 265                 while (size++ < MAXNFSEARCH && s1 != NULL) {
 266                         if (s1->s_ops == &segnf_ops)
 267                                 s2 = s1;
 268                         s1 = AS_SEGPREV(s1->s_as, seg);
 269                 }
 270                 if (s2 == NULL) {
 271                         s1 = AS_SEGNEXT(as, seg);
 272                         while (size-- > 0 && s1 != NULL) {
 273                                 if (s1->s_ops == &segnf_ops)
 274                                         s2 = s1;
 275                                 s1 = AS_SEGNEXT(as, seg);
 276                         }
 277                 }
 278                 if (s2 != NULL)
 279                         seg_unmap(s2);
 280         }
 281 
 282         return (0);
 283 }
 284 
 285 /*
 286  * Never really need "No fault" segments, so they aren't dup'd.
 287  */
 288 /* ARGSUSED */
 289 static int
 290 segnf_dup(struct seg *seg, struct seg *newseg)
 291 {
 292         panic("segnf_dup");
 293         return (0);
 294 }
 295 
 296 /*
 297  * Split a segment at addr for length len.
 298  */
 299 static int
 300 segnf_unmap(struct seg *seg, caddr_t addr, size_t len)
 301 {
 302         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 303 
 304         /*
 305          * Check for bad sizes.
 306          */
 307         if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
 308             (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) {
 309                 cmn_err(CE_PANIC, "segnf_unmap: bad unmap size");
 310         }
 311 
 312         /*
 313          * Unload any hardware translations in the range to be taken out.
 314          */
 315         hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
 316 
 317         if (addr == seg->s_base && len == seg->s_size) {
 318                 /*
 319                  * Freeing entire segment.
 320                  */
 321                 seg_free(seg);
 322         } else if (addr == seg->s_base) {
 323                 /*
 324                  * Freeing the beginning of the segment.
 325                  */
 326                 seg->s_base += len;
 327                 seg->s_size -= len;
 328         } else if (addr + len == seg->s_base + seg->s_size) {
 329                 /*
 330                  * Freeing the end of the segment.
 331                  */
 332                 seg->s_size -= len;
 333         } else {
 334                 /*
 335                  * The section to go is in the middle of the segment, so we
 336                  * have to cut it into two segments.  We shrink the existing
 337                  * "seg" at the low end, and create "nseg" for the high end.
 338                  */
 339                 caddr_t nbase = addr + len;
 340                 size_t nsize = (seg->s_base + seg->s_size) - nbase;
 341                 struct seg *nseg;
 342 
 343                 /*
 344                  * Trim down "seg" before trying to stick "nseg" into the as.
 345                  */
 346                 seg->s_size = addr - seg->s_base;
 347                 nseg = seg_alloc(seg->s_as, nbase, nsize);
 348                 if (nseg == NULL)
 349                         cmn_err(CE_PANIC, "segnf_unmap: seg_alloc failed");
 350 
 351                 /*
 352                  * s_data can't be NULL because of ASSERTs in common VM code.
 353                  */
 354                 nseg->s_ops = seg->s_ops;
 355                 nseg->s_data = nseg;
 356                 nseg->s_flags |= S_PURGE;
 357                 mutex_enter(&seg->s_as->a_contents);
 358                 seg->s_as->a_flags |= AS_NEEDSPURGE;
 359                 mutex_exit(&seg->s_as->a_contents);
 360         }
 361 
 362         return (0);
 363 }
 364 
 365 /*
 366  * Free a segment.
 367  */
 368 static void
 369 segnf_free(struct seg *seg)
 370 {
 371         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 372 }
 373 
 374 /*
 375  * No faults allowed on segnf.
 376  */
 377 static faultcode_t
 378 segnf_nomap(void)
 379 {
 380         return (FC_NOMAP);
 381 }
 382 
 383 /* ARGSUSED */
 384 static int
 385 segnf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
 386 {
 387         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 388         return (EACCES);
 389 }
 390 
 391 /* ARGSUSED */
 392 static int
 393 segnf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
 394 {
 395         uint_t sprot;
 396         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 397 
 398         sprot = seg->s_as == &kas ?  PROT_READ : PROT_READ|PROT_USER;
 399         return ((prot & sprot) == prot ? 0 : EACCES);
 400 }
 401 
 402 static int
 403 segnf_nop(void)
 404 {
 405         return (0);
 406 }
 407 
 408 static int
 409 segnf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
 410 {
 411         size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
 412         size_t p;
 413         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 414 
 415         for (p = 0; p < pgno; ++p)
 416                 protv[p] = PROT_READ;
 417         return (0);
 418 }
 419 
 420 /* ARGSUSED */
 421 static u_offset_t
 422 segnf_getoffset(struct seg *seg, caddr_t addr)
 423 {
 424         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 425 
 426         return ((u_offset_t)0);
 427 }
 428 
 429 /* ARGSUSED */
 430 static int
 431 segnf_gettype(struct seg *seg, caddr_t addr)
 432 {
 433         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 434 
 435         return (MAP_SHARED);
 436 }
 437 
 438 /* ARGSUSED */
 439 static int
 440 segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
 441 {
 442         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 443 
 444         *vpp = &nfvp;
 445         return (0);
 446 }
 447 
 448 /*
 449  * segnf pages are not dumped, so we just return
 450  */
 451 /* ARGSUSED */
 452 static void
 453 segnf_dump(struct seg *seg)
 454 {}
 455 
 456 /*ARGSUSED*/
 457 static int
 458 segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
 459     struct page ***ppp, enum lock_type type, enum seg_rw rw)
 460 {
 461         return (ENOTSUP);
 462 }
 463 
 464 /*ARGSUSED*/
 465 static int
 466 segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
 467     uint_t szc)
 468 {
 469         return (ENOTSUP);
 470 }
 471 
 472 /*ARGSUSED*/
 473 static int
 474 segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
 475 {
 476         return (ENODEV);
 477 }
 478 
 479 /*ARGSUSED*/
 480 static lgrp_mem_policy_info_t *
 481 segnf_getpolicy(struct seg *seg, caddr_t addr)
 482 {
 483         return (NULL);
 484 }