const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle a NULL capable segop function pointer as "no capabilities
supported" shorthand.
segop_getpolicy already checks for a NULL op
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL accomplishes the same thing in most cases;
in the remaining cases it results in the proper error code being returned.
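To make the two cases concrete (the routine and error value below are
assumptions for illustration, not code from this change): a call through a
NULL op pointer still panics the kernel, exactly as the explicit bad-op stub
did, while a dispatch routine that tests the pointer first can fail
gracefully:

    /*
     * Illustrative sketch only.  Without a bad-op stub, a stray call
     * through the NULL slot panics on the NULL function call, much like
     * the old panicking stubs.  Where the generic layer tests the
     * pointer, it can return an error instead of panicking.
     */
    static int
    segop_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
    {
            if (seg->s_ops->setpagesize == NULL)
                    return (ENOTSUP);   /* hypothetical "not supported" path */
            return (seg->s_ops->setpagesize(seg, addr, len, szc));
    }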
use C99 initializers in segment ops structures
remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory. The code is there and in theory it runs when we get *extremely* low
on memory. In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
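kstat -p prints one module:instance:name:statistic line per match with the
value in the last column; on a machine where the swapper has never run, the
output would look something like this (illustrative, assuming the counters
live in the per-CPU cpu::vm kstats):

cpu:0:vm:swapin     0
cpu:0:vm:swapout    0
cpu:1:vm:swapin     0
cpu:1:vm:swapout    0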
--- old/usr/src/uts/common/vm/seg_map.c
+++ new/usr/src/uts/common/vm/seg_map.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 31 * under license from the Regents of the University of California.
32 32 */
33 33
34 34 /*
35 35 * VM - generic vnode mapping segment.
36 36 *
37 37 * The segmap driver is used only by the kernel to get faster (than seg_vn)
38 38 * mappings [lower routine overhead; more persistent cache] to random
39 39 * vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/param.h>
45 45 #include <sys/sysmacros.h>
46 46 #include <sys/buf.h>
47 47 #include <sys/systm.h>
48 48 #include <sys/vnode.h>
49 49 #include <sys/mman.h>
50 50 #include <sys/errno.h>
51 51 #include <sys/cred.h>
52 52 #include <sys/kmem.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/cmn_err.h>
55 55 #include <sys/debug.h>
56 56 #include <sys/thread.h>
57 57 #include <sys/dumphdr.h>
58 58 #include <sys/bitmap.h>
59 59 #include <sys/lgrp.h>
60 60
61 61 #include <vm/seg_kmem.h>
62 62 #include <vm/hat.h>
63 63 #include <vm/as.h>
64 64 #include <vm/seg.h>
65 65 #include <vm/seg_kpm.h>
66 66 #include <vm/seg_map.h>
67 67 #include <vm/page.h>
68 68 #include <vm/pvn.h>
69 69 #include <vm/rm.h>
70 70
71 71 /*
72 72 * Private seg op routines.
73 73 */
74 74 static void segmap_free(struct seg *seg);
75 75 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
76 76 size_t len, enum fault_type type, enum seg_rw rw);
77 77 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
78 78 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
79 79 uint_t prot);
80 80 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
81 81 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
82 82 uint_t *protv);
83 83 static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr);
84 84 static int segmap_gettype(struct seg *seg, caddr_t addr);
85 85 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
86 86 static void segmap_dump(struct seg *seg);
87 87 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
88 88 struct page ***ppp, enum lock_type type,
89 89 enum seg_rw rw);
90 -static void segmap_badop(void);
91 90 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
92 -static lgrp_mem_policy_info_t *segmap_getpolicy(struct seg *seg,
93 - caddr_t addr);
94 -static int segmap_capable(struct seg *seg, segcapability_t capability);
95 91
96 92 /* segkpm support */
97 93 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
98 94 struct smap *, enum seg_rw);
99 95 struct smap *get_smap_kpm(caddr_t, page_t **);
100 96
101 -#define SEGMAP_BADOP(t) (t(*)())segmap_badop
102 -
103 -static struct seg_ops segmap_ops = {
104 - SEGMAP_BADOP(int), /* dup */
105 - SEGMAP_BADOP(int), /* unmap */
106 - segmap_free,
107 - segmap_fault,
108 - segmap_faulta,
109 - SEGMAP_BADOP(int), /* setprot */
110 - segmap_checkprot,
111 - segmap_kluster,
112 - SEGMAP_BADOP(size_t), /* swapout */
113 - SEGMAP_BADOP(int), /* sync */
114 - SEGMAP_BADOP(size_t), /* incore */
115 - SEGMAP_BADOP(int), /* lockop */
116 - segmap_getprot,
117 - segmap_getoffset,
118 - segmap_gettype,
119 - segmap_getvp,
120 - SEGMAP_BADOP(int), /* advise */
121 - segmap_dump,
122 - segmap_pagelock, /* pagelock */
123 - SEGMAP_BADOP(int), /* setpgsz */
124 - segmap_getmemid, /* getmemid */
125 - segmap_getpolicy, /* getpolicy */
126 - segmap_capable, /* capable */
127 - seg_inherit_notsup /* inherit */
97 +static const struct seg_ops segmap_ops = {
98 + .free = segmap_free,
99 + .fault = segmap_fault,
100 + .faulta = segmap_faulta,
101 + .checkprot = segmap_checkprot,
102 + .kluster = segmap_kluster,
103 + .getprot = segmap_getprot,
104 + .getoffset = segmap_getoffset,
105 + .gettype = segmap_gettype,
106 + .getvp = segmap_getvp,
107 + .dump = segmap_dump,
108 + .pagelock = segmap_pagelock,
109 + .getmemid = segmap_getmemid,
128 110 };
129 111
130 112 /*
131 113 * Private segmap routines.
132 114 */
133 115 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
134 116 size_t len, enum seg_rw rw, struct smap *smp);
135 117 static void segmap_smapadd(struct smap *smp);
136 118 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
137 119 u_offset_t off, int hashid);
138 120 static void segmap_hashout(struct smap *smp);
139 121
140 122
141 123 /*
142 124 * Statistics for segmap operations.
143 125 *
144 126 * No explicit locking to protect these stats.
145 127 */
146 128 struct segmapcnt segmapcnt = {
147 129 { "fault", KSTAT_DATA_ULONG },
148 130 { "faulta", KSTAT_DATA_ULONG },
149 131 { "getmap", KSTAT_DATA_ULONG },
150 132 { "get_use", KSTAT_DATA_ULONG },
151 133 { "get_reclaim", KSTAT_DATA_ULONG },
152 134 { "get_reuse", KSTAT_DATA_ULONG },
153 135 { "get_unused", KSTAT_DATA_ULONG },
154 136 { "get_nofree", KSTAT_DATA_ULONG },
155 137 { "rel_async", KSTAT_DATA_ULONG },
156 138 { "rel_write", KSTAT_DATA_ULONG },
157 139 { "rel_free", KSTAT_DATA_ULONG },
158 140 { "rel_abort", KSTAT_DATA_ULONG },
159 141 { "rel_dontneed", KSTAT_DATA_ULONG },
160 142 { "release", KSTAT_DATA_ULONG },
161 143 { "pagecreate", KSTAT_DATA_ULONG },
162 144 { "free_notfree", KSTAT_DATA_ULONG },
163 145 { "free_dirty", KSTAT_DATA_ULONG },
164 146 { "free", KSTAT_DATA_ULONG },
165 147 { "stolen", KSTAT_DATA_ULONG },
166 148 { "get_nomtx", KSTAT_DATA_ULONG }
167 149 };
168 150
169 151 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
170 152 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
171 153
172 154 /*
173 155 * Return number of map pages in segment.
174 156 */
175 157 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
176 158
177 159 /*
178 160 * Translate addr into smap number within segment.
179 161 */
180 162 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
181 163
182 164 /*
183 165 * Translate addr in seg into struct smap pointer.
184 166 */
185 167 #define GET_SMAP(seg, addr) \
186 168 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
187 169
188 170 /*
189 171 * Bit in map (16 bit bitmap).
190 172 */
191 173 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
192 174
193 175 static int smd_colormsk = 0;
194 176 static int smd_ncolor = 0;
195 177 static int smd_nfree = 0;
196 178 static int smd_freemsk = 0;
197 179 #ifdef DEBUG
198 180 static int *colors_used;
199 181 #endif
200 182 static struct smap *smd_smap;
201 183 static struct smaphash *smd_hash;
202 184 #ifdef SEGMAP_HASHSTATS
203 185 static unsigned int *smd_hash_len;
204 186 #endif
205 187 static struct smfree *smd_free;
206 188 static ulong_t smd_hashmsk = 0;
207 189
208 190 #define SEGMAP_MAXCOLOR 2
209 191 #define SEGMAP_CACHE_PAD 64
210 192
211 193 union segmap_cpu {
212 194 struct {
213 195 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR];
214 196 struct smap *scpu_last_smap;
215 197 ulong_t scpu_getmap;
216 198 ulong_t scpu_release;
217 199 ulong_t scpu_get_reclaim;
218 200 ulong_t scpu_fault;
219 201 ulong_t scpu_pagecreate;
220 202 ulong_t scpu_get_reuse;
221 203 } scpu;
222 204 char scpu_pad[SEGMAP_CACHE_PAD];
223 205 };
224 206 static union segmap_cpu *smd_cpu;
225 207
226 208 /*
227 209 * There are three locks in seg_map:
228 210 * - per freelist mutexes
229 211 * - per hashchain mutexes
230 212 * - per smap mutexes
231 213 *
232 214 * The lock ordering is to get the smap mutex to lock down the slot
233 215 * first then the hash lock (for hash in/out (vp, off) list) or the
234 216 * freelist lock to put the slot back on the free list.
235 217 *
236 218 * The hash search is done by only holding the hashchain lock, when a wanted
237 219 * slot is found, we drop the hashchain lock then lock the slot so there
238 220 * is no overlapping of hashchain and smap locks. After the slot is
239 221 * locked, we verify again if the slot is still what we are looking
240 222 * for.
241 223 *
242 224 * Allocation of a free slot is done by holding the freelist lock,
243 225 * then locking the smap slot at the head of the freelist. This is
244 226 * in reversed lock order so mutex_tryenter() is used.
245 227 *
246 228 * The smap lock protects all fields in smap structure except for
247 229 * the link fields for hash/free lists which are protected by
248 230 * hashchain and freelist locks.
249 231 */
250 232
251 233 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx)
252 234
253 235 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk])
254 236 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk)
255 237
256 238 #define SMAPMTX(smp) (&smp->sm_mtx)
257 239
258 240 #define SMAP_HASHFUNC(vp, off, hashid) \
259 241 { \
260 242 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
261 243 ((off) >> MAXBSHIFT)) & smd_hashmsk); \
262 244 }
263 245
264 246 /*
265 247 * The most frequently updated kstat counters are kept in the
266 248 * per cpu array to avoid hot cache blocks. The update function
267 249 * sums the cpu local counters to update the global counters.
268 250 */
269 251
270 252 /* ARGSUSED */
271 253 int
272 254 segmap_kstat_update(kstat_t *ksp, int rw)
273 255 {
274 256 int i;
275 257 ulong_t getmap, release, get_reclaim;
276 258 ulong_t fault, pagecreate, get_reuse;
277 259
278 260 if (rw == KSTAT_WRITE)
279 261 return (EACCES);
280 262 getmap = release = get_reclaim = (ulong_t)0;
281 263 fault = pagecreate = get_reuse = (ulong_t)0;
282 264 for (i = 0; i < max_ncpus; i++) {
283 265 getmap += smd_cpu[i].scpu.scpu_getmap;
284 266 release += smd_cpu[i].scpu.scpu_release;
285 267 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
286 268 fault += smd_cpu[i].scpu.scpu_fault;
287 269 pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
288 270 get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
289 271 }
290 272 segmapcnt.smp_getmap.value.ul = getmap;
291 273 segmapcnt.smp_release.value.ul = release;
292 274 segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
293 275 segmapcnt.smp_fault.value.ul = fault;
294 276 segmapcnt.smp_pagecreate.value.ul = pagecreate;
295 277 segmapcnt.smp_get_reuse.value.ul = get_reuse;
296 278 return (0);
297 279 }
298 280
299 281 int
300 282 segmap_create(struct seg *seg, void *argsp)
301 283 {
302 284 struct segmap_data *smd;
303 285 struct smap *smp;
304 286 struct smfree *sm;
305 287 struct segmap_crargs *a = (struct segmap_crargs *)argsp;
306 288 struct smaphash *shashp;
307 289 union segmap_cpu *scpu;
308 290 long i, npages;
309 291 size_t hashsz;
310 292 uint_t nfreelist;
311 293 extern void prefetch_smap_w(void *);
312 294 extern int max_ncpus;
313 295
314 296 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
315 297
316 298 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
317 299 panic("segkmap not MAXBSIZE aligned");
318 300 /*NOTREACHED*/
319 301 }
320 302
321 303 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
322 304
323 305 seg->s_data = (void *)smd;
324 306 seg->s_ops = &segmap_ops;
325 307 smd->smd_prot = a->prot;
326 308
327 309 /*
328 310 * Scale the number of smap freelists to be
329 311 * proportional to max_ncpus * number of virtual colors.
330 312 * The caller can over-ride this scaling by providing
331 313 * a non-zero a->nfreelist argument.
332 314 */
333 315 nfreelist = a->nfreelist;
334 316 if (nfreelist == 0)
335 317 nfreelist = max_ncpus;
336 318 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
337 319 cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
338 320 "%d, using %d", nfreelist, max_ncpus);
339 321 nfreelist = max_ncpus;
340 322 }
341 323 if (!ISP2(nfreelist)) {
342 324 /* round up nfreelist to the next power of two. */
343 325 nfreelist = 1 << (highbit(nfreelist));
344 326 }
345 327
346 328 /*
347 329 * Get the number of virtual colors - must be a power of 2.
348 330 */
349 331 if (a->shmsize)
350 332 smd_ncolor = a->shmsize >> MAXBSHIFT;
351 333 else
352 334 smd_ncolor = 1;
353 335 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
354 336 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
355 337 smd_colormsk = smd_ncolor - 1;
356 338 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
357 339 smd_freemsk = smd_nfree - 1;
358 340
359 341 /*
360 342 * Allocate and initialize the freelist headers.
361 343 * Note that sm_freeq[1] starts out as the release queue. This
362 344 * is known when the smap structures are initialized below.
363 345 */
364 346 smd_free = smd->smd_free =
365 347 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
366 348 for (i = 0; i < smd_nfree; i++) {
367 349 sm = &smd->smd_free[i];
368 350 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
369 351 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
370 352 sm->sm_allocq = &sm->sm_freeq[0];
371 353 sm->sm_releq = &sm->sm_freeq[1];
372 354 }
373 355
374 356 /*
375 357 * Allocate and initialize the smap hash chain headers.
376 358 * Compute hash size rounding down to the next power of two.
377 359 */
378 360 npages = MAP_PAGES(seg);
379 361 smd->smd_npages = npages;
380 362 hashsz = npages / SMAP_HASHAVELEN;
381 363 hashsz = 1 << (highbit(hashsz)-1);
382 364 smd_hashmsk = hashsz - 1;
383 365 smd_hash = smd->smd_hash =
384 366 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
385 367 #ifdef SEGMAP_HASHSTATS
386 368 smd_hash_len =
387 369 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
388 370 #endif
389 371 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
390 372 shashp->sh_hash_list = NULL;
391 373 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
392 374 }
393 375
394 376 /*
395 377 * Allocate and initialize the smap structures.
396 378 * Link all slots onto the appropriate freelist.
397 379 * The smap array is large enough to affect boot time
398 380 * on large systems, so use memory prefetching and only
399 381 * go through the array 1 time. Inline an optimized version
400 382 * of segmap_smapadd to add structures to freelists with
401 383 * knowledge that no locks are needed here.
402 384 */
403 385 smd_smap = smd->smd_sm =
404 386 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
405 387
406 388 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
407 389 smp >= smd->smd_sm; smp--) {
408 390 struct smap *smpfreelist;
409 391 struct sm_freeq *releq;
410 392
411 393 prefetch_smap_w((char *)smp);
412 394
413 395 smp->sm_vp = NULL;
414 396 smp->sm_hash = NULL;
415 397 smp->sm_off = 0;
416 398 smp->sm_bitmap = 0;
417 399 smp->sm_refcnt = 0;
418 400 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
419 401 smp->sm_free_ndx = SMP2SMF_NDX(smp);
420 402
421 403 sm = SMP2SMF(smp);
422 404 releq = sm->sm_releq;
423 405
424 406 smpfreelist = releq->smq_free;
425 407 if (smpfreelist == 0) {
426 408 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
427 409 } else {
428 410 smp->sm_next = smpfreelist;
429 411 smp->sm_prev = smpfreelist->sm_prev;
430 412 smpfreelist->sm_prev = smp;
431 413 smp->sm_prev->sm_next = smp;
432 414 releq->smq_free = smp->sm_next;
433 415 }
434 416
435 417 /*
436 418 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
437 419 */
438 420 smp->sm_flags = 0;
439 421
440 422 #ifdef SEGKPM_SUPPORT
441 423 /*
442 424 * Due to the fragile prefetch loop no
443 425 * separate function is used here.
444 426 */
445 427 smp->sm_kpme_next = NULL;
446 428 smp->sm_kpme_prev = NULL;
447 429 smp->sm_kpme_page = NULL;
448 430 #endif
449 431 }
450 432
451 433 /*
452 434 * Allocate the per color indices that distribute allocation
453 435 * requests over the free lists. Each cpu will have a private
454 436 * rotor index to spread the allocations even across the available
455 437 * smap freelists. Init the scpu_last_smap field to the first
456 438 * smap element so there is no need to check for NULL.
457 439 */
458 440 smd_cpu =
459 441 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
460 442 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
461 443 int j;
462 444 for (j = 0; j < smd_ncolor; j++)
463 445 scpu->scpu.scpu_free_ndx[j] = j;
464 446 scpu->scpu.scpu_last_smap = smd_smap;
465 447 }
466 448
467 449 vpm_init();
468 450
469 451 #ifdef DEBUG
470 452 /*
471 453 * Keep track of which colors are used more often.
472 454 */
473 455 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
474 456 #endif /* DEBUG */
475 457
476 458 return (0);
477 459 }
478 460
479 461 static void
480 462 segmap_free(seg)
481 463 struct seg *seg;
482 464 {
483 465 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
484 466 }
485 467
486 468 /*
487 469 * Do a F_SOFTUNLOCK call over the range requested.
488 470 * The range must have already been F_SOFTLOCK'ed.
489 471 */
490 472 static void
491 473 segmap_unlock(
492 474 struct hat *hat,
493 475 struct seg *seg,
494 476 caddr_t addr,
495 477 size_t len,
496 478 enum seg_rw rw,
497 479 struct smap *smp)
498 480 {
499 481 page_t *pp;
500 482 caddr_t adr;
501 483 u_offset_t off;
502 484 struct vnode *vp;
503 485 kmutex_t *smtx;
504 486
505 487 ASSERT(smp->sm_refcnt > 0);
506 488
507 489 #ifdef lint
508 490 seg = seg;
509 491 #endif
510 492
511 493 if (segmap_kpm && IS_KPM_ADDR(addr)) {
512 494
513 495 /*
514 496 * We're called only from segmap_fault and this was a
515 497 * NOP in case of a kpm based smap, so dangerous things
516 498 * must have happened in the meantime. Pages are prefaulted
517 499 * and locked in segmap_getmapflt and they will not be
518 500 * unlocked until segmap_release.
519 501 */
520 502 panic("segmap_unlock: called with kpm addr %p", (void *)addr);
521 503 /*NOTREACHED*/
522 504 }
523 505
524 506 vp = smp->sm_vp;
525 507 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
526 508
527 509 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
528 510 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
529 511 ushort_t bitmask;
530 512
531 513 /*
532 514 * Use page_find() instead of page_lookup() to
533 515 * find the page since we know that it has
534 516 * "shared" lock.
535 517 */
536 518 pp = page_find(vp, off);
537 519 if (pp == NULL) {
538 520 panic("segmap_unlock: page not found");
539 521 /*NOTREACHED*/
540 522 }
541 523
542 524 if (rw == S_WRITE) {
543 525 hat_setrefmod(pp);
544 526 } else if (rw != S_OTHER) {
545 527 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
546 528 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
547 529 hat_setref(pp);
548 530 }
549 531
550 532 /*
551 533 * Clear bitmap, if the bit corresponding to "off" is set,
552 534 * since the page and translation are being unlocked.
553 535 */
554 536 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
555 537
556 538 /*
557 539 * Large Files: Following assertion is to verify
558 540 * the correctness of the cast to (int) above.
559 541 */
560 542 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
561 543 smtx = SMAPMTX(smp);
562 544 mutex_enter(smtx);
563 545 if (smp->sm_bitmap & bitmask) {
564 546 smp->sm_bitmap &= ~bitmask;
565 547 }
566 548 mutex_exit(smtx);
567 549
568 550 page_unlock(pp);
569 551 }
570 552 }
571 553
572 554 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
573 555
574 556 /*
575 557 * This routine is called via a machine specific fault handling
576 558 * routine. It is also called by software routines wishing to
577 559 * lock or unlock a range of addresses.
578 560 *
579 561 * Note that this routine expects a page-aligned "addr".
580 562 */
581 563 faultcode_t
582 564 segmap_fault(
583 565 struct hat *hat,
584 566 struct seg *seg,
585 567 caddr_t addr,
586 568 size_t len,
587 569 enum fault_type type,
588 570 enum seg_rw rw)
589 571 {
590 572 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
591 573 struct smap *smp;
592 574 page_t *pp, **ppp;
593 575 struct vnode *vp;
594 576 u_offset_t off;
595 577 page_t *pl[MAXPPB + 1];
596 578 uint_t prot;
597 579 u_offset_t addroff;
598 580 caddr_t adr;
599 581 int err;
600 582 u_offset_t sm_off;
601 583 int hat_flag;
602 584
603 585 if (segmap_kpm && IS_KPM_ADDR(addr)) {
604 586 int newpage;
605 587 kmutex_t *smtx;
606 588
607 589 /*
608 590 * Pages are successfully prefaulted and locked in
609 591 * segmap_getmapflt and can't be unlocked until
610 592 * segmap_release. No hat mappings have to be locked
611 593 * and they also can't be unlocked as long as the
612 594 * caller owns an active kpm addr.
613 595 */
614 596 #ifndef DEBUG
615 597 if (type != F_SOFTUNLOCK)
616 598 return (0);
617 599 #endif
618 600
619 601 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
620 602 panic("segmap_fault: smap not found "
621 603 "for addr %p", (void *)addr);
622 604 /*NOTREACHED*/
623 605 }
624 606
625 607 smtx = SMAPMTX(smp);
626 608 #ifdef DEBUG
627 609 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
628 610 if (newpage) {
629 611 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
630 612 (void *)smp);
631 613 }
632 614
633 615 if (type != F_SOFTUNLOCK) {
634 616 mutex_exit(smtx);
635 617 return (0);
636 618 }
637 619 #endif
638 620 mutex_exit(smtx);
639 621 vp = smp->sm_vp;
640 622 sm_off = smp->sm_off;
641 623
642 624 if (vp == NULL)
643 625 return (FC_MAKE_ERR(EIO));
644 626
645 627 ASSERT(smp->sm_refcnt > 0);
646 628
647 629 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
648 630 if (addroff + len > MAXBSIZE)
649 631 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
650 632 (void *)(addr + len));
651 633
652 634 off = sm_off + addroff;
653 635
654 636 pp = page_find(vp, off);
655 637
656 638 if (pp == NULL)
657 639 panic("segmap_fault: softunlock page not found");
658 640
659 641 /*
660 642 * Set ref bit also here in case of S_OTHER to avoid the
661 643 * overhead of supporting other cases than F_SOFTUNLOCK
662 644 * with segkpm. We can do this because the underlying
663 645 * pages are locked anyway.
664 646 */
665 647 if (rw == S_WRITE) {
666 648 hat_setrefmod(pp);
667 649 } else {
668 650 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
669 651 "segmap_fault:pp %p vp %p offset %llx",
670 652 pp, vp, off);
671 653 hat_setref(pp);
672 654 }
673 655
674 656 return (0);
675 657 }
676 658
677 659 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
678 660 smp = GET_SMAP(seg, addr);
679 661 vp = smp->sm_vp;
680 662 sm_off = smp->sm_off;
681 663
682 664 if (vp == NULL)
683 665 return (FC_MAKE_ERR(EIO));
684 666
685 667 ASSERT(smp->sm_refcnt > 0);
686 668
687 669 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
688 670 if (addroff + len > MAXBSIZE) {
689 671 panic("segmap_fault: endaddr %p "
690 672 "exceeds MAXBSIZE chunk", (void *)(addr + len));
691 673 /*NOTREACHED*/
692 674 }
693 675 off = sm_off + addroff;
694 676
695 677 /*
696 678 * First handle the easy stuff
697 679 */
698 680 if (type == F_SOFTUNLOCK) {
699 681 segmap_unlock(hat, seg, addr, len, rw, smp);
700 682 return (0);
701 683 }
702 684
703 685 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
704 686 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
705 687 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
706 688 seg, addr, rw, CRED(), NULL);
707 689
708 690 if (err)
709 691 return (FC_MAKE_ERR(err));
710 692
711 693 prot &= smd->smd_prot;
712 694
713 695 /*
714 696 * Handle all pages returned in the pl[] array.
715 697 * This loop is coded on the assumption that if
716 698 * there was no error from the VOP_GETPAGE routine,
717 699 * that the page list returned will contain all the
718 700 * needed pages for the vp from [off..off + len].
719 701 */
720 702 ppp = pl;
721 703 while ((pp = *ppp++) != NULL) {
722 704 u_offset_t poff;
723 705 ASSERT(pp->p_vnode == vp);
724 706 hat_flag = HAT_LOAD;
725 707
726 708 /*
727 709 * Verify that the pages returned are within the range
728 710 * of this segmap region. Note that it is theoretically
729 711 * possible for pages outside this range to be returned,
730 712 * but it is not very likely. If we cannot use the
731 713 * page here, just release it and go on to the next one.
732 714 */
733 715 if (pp->p_offset < sm_off ||
734 716 pp->p_offset >= sm_off + MAXBSIZE) {
735 717 (void) page_release(pp, 1);
736 718 continue;
737 719 }
738 720
739 721 ASSERT(hat == kas.a_hat);
740 722 poff = pp->p_offset;
741 723 adr = addr + (poff - off);
742 724 if (adr >= addr && adr < addr + len) {
743 725 hat_setref(pp);
744 726 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
745 727 "segmap_fault:pp %p vp %p offset %llx",
746 728 pp, vp, poff);
747 729 if (type == F_SOFTLOCK)
748 730 hat_flag = HAT_LOAD_LOCK;
749 731 }
750 732
751 733 /*
752 734 * Deal with VMODSORT pages here. If we know this is a write
753 735 * do the setmod now and allow write protection.
754 736 * As long as it's modified or not S_OTHER, remove write
755 737 * protection. With S_OTHER it's up to the FS to deal with this.
756 738 */
757 739 if (IS_VMODSORT(vp)) {
758 740 if (rw == S_WRITE)
759 741 hat_setmod(pp);
760 742 else if (rw != S_OTHER && !hat_ismod(pp))
761 743 prot &= ~PROT_WRITE;
762 744 }
763 745
764 746 hat_memload(hat, adr, pp, prot, hat_flag);
765 747 if (hat_flag != HAT_LOAD_LOCK)
766 748 page_unlock(pp);
767 749 }
768 750 return (0);
769 751 }
770 752
771 753 /*
772 754 * This routine is used to start I/O on pages asynchronously.
773 755 */
774 756 static faultcode_t
775 757 segmap_faulta(struct seg *seg, caddr_t addr)
776 758 {
777 759 struct smap *smp;
778 760 struct vnode *vp;
779 761 u_offset_t off;
780 762 int err;
781 763
782 764 if (segmap_kpm && IS_KPM_ADDR(addr)) {
783 765 int newpage;
784 766 kmutex_t *smtx;
785 767
786 768 /*
787 769 * Pages are successfully prefaulted and locked in
788 770 * segmap_getmapflt and can't be unlocked until
789 771 * segmap_release. No hat mappings have to be locked
790 772 * and they also can't be unlocked as long as the
791 773 * caller owns an active kpm addr.
792 774 */
793 775 #ifdef DEBUG
794 776 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
795 777 panic("segmap_faulta: smap not found "
796 778 "for addr %p", (void *)addr);
797 779 /*NOTREACHED*/
798 780 }
799 781
800 782 smtx = SMAPMTX(smp);
801 783 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
802 784 mutex_exit(smtx);
803 785 if (newpage)
804 786 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
805 787 (void *)smp);
806 788 #endif
807 789 return (0);
808 790 }
809 791
810 792 segmapcnt.smp_faulta.value.ul++;
811 793 smp = GET_SMAP(seg, addr);
812 794
813 795 ASSERT(smp->sm_refcnt > 0);
814 796
815 797 vp = smp->sm_vp;
816 798 off = smp->sm_off;
817 799
818 800 if (vp == NULL) {
819 801 cmn_err(CE_WARN, "segmap_faulta - no vp");
820 802 return (FC_MAKE_ERR(EIO));
821 803 }
822 804
823 805 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
824 806 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
825 807
826 808 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
827 809 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
828 810 seg, addr, S_READ, CRED(), NULL);
829 811
830 812 if (err)
831 813 return (FC_MAKE_ERR(err));
832 814 return (0);
833 815 }
834 816
835 817 /*ARGSUSED*/
836 818 static int
837 819 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
838 820 {
839 821 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
840 822
841 823 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
842 824
843 825 /*
844 826 * Need not acquire the segment lock since
845 827 * "smd_prot" is a read-only field.
846 828 */
847 829 return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
848 830 }
849 831
850 832 static int
851 833 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
852 834 {
853 835 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
854 836 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
855 837
856 838 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
857 839
858 840 if (pgno != 0) {
859 841 do {
860 842 protv[--pgno] = smd->smd_prot;
861 843 } while (pgno != 0);
862 844 }
863 845 return (0);
864 846 }
865 847
866 848 static u_offset_t
867 849 segmap_getoffset(struct seg *seg, caddr_t addr)
868 850 {
869 851 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
870 852
871 853 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
872 854
873 855 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
874 856 }
875 857
876 858 /*ARGSUSED*/
877 859 static int
878 860 segmap_gettype(struct seg *seg, caddr_t addr)
879 861 {
880 862 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
881 863
882 864 return (MAP_SHARED);
883 865 }
884 866
885 867 /*ARGSUSED*/
886 868 static int
887 869 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
888 870 {
889 871 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
890 872
891 873 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
892 874
893 875 /* XXX - This doesn't make any sense */
894 876 *vpp = smd->smd_sm->sm_vp;
895 877 return (0);
896 878 }
897 879
898 880 /*
899 881 * Check to see if it makes sense to do kluster/read ahead to
900 882 * addr + delta relative to the mapping at addr. We assume here
901 883 * that delta is a signed PAGESIZE'd multiple (which can be negative).
902 884 *
903 885 * For segmap we always "approve" of this action from our standpoint.
904 886 */
905 887 /*ARGSUSED*/
906 888 static int
907 889 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
908 890 {
909 891 return (0);
910 892 }
911 893
912 -static void
913 -segmap_badop()
914 -{
915 - panic("segmap_badop");
916 - /*NOTREACHED*/
917 -}
918 -
919 894 /*
920 895 * Special private segmap operations
921 896 */
922 897
923 898 /*
924 899 * Add smap to the appropriate free list.
925 900 */
926 901 static void
927 902 segmap_smapadd(struct smap *smp)
928 903 {
929 904 struct smfree *sm;
930 905 struct smap *smpfreelist;
931 906 struct sm_freeq *releq;
932 907
933 908 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
934 909
935 910 if (smp->sm_refcnt != 0) {
936 911 panic("segmap_smapadd");
937 912 /*NOTREACHED*/
938 913 }
939 914
940 915 sm = &smd_free[smp->sm_free_ndx];
941 916 /*
942 917 * Add to the tail of the release queue
943 918 * Note that sm_releq and sm_allocq could toggle
944 919 * before we get the lock. This does not affect
945 920 * correctness as the 2 queues are only maintained
946 921 * to reduce lock pressure.
947 922 */
948 923 releq = sm->sm_releq;
949 924 if (releq == &sm->sm_freeq[0])
950 925 smp->sm_flags |= SM_QNDX_ZERO;
951 926 else
952 927 smp->sm_flags &= ~SM_QNDX_ZERO;
953 928 mutex_enter(&releq->smq_mtx);
954 929 smpfreelist = releq->smq_free;
955 930 if (smpfreelist == 0) {
956 931 int want;
957 932
958 933 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
959 934 /*
960 935 * Both queue mutexes held to set sm_want;
961 936 * snapshot the value before dropping releq mutex.
962 937 * If sm_want appears after the releq mutex is dropped,
963 938 * then the smap just freed is already gone.
964 939 */
965 940 want = sm->sm_want;
966 941 mutex_exit(&releq->smq_mtx);
967 942 /*
968 943 * See if there was a waiter before dropping the releq mutex
969 944 * then recheck after obtaining sm_freeq[0] mutex as
970 945 * the another thread may have already signaled.
971 946 */
972 947 if (want) {
973 948 mutex_enter(&sm->sm_freeq[0].smq_mtx);
974 949 if (sm->sm_want)
975 950 cv_signal(&sm->sm_free_cv);
976 951 mutex_exit(&sm->sm_freeq[0].smq_mtx);
977 952 }
978 953 } else {
979 954 smp->sm_next = smpfreelist;
980 955 smp->sm_prev = smpfreelist->sm_prev;
981 956 smpfreelist->sm_prev = smp;
982 957 smp->sm_prev->sm_next = smp;
983 958 mutex_exit(&releq->smq_mtx);
984 959 }
985 960 }
986 961
987 962
988 963 static struct smap *
989 964 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
990 965 {
991 966 struct smap **hpp;
992 967 struct smap *tmp;
993 968 kmutex_t *hmtx;
994 969
995 970 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
996 971 ASSERT(smp->sm_vp == NULL);
997 972 ASSERT(smp->sm_hash == NULL);
998 973 ASSERT(smp->sm_prev == NULL);
999 974 ASSERT(smp->sm_next == NULL);
1000 975 ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
1001 976
1002 977 hmtx = SHASHMTX(hashid);
1003 978
1004 979 mutex_enter(hmtx);
1005 980 /*
1006 981 * First we need to verify that no one has created a smp
1007 982 * with (vp,off) as its tag before us.
1008 983 */
1009 984 for (tmp = smd_hash[hashid].sh_hash_list;
1010 985 tmp != NULL; tmp = tmp->sm_hash)
1011 986 if (tmp->sm_vp == vp && tmp->sm_off == off)
1012 987 break;
1013 988
1014 989 if (tmp == NULL) {
1015 990 /*
1016 991 * No one created one yet.
1017 992 *
1018 993 * Funniness here - we don't increment the ref count on the
1019 994 * vnode * even though we have another pointer to it here.
1020 995 * The reason for this is that we don't want the fact that
1021 996 * a seg_map entry somewhere refers to a vnode to prevent the
1022 997 * vnode * itself from going away. This is because this
1023 998 * reference to the vnode is a "soft one". In the case where
1024 999 * a mapping is being used by a rdwr [or directory routine?]
1025 1000 * there already has to be a non-zero ref count on the vnode.
1026 1001 * In the case where the vp has been freed and the smap
1027 1002 * structure is on the free list, there are no pages in memory
1028 1003 * that can refer to the vnode. Thus even if we reuse the same
1029 1004 * vnode/smap structure for a vnode which has the same
1030 1005 * address but represents a different object, we are ok.
1031 1006 */
1032 1007 smp->sm_vp = vp;
1033 1008 smp->sm_off = off;
1034 1009
1035 1010 hpp = &smd_hash[hashid].sh_hash_list;
1036 1011 smp->sm_hash = *hpp;
1037 1012 *hpp = smp;
1038 1013 #ifdef SEGMAP_HASHSTATS
1039 1014 smd_hash_len[hashid]++;
1040 1015 #endif
1041 1016 }
1042 1017 mutex_exit(hmtx);
1043 1018
1044 1019 return (tmp);
1045 1020 }
1046 1021
1047 1022 static void
1048 1023 segmap_hashout(struct smap *smp)
1049 1024 {
1050 1025 struct smap **hpp, *hp;
1051 1026 struct vnode *vp;
1052 1027 kmutex_t *mtx;
1053 1028 int hashid;
1054 1029 u_offset_t off;
1055 1030
1056 1031 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1057 1032
1058 1033 vp = smp->sm_vp;
1059 1034 off = smp->sm_off;
1060 1035
1061 1036 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1062 1037 mtx = SHASHMTX(hashid);
1063 1038 mutex_enter(mtx);
1064 1039
1065 1040 hpp = &smd_hash[hashid].sh_hash_list;
1066 1041 for (;;) {
1067 1042 hp = *hpp;
1068 1043 if (hp == NULL) {
1069 1044 panic("segmap_hashout");
1070 1045 /*NOTREACHED*/
1071 1046 }
1072 1047 if (hp == smp)
1073 1048 break;
1074 1049 hpp = &hp->sm_hash;
1075 1050 }
1076 1051
1077 1052 *hpp = smp->sm_hash;
1078 1053 smp->sm_hash = NULL;
1079 1054 #ifdef SEGMAP_HASHSTATS
1080 1055 smd_hash_len[hashid]--;
1081 1056 #endif
1082 1057 mutex_exit(mtx);
1083 1058
1084 1059 smp->sm_vp = NULL;
1085 1060 smp->sm_off = (u_offset_t)0;
1086 1061
1087 1062 }
1088 1063
1089 1064 /*
1090 1065 * Attempt to free unmodified, unmapped, and non locked segmap
1091 1066 * pages.
1092 1067 */
1093 1068 void
1094 1069 segmap_pagefree(struct vnode *vp, u_offset_t off)
1095 1070 {
1096 1071 u_offset_t pgoff;
1097 1072 page_t *pp;
1098 1073
1099 1074 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
1100 1075
1101 1076 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
1102 1077 continue;
1103 1078
1104 1079 switch (page_release(pp, 1)) {
1105 1080 case PGREL_NOTREL:
1106 1081 segmapcnt.smp_free_notfree.value.ul++;
1107 1082 break;
1108 1083 case PGREL_MOD:
1109 1084 segmapcnt.smp_free_dirty.value.ul++;
1110 1085 break;
1111 1086 case PGREL_CLEAN:
1112 1087 segmapcnt.smp_free.value.ul++;
1113 1088 break;
1114 1089 }
1115 1090 }
1116 1091 }
1117 1092
1118 1093 /*
1119 1094 * Locks held on entry: smap lock
1120 1095 * Locks held on exit : smap lock.
1121 1096 */
1122 1097
1123 1098 static void
1124 1099 grab_smp(struct smap *smp, page_t *pp)
1125 1100 {
1126 1101 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1127 1102 ASSERT(smp->sm_refcnt == 0);
1128 1103
1129 1104 if (smp->sm_vp != (struct vnode *)NULL) {
1130 1105 struct vnode *vp = smp->sm_vp;
1131 1106 u_offset_t off = smp->sm_off;
1132 1107 /*
1133 1108 * Destroy old vnode association and
1134 1109 * unload any hardware translations to
1135 1110 * the old object.
1136 1111 */
1137 1112 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
1138 1113 segmap_hashout(smp);
1139 1114
1140 1115 /*
1141 1116 * This node is off freelist and hashlist,
1142 1117 * so there is no reason to drop/reacquire sm_mtx
1143 1118 * across calls to hat_unload.
1144 1119 */
1145 1120 if (segmap_kpm) {
1146 1121 caddr_t vaddr;
1147 1122 int hat_unload_needed = 0;
1148 1123
1149 1124 /*
1150 1125 * unload kpm mapping
1151 1126 */
1152 1127 if (pp != NULL) {
1153 1128 vaddr = hat_kpm_page2va(pp, 1);
1154 1129 hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
1155 1130 page_unlock(pp);
1156 1131 }
1157 1132
1158 1133 /*
1159 1134 * Check if we have (also) the rare case of a
1160 1135 * non kpm mapping.
1161 1136 */
1162 1137 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
1163 1138 hat_unload_needed = 1;
1164 1139 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1165 1140 }
1166 1141
1167 1142 if (hat_unload_needed) {
1168 1143 hat_unload(kas.a_hat, segkmap->s_base +
1169 1144 ((smp - smd_smap) * MAXBSIZE),
1170 1145 MAXBSIZE, HAT_UNLOAD);
1171 1146 }
1172 1147
1173 1148 } else {
1174 1149 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
1175 1150 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1176 1151 hat_unload(kas.a_hat, segkmap->s_base +
1177 1152 ((smp - smd_smap) * MAXBSIZE),
1178 1153 MAXBSIZE, HAT_UNLOAD);
1179 1154 }
1180 1155 segmap_pagefree(vp, off);
1181 1156 }
1182 1157 }
1183 1158
1184 1159 static struct smap *
1185 1160 get_free_smp(int free_ndx)
1186 1161 {
1187 1162 struct smfree *sm;
1188 1163 kmutex_t *smtx;
1189 1164 struct smap *smp, *first;
1190 1165 struct sm_freeq *allocq, *releq;
1191 1166 struct kpme *kpme;
1192 1167 page_t *pp = NULL;
1193 1168 int end_ndx, page_locked = 0;
1194 1169
1195 1170 end_ndx = free_ndx;
1196 1171 sm = &smd_free[free_ndx];
1197 1172
1198 1173 retry_queue:
1199 1174 allocq = sm->sm_allocq;
1200 1175 mutex_enter(&allocq->smq_mtx);
1201 1176
1202 1177 if ((smp = allocq->smq_free) == NULL) {
1203 1178
1204 1179 skip_queue:
1205 1180 /*
1206 1181 * The alloc list is empty or this queue is being skipped;
1207 1182 * first see if the allocq toggled.
1208 1183 */
1209 1184 if (sm->sm_allocq != allocq) {
1210 1185 /* queue changed */
1211 1186 mutex_exit(&allocq->smq_mtx);
1212 1187 goto retry_queue;
1213 1188 }
1214 1189 releq = sm->sm_releq;
1215 1190 if (!mutex_tryenter(&releq->smq_mtx)) {
1216 1191 /* cannot get releq; a free smp may be there now */
1217 1192 mutex_exit(&allocq->smq_mtx);
1218 1193
1219 1194 /*
1220 1195 * This loop could spin forever if this thread has
1221 1196 * higher priority than the thread that is holding
1222 1197 * releq->smq_mtx. In order to force the other thread
1223 1198 * to run, we'll lock/unlock the mutex which is safe
1224 1199 * since we just unlocked the allocq mutex.
1225 1200 */
1226 1201 mutex_enter(&releq->smq_mtx);
1227 1202 mutex_exit(&releq->smq_mtx);
1228 1203 goto retry_queue;
1229 1204 }
1230 1205 if (releq->smq_free == NULL) {
1231 1206 /*
1232 1207 * This freelist is empty.
1233 1208 * This should not happen unless clients
1234 1209 * are failing to release the segmap
1235 1210 * window after accessing the data.
1236 1211 * Before resorting to sleeping, try
1237 1212 * the next list of the same color.
1238 1213 */
1239 1214 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
1240 1215 if (free_ndx != end_ndx) {
1241 1216 mutex_exit(&releq->smq_mtx);
1242 1217 mutex_exit(&allocq->smq_mtx);
1243 1218 sm = &smd_free[free_ndx];
1244 1219 goto retry_queue;
1245 1220 }
1246 1221 /*
1247 1222 * Tried all freelists of the same color once,
1248 1223 * wait on this list and hope something gets freed.
1249 1224 */
1250 1225 segmapcnt.smp_get_nofree.value.ul++;
1251 1226 sm->sm_want++;
1252 1227 mutex_exit(&sm->sm_freeq[1].smq_mtx);
1253 1228 cv_wait(&sm->sm_free_cv,
1254 1229 &sm->sm_freeq[0].smq_mtx);
1255 1230 sm->sm_want--;
1256 1231 mutex_exit(&sm->sm_freeq[0].smq_mtx);
1257 1232 sm = &smd_free[free_ndx];
1258 1233 goto retry_queue;
1259 1234 } else {
1260 1235 /*
1261 1236 * Something on the rele queue; flip the alloc
1262 1237 * and rele queues and retry.
1263 1238 */
1264 1239 sm->sm_allocq = releq;
1265 1240 sm->sm_releq = allocq;
1266 1241 mutex_exit(&allocq->smq_mtx);
1267 1242 mutex_exit(&releq->smq_mtx);
1268 1243 if (page_locked) {
1269 1244 delay(hz >> 2);
1270 1245 page_locked = 0;
1271 1246 }
1272 1247 goto retry_queue;
1273 1248 }
1274 1249 } else {
1275 1250 /*
1276 1251 * Fastpath the case we get the smap mutex
1277 1252 * on the first try.
1278 1253 */
1279 1254 first = smp;
1280 1255 next_smap:
1281 1256 smtx = SMAPMTX(smp);
1282 1257 if (!mutex_tryenter(smtx)) {
1283 1258 /*
1284 1259 * Another thread is trying to reclaim this slot.
1285 1260 * Skip to the next queue or smap.
1286 1261 */
1287 1262 if ((smp = smp->sm_next) == first) {
1288 1263 goto skip_queue;
1289 1264 } else {
1290 1265 goto next_smap;
1291 1266 }
1292 1267 } else {
1293 1268 /*
1294 1269 * if kpme exists, get shared lock on the page
1295 1270 */
1296 1271 if (segmap_kpm && smp->sm_vp != NULL) {
1297 1272
1298 1273 kpme = GET_KPME(smp);
1299 1274 pp = kpme->kpe_page;
1300 1275
1301 1276 if (pp != NULL) {
1302 1277 if (!page_trylock(pp, SE_SHARED)) {
1303 1278 smp = smp->sm_next;
1304 1279 mutex_exit(smtx);
1305 1280 page_locked = 1;
1306 1281
1307 1282 pp = NULL;
1308 1283
1309 1284 if (smp == first) {
1310 1285 goto skip_queue;
1311 1286 } else {
1312 1287 goto next_smap;
1313 1288 }
1314 1289 } else {
1315 1290 if (kpme->kpe_page == NULL) {
1316 1291 page_unlock(pp);
1317 1292 pp = NULL;
1318 1293 }
1319 1294 }
1320 1295 }
1321 1296 }
1322 1297
1323 1298 /*
1324 1299 * At this point, we've selected smp. Remove smp
1325 1300 * from its freelist. If smp is the first one in
1326 1301 * the freelist, update the head of the freelist.
1327 1302 */
1328 1303 if (first == smp) {
1329 1304 ASSERT(first == allocq->smq_free);
1330 1305 allocq->smq_free = smp->sm_next;
1331 1306 }
1332 1307
1333 1308 /*
1334 1309 * if the head of the freelist still points to smp,
1335 1310 * then there are no more free smaps in that list.
1336 1311 */
1337 1312 if (allocq->smq_free == smp)
1338 1313 /*
1339 1314 * Took the last one
1340 1315 */
1341 1316 allocq->smq_free = NULL;
1342 1317 else {
1343 1318 smp->sm_prev->sm_next = smp->sm_next;
1344 1319 smp->sm_next->sm_prev = smp->sm_prev;
1345 1320 }
1346 1321 mutex_exit(&allocq->smq_mtx);
1347 1322 smp->sm_prev = smp->sm_next = NULL;
1348 1323
1349 1324 /*
1350 1325 * if pp != NULL, pp must have been locked;
1351 1326 * grab_smp() unlocks pp.
1352 1327 */
1353 1328 ASSERT((pp == NULL) || PAGE_LOCKED(pp));
1354 1329 grab_smp(smp, pp);
1355 1330 /* return smp locked. */
1356 1331 ASSERT(SMAPMTX(smp) == smtx);
1357 1332 ASSERT(MUTEX_HELD(smtx));
1358 1333 return (smp);
1359 1334 }
1360 1335 }
1361 1336 }
1362 1337
1363 1338 /*
1364 1339 * Special public segmap operations
1365 1340 */
1366 1341
1367 1342 /*
1368 1343 * Create pages (without using VOP_GETPAGE) and load up translations to them.
1369 1344 * If softlock is TRUE, then set things up so that it looks like a call
1370 1345 * to segmap_fault with F_SOFTLOCK.
1371 1346 *
1372 1347 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
1373 1348 *
1374 1349 * All fields in the generic segment (struct seg) are considered to be
1375 1350 * read-only for "segmap" even though the kernel address space (kas) may
1376 1351 * not be locked, hence no lock is needed to access them.
1377 1352 */
1378 1353 int
1379 1354 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
1380 1355 {
1381 1356 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
1382 1357 page_t *pp;
1383 1358 u_offset_t off;
1384 1359 struct smap *smp;
1385 1360 struct vnode *vp;
1386 1361 caddr_t eaddr;
1387 1362 int newpage = 0;
1388 1363 uint_t prot;
1389 1364 kmutex_t *smtx;
1390 1365 int hat_flag;
1391 1366
1392 1367 ASSERT(seg->s_as == &kas);
1393 1368
1394 1369 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1395 1370 /*
1396 1371 * Pages are successfully prefaulted and locked in
1397 1372 * segmap_getmapflt and can't be unlocked until
1398 1373 * segmap_release. The SM_KPM_NEWPAGE flag is set
1399 1374 * in segmap_pagecreate_kpm when new pages are created.
1400 1375 * and it is returned as "newpage" indication here.
1401 1376 */
1402 1377 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1403 1378 panic("segmap_pagecreate: smap not found "
1404 1379 "for addr %p", (void *)addr);
1405 1380 /*NOTREACHED*/
1406 1381 }
1407 1382
1408 1383 smtx = SMAPMTX(smp);
1409 1384 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
1410 1385 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1411 1386 mutex_exit(smtx);
1412 1387
1413 1388 return (newpage);
1414 1389 }
1415 1390
1416 1391 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
1417 1392
1418 1393 eaddr = addr + len;
1419 1394 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1420 1395
1421 1396 smp = GET_SMAP(seg, addr);
1422 1397
1423 1398 /*
1424 1399 * We don't grab smp mutex here since we assume the smp
1425 1400 * has a refcnt set already which prevents the slot from
1426 1401 * changing its id.
1427 1402 */
1428 1403 ASSERT(smp->sm_refcnt > 0);
1429 1404
1430 1405 vp = smp->sm_vp;
1431 1406 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1432 1407 prot = smd->smd_prot;
1433 1408
1434 1409 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1435 1410 hat_flag = HAT_LOAD;
1436 1411 pp = page_lookup(vp, off, SE_SHARED);
1437 1412 if (pp == NULL) {
1438 1413 ushort_t bitindex;
1439 1414
1440 1415 if ((pp = page_create_va(vp, off,
1441 1416 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1442 1417 panic("segmap_pagecreate: page_create failed");
1443 1418 /*NOTREACHED*/
1444 1419 }
1445 1420 newpage = 1;
1446 1421 page_io_unlock(pp);
1447 1422
1448 1423 /*
1449 1424 * Since pages created here do not contain valid
1450 1425 * data until the caller writes into them, the
1451 1426 * "exclusive" lock will not be dropped to prevent
1452 1427 * other users from accessing the page. We also
1453 1428 * have to lock the translation to prevent a fault
1454 1429 * from occurring when the virtual address mapped by
1455 1430 * this page is written into. This is necessary to
1456 1431 * avoid a deadlock since we haven't dropped the
1457 1432 * "exclusive" lock.
1458 1433 */
1459 1434 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
1460 1435
1461 1436 /*
1462 1437 * Large Files: The following assertion is to
1463 1438 * verify the cast above.
1464 1439 */
1465 1440 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1466 1441 smtx = SMAPMTX(smp);
1467 1442 mutex_enter(smtx);
1468 1443 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
1469 1444 mutex_exit(smtx);
1470 1445
1471 1446 hat_flag = HAT_LOAD_LOCK;
1472 1447 } else if (softlock) {
1473 1448 hat_flag = HAT_LOAD_LOCK;
1474 1449 }
1475 1450
1476 1451 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
1477 1452 hat_setmod(pp);
1478 1453
1479 1454 hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
1480 1455
1481 1456 if (hat_flag != HAT_LOAD_LOCK)
1482 1457 page_unlock(pp);
1483 1458
1484 1459 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
1485 1460 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1486 1461 seg, addr, pp, vp, off);
1487 1462 }
1488 1463
1489 1464 return (newpage);
1490 1465 }
1491 1466
1492 1467 void
1493 1468 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
1494 1469 {
1495 1470 struct smap *smp;
1496 1471 ushort_t bitmask;
1497 1472 page_t *pp;
1498 1473 struct vnode *vp;
1499 1474 u_offset_t off;
1500 1475 caddr_t eaddr;
1501 1476 kmutex_t *smtx;
1502 1477
1503 1478 ASSERT(seg->s_as == &kas);
1504 1479
1505 1480 eaddr = addr + len;
1506 1481 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1507 1482
1508 1483 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1509 1484 /*
1510 1485 * Pages are successfully prefaulted and locked in
1511 1486 * segmap_getmapflt and can't be unlocked until
1512 1487 * segmap_release, so no pages or hat mappings have
1513 1488 * to be unlocked at this point.
1514 1489 */
1515 1490 #ifdef DEBUG
1516 1491 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1517 1492 panic("segmap_pageunlock: smap not found "
1518 1493 "for addr %p", (void *)addr);
1519 1494 /*NOTREACHED*/
1520 1495 }
1521 1496
1522 1497 ASSERT(smp->sm_refcnt > 0);
1523 1498 mutex_exit(SMAPMTX(smp));
1524 1499 #endif
1525 1500 return;
1526 1501 }
1527 1502
1528 1503 smp = GET_SMAP(seg, addr);
1529 1504 smtx = SMAPMTX(smp);
1530 1505
1531 1506 ASSERT(smp->sm_refcnt > 0);
1532 1507
1533 1508 vp = smp->sm_vp;
1534 1509 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1535 1510
1536 1511 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1537 1512 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
1538 1513
1539 1514 /*
1540 1515 * Large Files: Following assertion is to verify
1541 1516 * the correctness of the cast to (int) above.
1542 1517 */
1543 1518 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1544 1519
1545 1520 /*
1546 1521 * If the bit corresponding to "off" is set,
1547 1522 * clear this bit in the bitmap, unlock translations,
1548 1523 * and release the "exclusive" lock on the page.
1549 1524 */
1550 1525 if (smp->sm_bitmap & bitmask) {
1551 1526 mutex_enter(smtx);
1552 1527 smp->sm_bitmap &= ~bitmask;
1553 1528 mutex_exit(smtx);
1554 1529
1555 1530 hat_unlock(kas.a_hat, addr, PAGESIZE);
1556 1531
1557 1532 /*
1558 1533 * Use page_find() instead of page_lookup() to
1559 1534 * find the page since we know that it has
1560 1535 * "exclusive" lock.
1561 1536 */
1562 1537 pp = page_find(vp, off);
1563 1538 if (pp == NULL) {
1564 1539 panic("segmap_pageunlock: page not found");
1565 1540 /*NOTREACHED*/
1566 1541 }
1567 1542 if (rw == S_WRITE) {
1568 1543 hat_setrefmod(pp);
1569 1544 } else if (rw != S_OTHER) {
1570 1545 hat_setref(pp);
1571 1546 }
1572 1547
1573 1548 page_unlock(pp);
1574 1549 }
1575 1550 }
1576 1551 }
1577 1552
1578 1553 caddr_t
1579 1554 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
1580 1555 {
1581 1556 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
1582 1557 }
1583 1558
1584 1559 /*
1585 1560 * This is the magic virtual address that offset 0 of an ELF
1586 1561 * file gets mapped to in user space. This is used to pick
1587 1562 * the vac color on the freelist.
1588 1563 */
1589 1564 #define ELF_OFFZERO_VA (0x10000)
1590 1565 /*
1591 1566 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
1592 1567 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
1593 1568 * The return address is always MAXBSIZE aligned.
1594 1569 *
1595 1570 * If forcefault is nonzero and the MMU translations haven't yet been created,
1596 1571 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1597 1572 */
1598 1573 caddr_t
1599 1574 segmap_getmapflt(
1600 1575 struct seg *seg,
1601 1576 struct vnode *vp,
1602 1577 u_offset_t off,
1603 1578 size_t len,
1604 1579 int forcefault,
1605 1580 enum seg_rw rw)
1606 1581 {
1607 1582 struct smap *smp, *nsmp;
1608 1583 extern struct vnode *common_specvp();
1609 1584 caddr_t baseaddr; /* MAXBSIZE aligned */
1610 1585 u_offset_t baseoff;
1611 1586 int newslot;
1612 1587 caddr_t vaddr;
1613 1588 int color, hashid;
1614 1589 kmutex_t *hashmtx, *smapmtx;
1615 1590 struct smfree *sm;
1616 1591 page_t *pp;
1617 1592 struct kpme *kpme;
1618 1593 uint_t prot;
1619 1594 caddr_t base;
1620 1595 page_t *pl[MAXPPB + 1];
1621 1596 int error;
1622 1597 int is_kpm = 1;
1623 1598
1624 1599 ASSERT(seg->s_as == &kas);
1625 1600 ASSERT(seg == segkmap);
1626 1601
1627 1602 baseoff = off & (offset_t)MAXBMASK;
1628 1603 if (off + len > baseoff + MAXBSIZE) {
1629 1604 panic("segmap_getmap bad len");
1630 1605 /*NOTREACHED*/
1631 1606 }
1632 1607
1633 1608 /*
1634 1609 * If this is a block device we have to be sure to use the
1635 1610 * "common" block device vnode for the mapping.
1636 1611 */
1637 1612 if (vp->v_type == VBLK)
1638 1613 vp = common_specvp(vp);
1639 1614
1640 1615 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
1641 1616
1642 1617 if (segmap_kpm == 0 ||
1643 1618 (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
1644 1619 is_kpm = 0;
1645 1620 }
1646 1621
1647 1622 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1648 1623 hashmtx = SHASHMTX(hashid);
1649 1624
1650 1625 retry_hash:
1651 1626 mutex_enter(hashmtx);
1652 1627 for (smp = smd_hash[hashid].sh_hash_list;
1653 1628 smp != NULL; smp = smp->sm_hash)
1654 1629 if (smp->sm_vp == vp && smp->sm_off == baseoff)
1655 1630 break;
1656 1631 mutex_exit(hashmtx);
1657 1632
1658 1633 vrfy_smp:
1659 1634 if (smp != NULL) {
1660 1635
1661 1636 ASSERT(vp->v_count != 0);
1662 1637
1663 1638 /*
1664 1639 * Get smap lock and recheck its tag. The hash lock
1665 1640 * is dropped since the hash is based on (vp, off)
1666 1641 * and (vp, off) won't change when we have smap mtx.
1667 1642 */
1668 1643 smapmtx = SMAPMTX(smp);
1669 1644 mutex_enter(smapmtx);
1670 1645 if (smp->sm_vp != vp || smp->sm_off != baseoff) {
1671 1646 mutex_exit(smapmtx);
1672 1647 goto retry_hash;
1673 1648 }
1674 1649
1675 1650 if (smp->sm_refcnt == 0) {
1676 1651
1677 1652 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
1678 1653
1679 1654 /*
1680 1655 * Could still be on the free list. However, this
1681 1656 * could also be an smp that is transitioning from
1682 1657 * the free list when we have too much contention
1683 1658 * for the smapmtx's. In this case, we have an
1684 1659 * unlocked smp that is not on the free list any
1685 1660 * longer, but still has a 0 refcnt. The only way
1686 1661 * to be sure is to check the freelist pointers.
1687 1662 * Since we now have the smapmtx, we are guaranteed
1688 1663 * that the (vp, off) won't change, so we are safe
1689 1664 * to reclaim it. get_free_smp() knows that this
1690 1665 * can happen, and it will check the refcnt.
1691 1666 */
1692 1667
1693 1668 if ((smp->sm_next != NULL)) {
1694 1669 struct sm_freeq *freeq;
1695 1670
1696 1671 ASSERT(smp->sm_prev != NULL);
1697 1672 sm = &smd_free[smp->sm_free_ndx];
1698 1673
1699 1674 if (smp->sm_flags & SM_QNDX_ZERO)
1700 1675 freeq = &sm->sm_freeq[0];
1701 1676 else
1702 1677 freeq = &sm->sm_freeq[1];
1703 1678
1704 1679 mutex_enter(&freeq->smq_mtx);
1705 1680 if (freeq->smq_free != smp) {
1706 1681 /*
1707 1682 * fastpath normal case
1708 1683 */
1709 1684 smp->sm_prev->sm_next = smp->sm_next;
1710 1685 smp->sm_next->sm_prev = smp->sm_prev;
1711 1686 } else if (smp == smp->sm_next) {
1712 1687 /*
1713 1688 * Taking the last smap on freelist
1714 1689 */
1715 1690 freeq->smq_free = NULL;
1716 1691 } else {
1717 1692 /*
1718 1693 * Reclaiming 1st smap on list
1719 1694 */
1720 1695 freeq->smq_free = smp->sm_next;
1721 1696 smp->sm_prev->sm_next = smp->sm_next;
1722 1697 smp->sm_next->sm_prev = smp->sm_prev;
1723 1698 }
1724 1699 mutex_exit(&freeq->smq_mtx);
1725 1700 smp->sm_prev = smp->sm_next = NULL;
1726 1701 } else {
1727 1702 ASSERT(smp->sm_prev == NULL);
1728 1703 segmapcnt.smp_stolen.value.ul++;
1729 1704 }
1730 1705
1731 1706 } else {
1732 1707 segmapcnt.smp_get_use.value.ul++;
1733 1708 }
1734 1709 smp->sm_refcnt++; /* another user */
1735 1710
1736 1711 /*
1737 1712 * We don't invoke segmap_fault via TLB miss, so we set ref
1738 1713 * and mod bits in advance. For S_OTHER we set them in
1739 1714 * segmap_fault F_SOFTUNLOCK.
1740 1715 */
1741 1716 if (is_kpm) {
1742 1717 if (rw == S_WRITE) {
1743 1718 smp->sm_flags |= SM_WRITE_DATA;
1744 1719 } else if (rw == S_READ) {
1745 1720 smp->sm_flags |= SM_READ_DATA;
1746 1721 }
1747 1722 }
1748 1723 mutex_exit(smapmtx);
1749 1724
1750 1725 newslot = 0;
1751 1726 } else {
1752 1727
1753 1728 uint32_t free_ndx, *free_ndxp;
1754 1729 union segmap_cpu *scpu;
1755 1730
1756 1731 /*
1757 1732 * On a PAC machine or a machine with anti-alias
1758 1733 * hardware, smd_colormsk will be zero.
1759 1734 *
1760 1735 * On a VAC machine, pick color by offset in the file
1761 1736 * so we won't get VAC conflicts on elf files.
1762 1737 * On data files, color does not matter but we
1763 1738 * don't know what kind of file it is so we always
1764 1739 * pick color by offset. This causes color
1765 1740 * corresponding to file offset zero to be used more
1766 1741 * heavily.
1767 1742 */
1768 1743 color = (baseoff >> MAXBSHIFT) & smd_colormsk;
1769 1744 scpu = smd_cpu+CPU->cpu_seqid;
1770 1745 free_ndxp = &scpu->scpu.scpu_free_ndx[color];
1771 1746 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
1772 1747 #ifdef DEBUG
1773 1748 colors_used[free_ndx]++;
1774 1749 #endif /* DEBUG */
1775 1750
1776 1751 /*
1777 1752 * Get a locked smp slot from the free list.
1778 1753 */
1779 1754 smp = get_free_smp(free_ndx);
1780 1755 smapmtx = SMAPMTX(smp);
1781 1756
1782 1757 ASSERT(smp->sm_vp == NULL);
1783 1758
1784 1759 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
1785 1760 /*
1786 1761 * Failed to hash in; an smap for (vp, off) exists now.
1787 1762 * Put the smp we just allocated back on the free list.
1788 1763 */
1789 1764 segmap_smapadd(smp);
1790 1765 mutex_exit(smapmtx);
1791 1766
1792 1767 smp = nsmp;
1793 1768 goto vrfy_smp;
1794 1769 }
1795 1770 smp->sm_refcnt++; /* another user */
1796 1771
1797 1772 /*
1798 1773 * We don't invoke segmap_fault via TLB miss, so we set ref
1799 1774 * and mod bits in advance. For S_OTHER we set them in
1800 1775 * segmap_fault F_SOFTUNLOCK.
1801 1776 */
1802 1777 if (is_kpm) {
1803 1778 if (rw == S_WRITE) {
1804 1779 smp->sm_flags |= SM_WRITE_DATA;
1805 1780 } else if (rw == S_READ) {
1806 1781 smp->sm_flags |= SM_READ_DATA;
1807 1782 }
1808 1783 }
1809 1784 mutex_exit(smapmtx);
1810 1785
1811 1786 newslot = 1;
1812 1787 }
1813 1788
1814 1789 if (!is_kpm)
1815 1790 goto use_segmap_range;
1816 1791
1817 1792 /*
1818 1793 * Use segkpm
1819 1794 */
1820 1795 /* Lint directive required until 6746211 is fixed */
1821 1796 /*CONSTCOND*/
1822 1797 ASSERT(PAGESIZE == MAXBSIZE);
1823 1798
1824 1799 /*
1825 1800 * remember the last smp faulted on this cpu.
1826 1801 */
1827 1802 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
1828 1803
1829 1804 if (forcefault == SM_PAGECREATE) {
1830 1805 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
1831 1806 return (baseaddr);
1832 1807 }
1833 1808
1834 1809 if (newslot == 0 &&
1835 1810 (pp = GET_KPME(smp)->kpe_page) != NULL) {
1836 1811
1837 1812 /* fastpath */
1838 1813 switch (rw) {
1839 1814 case S_READ:
1840 1815 case S_WRITE:
1841 1816 if (page_trylock(pp, SE_SHARED)) {
1842 1817 if (PP_ISFREE(pp) ||
1843 1818 !(pp->p_vnode == vp &&
1844 1819 pp->p_offset == baseoff)) {
1845 1820 page_unlock(pp);
1846 1821 pp = page_lookup(vp, baseoff,
1847 1822 SE_SHARED);
1848 1823 }
1849 1824 } else {
1850 1825 pp = page_lookup(vp, baseoff, SE_SHARED);
1851 1826 }
1852 1827
1853 1828 if (pp == NULL) {
1854 1829 ASSERT(GET_KPME(smp)->kpe_page == NULL);
1855 1830 break;
1856 1831 }
1857 1832
1858 1833 if (rw == S_WRITE &&
1859 1834 hat_page_getattr(pp, P_MOD | P_REF) !=
1860 1835 (P_MOD | P_REF)) {
1861 1836 page_unlock(pp);
1862 1837 break;
1863 1838 }
1864 1839
1865 1840 /*
1866 1841 * We have the p_selock as reader, grab_smp
1867 1842 * can't hit us, we have bumped the smap
1868 1843 * refcnt and hat_pageunload needs the
1869 1844 * p_selock exclusive.
1870 1845 */
1871 1846 kpme = GET_KPME(smp);
1872 1847 if (kpme->kpe_page == pp) {
1873 1848 baseaddr = hat_kpm_page2va(pp, 0);
1874 1849 } else if (kpme->kpe_page == NULL) {
1875 1850 baseaddr = hat_kpm_mapin(pp, kpme);
1876 1851 } else {
1877 1852 panic("segmap_getmapflt: stale "
1878 1853 "kpme page, kpme %p", (void *)kpme);
1879 1854 /*NOTREACHED*/
1880 1855 }
1881 1856
1882 1857 /*
1883 1858 * We don't invoke segmap_fault via TLB miss,
1884 1859 * so we set ref and mod bits in advance.
1885 1860 * For S_OTHER we set them in segmap_fault
1886 1861 * F_SOFTUNLOCK.
1887 1862 */
1888 1863 if (rw == S_READ && !hat_isref(pp))
1889 1864 hat_setref(pp);
1890 1865
1891 1866 return (baseaddr);
1892 1867 default:
1893 1868 break;
1894 1869 }
1895 1870 }
1896 1871
1897 1872 base = segkpm_create_va(baseoff);
1898 1873 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1899 1874 seg, base, rw, CRED(), NULL);
1900 1875
1901 1876 pp = pl[0];
1902 1877 if (error || pp == NULL) {
1903 1878 /*
1904 1879 * Use segmap address slot and let segmap_fault deal
1905 1880 * with the error cases. There is no error return
1906 1881 * possible here.
1907 1882 */
1908 1883 goto use_segmap_range;
1909 1884 }
1910 1885
1911 1886 ASSERT(pl[1] == NULL);
1912 1887
1913 1888 /*
1914 1889 * When prot is not returned w/ PROT_ALL the returned pages
1915 1890 * are not backed by fs blocks. For most of the segmap users
1916 1891 * this is no problem, they don't write to the pages in the
1917 1892 * same request and therefore don't rely on a following
1918 1893 * trap-driven segmap_fault. With SM_LOCKPROTO users it
1919 1894 * is safer to use segkmap addresses to allow
1920 1895 * protection segmap_faults.
1921 1896 */
1922 1897 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
1923 1898 /*
1924 1899 * Use segmap address slot and let segmap_fault
1925 1900 * do the error return.
1926 1901 */
1927 1902 ASSERT(rw != S_WRITE);
1928 1903 ASSERT(PAGE_LOCKED(pp));
1929 1904 page_unlock(pp);
1930 1905 forcefault = 0;
1931 1906 goto use_segmap_range;
1932 1907 }
1933 1908
1934 1909 /*
1935 1910 * We have the p_selock as reader, grab_smp can't hit us, we
1936 1911 * have bumped the smap refcnt and hat_pageunload needs the
1937 1912 * p_selock exclusive.
1938 1913 */
1939 1914 kpme = GET_KPME(smp);
1940 1915 if (kpme->kpe_page == pp) {
1941 1916 baseaddr = hat_kpm_page2va(pp, 0);
1942 1917 } else if (kpme->kpe_page == NULL) {
1943 1918 baseaddr = hat_kpm_mapin(pp, kpme);
1944 1919 } else {
1945 1920 panic("segmap_getmapflt: stale kpme page after "
1946 1921 "VOP_GETPAGE, kpme %p", (void *)kpme);
1947 1922 /*NOTREACHED*/
1948 1923 }
1949 1924
1950 1925 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
1951 1926
1952 1927 return (baseaddr);
1953 1928
1954 1929
1955 1930 use_segmap_range:
1956 1931 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
1957 1932 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
1958 1933 "segmap_getmap:seg %p addr %p vp %p offset %llx",
1959 1934 seg, baseaddr, vp, baseoff);
1960 1935
1961 1936 /*
1962 1937 * Prefault the translations
1963 1938 */
1964 1939 vaddr = baseaddr + (off - baseoff);
1965 1940 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
1966 1941
1967 1942 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
1968 1943 (uintptr_t)PAGEMASK);
1969 1944
1970 1945 (void) segmap_fault(kas.a_hat, seg, pgaddr,
1971 1946 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
1972 1947 F_INVAL, rw);
1973 1948 }
1974 1949
1975 1950 return (baseaddr);
1976 1951 }
1977 1952
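The prefault at the end of segmap_getmapflt() rounds the caller's (vaddr, len) range out to page boundaries before calling segmap_fault(). Below is a minimal standalone sketch of that rounding only, using a hypothetical PGSZ constant in place of the kernel's PAGESIZE/PAGEMASK macros; it is not code from this file.

/*
 * Standalone illustration of the page rounding done before the
 * prefault segmap_fault() call above. PGSZ stands in for the kernel's
 * PAGESIZE; the values of vaddr and len are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define	PGSZ	4096UL

int
main(void)
{
	uintptr_t vaddr = 0x12345;	/* hypothetical address inside the mapping */
	size_t len = 3000;		/* hypothetical request length */
	uintptr_t pgaddr;
	size_t flen;

	pgaddr = vaddr & ~(PGSZ - 1);			/* round start down to a page */
	flen = (vaddr + len - pgaddr + PGSZ - 1) & ~(PGSZ - 1);	/* round length up */

	printf("fault %zu bytes starting at %#lx\n", flen, (unsigned long)pgaddr);
	return (0);
}
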
1978 1953 int
1979 1954 segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
1980 1955 {
1981 1956 struct smap *smp;
1982 1957 int error;
1983 1958 int bflags = 0;
1984 1959 struct vnode *vp;
1985 1960 u_offset_t offset;
1986 1961 kmutex_t *smtx;
1987 1962 int is_kpm = 0;
1988 1963 page_t *pp;
1989 1964
1990 1965 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1991 1966
1992 1967 if (((uintptr_t)addr & MAXBOFFSET) != 0) {
1993 1968 panic("segmap_release: addr %p not "
1994 1969 "MAXBSIZE aligned", (void *)addr);
1995 1970 /*NOTREACHED*/
1996 1971 }
1997 1972
1998 1973 if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
1999 1974 panic("segmap_release: smap not found "
2000 1975 "for addr %p", (void *)addr);
2001 1976 /*NOTREACHED*/
2002 1977 }
2003 1978
2004 1979 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2005 1980 "segmap_relmap:seg %p addr %p smp %p",
2006 1981 seg, addr, smp);
2007 1982
2008 1983 smtx = SMAPMTX(smp);
2009 1984
2010 1985 /*
2011 1986 * For compatibility reasons segmap_pagecreate_kpm sets this
2012 1987 * flag to allow a following segmap_pagecreate to return
2013 1988 * flag to allow a following segmap_pagecreate to return
2014 1989 * this as the "newpage" flag. If segmap_pagecreate is not
2015 1990 * called at all, we clear it now.
2016 1991 smp->sm_flags &= ~SM_KPM_NEWPAGE;
2017 1992 is_kpm = 1;
2018 1993 if (smp->sm_flags & SM_WRITE_DATA) {
2019 1994 hat_setrefmod(pp);
2020 1995 } else if (smp->sm_flags & SM_READ_DATA) {
2021 1996 hat_setref(pp);
2022 1997 }
2023 1998 } else {
2024 1999 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
2025 2000 ((uintptr_t)addr & MAXBOFFSET) != 0) {
2026 2001 panic("segmap_release: bad addr %p", (void *)addr);
2027 2002 /*NOTREACHED*/
2028 2003 }
2029 2004 smp = GET_SMAP(seg, addr);
2030 2005
2031 2006 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2032 2007 "segmap_relmap:seg %p addr %p smp %p",
2033 2008 seg, addr, smp);
2034 2009
2035 2010 smtx = SMAPMTX(smp);
2036 2011 mutex_enter(smtx);
2037 2012 smp->sm_flags |= SM_NOTKPM_RELEASED;
2038 2013 }
2039 2014
2040 2015 ASSERT(smp->sm_refcnt > 0);
2041 2016
2042 2017 /*
2043 2018 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2044 2019 * are set.
2045 2020 */
2046 2021 if ((flags & ~SM_DONTNEED) != 0) {
2047 2022 if (flags & SM_WRITE)
2048 2023 segmapcnt.smp_rel_write.value.ul++;
2049 2024 if (flags & SM_ASYNC) {
2050 2025 bflags |= B_ASYNC;
2051 2026 segmapcnt.smp_rel_async.value.ul++;
2052 2027 }
2053 2028 if (flags & SM_INVAL) {
2054 2029 bflags |= B_INVAL;
2055 2030 segmapcnt.smp_rel_abort.value.ul++;
2056 2031 }
2057 2032 if (flags & SM_DESTROY) {
2058 2033 bflags |= (B_INVAL|B_TRUNC);
2059 2034 segmapcnt.smp_rel_abort.value.ul++;
2060 2035 }
2061 2036 if (smp->sm_refcnt == 1) {
2062 2037 /*
2063 2038 * We only bother doing the FREE and DONTNEED flags
2064 2039 * if no one else is still referencing this mapping.
2065 2040 */
2066 2041 if (flags & SM_FREE) {
2067 2042 bflags |= B_FREE;
2068 2043 segmapcnt.smp_rel_free.value.ul++;
2069 2044 }
2070 2045 if (flags & SM_DONTNEED) {
2071 2046 bflags |= B_DONTNEED;
2072 2047 segmapcnt.smp_rel_dontneed.value.ul++;
2073 2048 }
2074 2049 }
2075 2050 } else {
2076 2051 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
2077 2052 }
2078 2053
2079 2054 vp = smp->sm_vp;
2080 2055 offset = smp->sm_off;
2081 2056
2082 2057 if (--smp->sm_refcnt == 0) {
2083 2058
2084 2059 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2085 2060
2086 2061 if (flags & (SM_INVAL|SM_DESTROY)) {
2087 2062 segmap_hashout(smp); /* remove map info */
2088 2063 if (is_kpm) {
2089 2064 hat_kpm_mapout(pp, GET_KPME(smp), addr);
2090 2065 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
2091 2066 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2092 2067 hat_unload(kas.a_hat, segkmap->s_base +
2093 2068 ((smp - smd_smap) * MAXBSIZE),
2094 2069 MAXBSIZE, HAT_UNLOAD);
2095 2070 }
2096 2071
2097 2072 } else {
2098 2073 if (segmap_kpm)
2099 2074 segkpm_mapout_validkpme(GET_KPME(smp));
2100 2075
2101 2076 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2102 2077 hat_unload(kas.a_hat, addr, MAXBSIZE,
2103 2078 HAT_UNLOAD);
2104 2079 }
2105 2080 }
2106 2081 segmap_smapadd(smp); /* add to free list */
2107 2082 }
2108 2083
2109 2084 mutex_exit(smtx);
2110 2085
2111 2086 if (is_kpm)
2112 2087 page_unlock(pp);
2113 2088 /*
2114 2089 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2115 2090 * are set.
2116 2091 */
2117 2092 if ((flags & ~SM_DONTNEED) != 0) {
2118 2093 error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
2119 2094 bflags, CRED(), NULL);
2120 2095 } else {
2121 2096 error = 0;
2122 2097 }
2123 2098
2124 2099 return (error);
2125 2100 }
2126 2101
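segmap_release() is the other half of the interface: a caller maps a MAXBSIZE-aligned window with segmap_getmapflt(), copies through it, then releases it. The helper below is an illustrative sketch of that calling convention only, assuming the usual kernel headers and the global segkmap segment; it is not code from this file or from any particular filesystem, and the request must not cross a MAXBSIZE boundary.

/*
 * Illustrative-only read-side pairing of segmap_getmapflt() and
 * segmap_release(). Assumes <sys/vnode.h>, <sys/uio.h> and
 * <vm/seg_map.h>; 'n' must not cross a MAXBSIZE boundary from 'off'.
 */
static int
read_via_segmap(struct vnode *vp, u_offset_t off, size_t n, struct uio *uio)
{
	caddr_t base;
	int error;

	/* Map the MAXBSIZE block containing 'off' and prefault it. */
	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);

	/* Copy out of the mapping; 'base' is MAXBSIZE aligned. */
	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uio);

	/* Drop the mapping; no flags means no VOP_PUTPAGE() is forced. */
	if (error == 0)
		error = segmap_release(segkmap, base, 0);
	else
		(void) segmap_release(segkmap, base, 0);

	return (error);
}
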
2127 2102 /*
2128 2103 * Dump the pages belonging to this segmap segment.
2129 2104 */
2130 2105 static void
2131 2106 segmap_dump(struct seg *seg)
2132 2107 {
2133 2108 struct segmap_data *smd;
2134 2109 struct smap *smp, *smp_end;
2135 2110 page_t *pp;
2136 2111 pfn_t pfn;
2137 2112 u_offset_t off;
2138 2113 caddr_t addr;
2139 2114
2140 2115 smd = (struct segmap_data *)seg->s_data;
2141 2116 addr = seg->s_base;
2142 2117 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
2143 2118 smp < smp_end; smp++) {
2144 2119
2145 2120 if (smp->sm_refcnt) {
2146 2121 for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
2147 2122 int we_own_it = 0;
2148 2123
2149 2124 /*
2150 2125 * If pp == NULL, the page either does
2151 2126 * not exist or is exclusively locked.
2152 2127 * So determine if it exists before
2153 2128 * searching for it.
2154 2129 */
2155 2130 if ((pp = page_lookup_nowait(smp->sm_vp,
2156 2131 smp->sm_off + off, SE_SHARED)))
2157 2132 we_own_it = 1;
2158 2133 else
2159 2134 pp = page_exists(smp->sm_vp,
2160 2135 smp->sm_off + off);
2161 2136
2162 2137 if (pp) {
2163 2138 pfn = page_pptonum(pp);
2164 2139 dump_addpage(seg->s_as,
2165 2140 addr + off, pfn);
2166 2141 if (we_own_it)
2167 2142 page_unlock(pp);
2168 2143 }
2169 2144 dump_timeleft = dump_timeout;
2170 2145 }
2171 2146 }
2172 2147 addr += MAXBSIZE;
2173 2148 }
2174 2149 }
2175 2150
2176 2151 /*ARGSUSED*/
2177 2152 static int
2178 2153 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
2179 2154 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2180 2155 {
2181 2156 return (ENOTSUP);
2182 2157 }
2183 2158
2184 2159 static int
2185 2160 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2186 2161 {
2187 2162 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
2188 2163
2189 2164 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
2190 2165 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
2191 - return (0);
2192 -}
2193 -
2194 -/*ARGSUSED*/
2195 -static lgrp_mem_policy_info_t *
2196 -segmap_getpolicy(struct seg *seg, caddr_t addr)
2197 -{
2198 - return (NULL);
2199 -}
2200 -
2201 -/*ARGSUSED*/
2202 -static int
2203 -segmap_capable(struct seg *seg, segcapability_t capability)
2204 -{
2205 2166 return (0);
2206 2167 }
2207 2168
2208 2169
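The deleted segmap_getpolicy() and segmap_capable() stubs above did nothing but return NULL/0. With C99 designated initializers those slots can simply be omitted from a const ops structure and left NULL. The sketch below is illustrative only: the fields shown are a hand-picked subset, not the full struct seg_ops, and the variable name is made up for the example.

/*
 * Illustrative sketch (assumes <vm/seg.h>): with designated
 * initializers, unsupported ops such as .getpolicy and .capable are
 * simply left out and default to NULL, and the whole table can be
 * declared const.
 */
static const struct seg_ops example_segmap_ops = {
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.dump		= segmap_dump,
	.pagelock	= segmap_pagelock,
	.getmemid	= segmap_getmemid,
	/* .getpolicy and .capable intentionally left NULL */
};
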
2209 2170 #ifdef SEGKPM_SUPPORT
2210 2171
2211 2172 /*
2212 2173 * segkpm support routines
2213 2174 */
2214 2175
2215 2176 static caddr_t
2216 2177 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2217 2178 struct smap *smp, enum seg_rw rw)
2218 2179 {
2219 2180 caddr_t base;
2220 2181 page_t *pp;
2221 2182 int newpage = 0;
2222 2183 struct kpme *kpme;
2223 2184
2224 2185 ASSERT(smp->sm_refcnt > 0);
2225 2186
2226 2187 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
2227 2188 kmutex_t *smtx;
2228 2189
2229 2190 base = segkpm_create_va(off);
2230 2191
2231 2192 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
2232 2193 seg, base)) == NULL) {
2233 2194 panic("segmap_pagecreate_kpm: "
2234 2195 "page_create failed");
2235 2196 /*NOTREACHED*/
2236 2197 }
2237 2198
2238 2199 newpage = 1;
2239 2200 page_io_unlock(pp);
2240 2201 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
2241 2202
2242 2203 /*
2243 2204 * Mark this here until the following segmap_pagecreate
2244 2205 * or segmap_release.
2245 2206 */
2246 2207 smtx = SMAPMTX(smp);
2247 2208 mutex_enter(smtx);
2248 2209 smp->sm_flags |= SM_KPM_NEWPAGE;
2249 2210 mutex_exit(smtx);
2250 2211 }
2251 2212
2252 2213 kpme = GET_KPME(smp);
2253 2214 if (!newpage && kpme->kpe_page == pp)
2254 2215 base = hat_kpm_page2va(pp, 0);
2255 2216 else
2256 2217 base = hat_kpm_mapin(pp, kpme);
2257 2218
2258 2219 /*
2259 2220 * FS code may decide not to call segmap_pagecreate and we
2260 2221 * don't invoke segmap_fault via TLB miss, so we have to set
2261 2222 * ref and mod bits in advance.
2262 2223 */
2263 2224 if (rw == S_WRITE) {
2264 2225 hat_setrefmod(pp);
2265 2226 } else {
2266 2227 ASSERT(rw == S_READ);
2267 2228 hat_setref(pp);
2268 2229 }
2269 2230
2270 2231 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
2271 2232
2272 2233 return (base);
2273 2234 }
2274 2235
2275 2236 /*
2276 2237 * Find the smap structure corresponding to the
2277 2238 * KPM addr and return it locked.
2278 2239 */
2279 2240 struct smap *
2280 2241 get_smap_kpm(caddr_t addr, page_t **ppp)
2281 2242 {
2282 2243 struct smap *smp;
2283 2244 struct vnode *vp;
2284 2245 u_offset_t offset;
2285 2246 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
2286 2247 int hashid;
2287 2248 kmutex_t *hashmtx;
2288 2249 page_t *pp;
2289 2250 union segmap_cpu *scpu;
2290 2251
2291 2252 pp = hat_kpm_vaddr2page(baseaddr);
2292 2253
2293 2254 ASSERT(pp && !PP_ISFREE(pp));
2294 2255 ASSERT(PAGE_LOCKED(pp));
2295 2256 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
2296 2257
2297 2258 vp = pp->p_vnode;
2298 2259 offset = pp->p_offset;
2299 2260 ASSERT(vp != NULL);
2300 2261
2301 2262 /*
2302 2263 * Assume the last smap used on this cpu is the one needed.
2303 2264 */
2304 2265 scpu = smd_cpu+CPU->cpu_seqid;
2305 2266 smp = scpu->scpu.scpu_last_smap;
2306 2267 mutex_enter(&smp->sm_mtx);
2307 2268 if (smp->sm_vp == vp && smp->sm_off == offset) {
2308 2269 ASSERT(smp->sm_refcnt > 0);
2309 2270 } else {
2310 2271 /*
2311 2272 * Assumption wrong, find the smap on the hash chain.
2312 2273 */
2313 2274 mutex_exit(&smp->sm_mtx);
2314 2275 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
2315 2276 hashmtx = SHASHMTX(hashid);
2316 2277
2317 2278 mutex_enter(hashmtx);
2318 2279 smp = smd_hash[hashid].sh_hash_list;
2319 2280 for (; smp != NULL; smp = smp->sm_hash) {
2320 2281 if (smp->sm_vp == vp && smp->sm_off == offset)
2321 2282 break;
2322 2283 }
2323 2284 mutex_exit(hashmtx);
2324 2285 if (smp) {
2325 2286 mutex_enter(&smp->sm_mtx);
2326 2287 ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
2327 2288 }
2328 2289 }
2329 2290
2330 2291 if (ppp)
2331 2292 *ppp = smp ? pp : NULL;
2332 2293
2333 2294 return (smp);
2334 2295 }
2335 2296
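get_smap_kpm() first checks the per-CPU scpu_last_smap hint and only walks the hash chain when that guess misses. The userland-style sketch below shows the same cache-then-hash lookup pattern; the types and the hash function are illustrative stand-ins, and unlike the kernel code this sketch also refreshes the hint on a hash hit.

/*
 * Illustrative cache-then-hash lookup, not kernel code.
 */
#include <stddef.h>
#include <stdint.h>

struct entry {
	struct entry	*next;		/* hash chain link */
	const void	*key;
	unsigned long	off;
};

struct entry *
cached_lookup(struct entry **last, struct entry **hash, size_t nbuckets,
    const void *key, unsigned long off)
{
	struct entry *e = *last;

	/* Fast path: the most recently used entry often matches. */
	if (e != NULL && e->key == key && e->off == off)
		return (e);

	/* Slow path: fall back to the hash chain. */
	for (e = hash[((uintptr_t)key ^ off) % nbuckets]; e != NULL;
	    e = e->next) {
		if (e->key == key && e->off == off)
			break;
	}
	if (e != NULL)
		*last = e;		/* remember the hit for next time */
	return (e);
}
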
2336 2297 #else /* SEGKPM_SUPPORT */
2337 2298
2338 2299 /* segkpm stubs */
2339 2300
2340 2301 /*ARGSUSED*/
2341 2302 static caddr_t
2342 2303 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2343 2304 struct smap *smp, enum seg_rw rw)
2344 2305 {
2345 2306 return (NULL);
2346 2307 }
2347 2308
2348 2309 /*ARGSUSED*/
2349 2310 struct smap *
2350 2311 get_smap_kpm(caddr_t addr, page_t **ppp)
2351 2312 {
2352 2313 return (NULL);
2353 2314 }
2354 2315
2355 2316 #endif /* SEGKPM_SUPPORT */
[ 141 lines elided ]