use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, treat a NULL capable segop function pointer as shorthand for
"no capabilities supported".
--- old/usr/src/uts/common/vm/seg_map.c
+++ new/usr/src/uts/common/vm/seg_map.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 31 * under license from the Regents of the University of California.
32 32 */
33 33
34 34 /*
35 35 * VM - generic vnode mapping segment.
36 36 *
37 37 * The segmap driver is used only by the kernel to get faster (than seg_vn)
38 38 * mappings [lower routine overhead; more persistent cache] to random
39 39 * vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/param.h>
45 45 #include <sys/sysmacros.h>
46 46 #include <sys/buf.h>
47 47 #include <sys/systm.h>
48 48 #include <sys/vnode.h>
49 49 #include <sys/mman.h>
50 50 #include <sys/errno.h>
51 51 #include <sys/cred.h>
52 52 #include <sys/kmem.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/cmn_err.h>
55 55 #include <sys/debug.h>
56 56 #include <sys/thread.h>
57 57 #include <sys/dumphdr.h>
58 58 #include <sys/bitmap.h>
59 59 #include <sys/lgrp.h>
60 60
61 61 #include <vm/seg_kmem.h>
62 62 #include <vm/hat.h>
63 63 #include <vm/as.h>
64 64 #include <vm/seg.h>
65 65 #include <vm/seg_kpm.h>
66 66 #include <vm/seg_map.h>
67 67 #include <vm/page.h>
68 68 #include <vm/pvn.h>
69 69 #include <vm/rm.h>
70 70
71 71 /*
72 72 * Private seg op routines.
73 73 */
74 74 static void segmap_free(struct seg *seg);
75 75 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
76 76 size_t len, enum fault_type type, enum seg_rw rw);
77 77 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
78 78 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
79 79 uint_t prot);
80 80 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
81 81 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
82 82 uint_t *protv);
83 83 static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr);
84 84 static int segmap_gettype(struct seg *seg, caddr_t addr);
85 85 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
86 86 static void segmap_dump(struct seg *seg);
87 87 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
88 88 struct page ***ppp, enum lock_type type,
89 89 enum seg_rw rw);
90 90 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
91 -static int segmap_capable(struct seg *seg, segcapability_t capability);
92 91
93 92 /* segkpm support */
94 93 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
95 94 struct smap *, enum seg_rw);
96 95 struct smap *get_smap_kpm(caddr_t, page_t **);
97 96
98 97 static struct seg_ops segmap_ops = {
99 98 .free = segmap_free,
100 99 .fault = segmap_fault,
101 100 .faulta = segmap_faulta,
102 101 .checkprot = segmap_checkprot,
103 102 .kluster = segmap_kluster,
104 103 .getprot = segmap_getprot,
105 104 .getoffset = segmap_getoffset,
106 105 .gettype = segmap_gettype,
107 106 .getvp = segmap_getvp,
108 107 .dump = segmap_dump,
109 108 .pagelock = segmap_pagelock,
110 109 .getmemid = segmap_getmemid,
111 - .capable = segmap_capable,
112 110 };
113 111
114 112 /*
115 113 * Private segmap routines.
116 114 */
117 115 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
118 116 size_t len, enum seg_rw rw, struct smap *smp);
119 117 static void segmap_smapadd(struct smap *smp);
120 118 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
121 119 u_offset_t off, int hashid);
122 120 static void segmap_hashout(struct smap *smp);
123 121
124 122
125 123 /*
126 124 * Statistics for segmap operations.
127 125 *
128 126 * No explicit locking to protect these stats.
129 127 */
130 128 struct segmapcnt segmapcnt = {
131 129 { "fault", KSTAT_DATA_ULONG },
132 130 { "faulta", KSTAT_DATA_ULONG },
133 131 { "getmap", KSTAT_DATA_ULONG },
134 132 { "get_use", KSTAT_DATA_ULONG },
135 133 { "get_reclaim", KSTAT_DATA_ULONG },
136 134 { "get_reuse", KSTAT_DATA_ULONG },
137 135 { "get_unused", KSTAT_DATA_ULONG },
138 136 { "get_nofree", KSTAT_DATA_ULONG },
139 137 { "rel_async", KSTAT_DATA_ULONG },
140 138 { "rel_write", KSTAT_DATA_ULONG },
141 139 { "rel_free", KSTAT_DATA_ULONG },
142 140 { "rel_abort", KSTAT_DATA_ULONG },
143 141 { "rel_dontneed", KSTAT_DATA_ULONG },
144 142 { "release", KSTAT_DATA_ULONG },
145 143 { "pagecreate", KSTAT_DATA_ULONG },
146 144 { "free_notfree", KSTAT_DATA_ULONG },
147 145 { "free_dirty", KSTAT_DATA_ULONG },
148 146 { "free", KSTAT_DATA_ULONG },
149 147 { "stolen", KSTAT_DATA_ULONG },
150 148 { "get_nomtx", KSTAT_DATA_ULONG }
151 149 };
152 150
153 151 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
154 152 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
155 153
156 154 /*
157 155 * Return number of map pages in segment.
158 156 */
159 157 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
160 158
161 159 /*
162 160 * Translate addr into smap number within segment.
163 161 */
164 162 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
165 163
166 164 /*
167 165 * Translate addr in seg into struct smap pointer.
168 166 */
169 167 #define GET_SMAP(seg, addr) \
170 168 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
171 169
172 170 /*
173 171 * Bit in map (16 bit bitmap).
174 172 */
175 173 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
176 174
177 175 static int smd_colormsk = 0;
178 176 static int smd_ncolor = 0;
179 177 static int smd_nfree = 0;
180 178 static int smd_freemsk = 0;
181 179 #ifdef DEBUG
182 180 static int *colors_used;
183 181 #endif
184 182 static struct smap *smd_smap;
185 183 static struct smaphash *smd_hash;
186 184 #ifdef SEGMAP_HASHSTATS
187 185 static unsigned int *smd_hash_len;
188 186 #endif
189 187 static struct smfree *smd_free;
190 188 static ulong_t smd_hashmsk = 0;
191 189
192 190 #define SEGMAP_MAXCOLOR 2
193 191 #define SEGMAP_CACHE_PAD 64
194 192
195 193 union segmap_cpu {
196 194 struct {
197 195 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR];
198 196 struct smap *scpu_last_smap;
199 197 ulong_t scpu_getmap;
200 198 ulong_t scpu_release;
201 199 ulong_t scpu_get_reclaim;
202 200 ulong_t scpu_fault;
203 201 ulong_t scpu_pagecreate;
204 202 ulong_t scpu_get_reuse;
205 203 } scpu;
206 204 char scpu_pad[SEGMAP_CACHE_PAD];
207 205 };
208 206 static union segmap_cpu *smd_cpu;
209 207
210 208 /*
211 209 * There are three locks in seg_map:
212 210 * - per freelist mutexes
213 211 * - per hashchain mutexes
214 212 * - per smap mutexes
215 213 *
216 214 * The lock ordering is to get the smap mutex to lock down the slot
217 215 * first then the hash lock (for hash in/out (vp, off) list) or the
218 216 * freelist lock to put the slot back on the free list.
219 217 *
220 218 * The hash search is done by only holding the hashchain lock, when a wanted
221 219 * slot is found, we drop the hashchain lock then lock the slot so there
222 220 * is no overlapping of hashchain and smap locks. After the slot is
223 221 * locked, we verify again if the slot is still what we are looking
224 222 * for.
225 223 *
226 224 * Allocation of a free slot is done by holding the freelist lock,
227 225 * then locking the smap slot at the head of the freelist. This is
228 226 * in reversed lock order so mutex_tryenter() is used.
229 227 *
230 228 * The smap lock protects all fields in smap structure except for
231 229 * the link fields for hash/free lists which are protected by
232 230 * hashchain and freelist locks.
233 231 */
234 232
235 233 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx)
236 234
237 235 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk])
238 236 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk)
239 237
240 238 #define SMAPMTX(smp) (&smp->sm_mtx)
241 239
242 240 #define SMAP_HASHFUNC(vp, off, hashid) \
243 241 { \
244 242 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
245 243 ((off) >> MAXBSHIFT)) & smd_hashmsk); \
246 244 }
247 245
248 246 /*
249 247 * The most frequently updated kstat counters are kept in the
250 248 * per cpu array to avoid hot cache blocks. The update function
251 249 * sums the cpu local counters to update the global counters.
252 250 */
253 251
254 252 /* ARGSUSED */
255 253 int
256 254 segmap_kstat_update(kstat_t *ksp, int rw)
257 255 {
258 256 int i;
259 257 ulong_t getmap, release, get_reclaim;
260 258 ulong_t fault, pagecreate, get_reuse;
261 259
262 260 if (rw == KSTAT_WRITE)
263 261 return (EACCES);
264 262 getmap = release = get_reclaim = (ulong_t)0;
265 263 fault = pagecreate = get_reuse = (ulong_t)0;
266 264 for (i = 0; i < max_ncpus; i++) {
267 265 getmap += smd_cpu[i].scpu.scpu_getmap;
268 266 release += smd_cpu[i].scpu.scpu_release;
269 267 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
270 268 fault += smd_cpu[i].scpu.scpu_fault;
271 269 pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
272 270 get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
273 271 }
274 272 segmapcnt.smp_getmap.value.ul = getmap;
275 273 segmapcnt.smp_release.value.ul = release;
276 274 segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
277 275 segmapcnt.smp_fault.value.ul = fault;
278 276 segmapcnt.smp_pagecreate.value.ul = pagecreate;
279 277 segmapcnt.smp_get_reuse.value.ul = get_reuse;
280 278 return (0);
281 279 }
282 280
283 281 int
284 282 segmap_create(struct seg *seg, void *argsp)
285 283 {
286 284 struct segmap_data *smd;
287 285 struct smap *smp;
288 286 struct smfree *sm;
289 287 struct segmap_crargs *a = (struct segmap_crargs *)argsp;
290 288 struct smaphash *shashp;
291 289 union segmap_cpu *scpu;
292 290 long i, npages;
293 291 size_t hashsz;
294 292 uint_t nfreelist;
295 293 extern void prefetch_smap_w(void *);
296 294 extern int max_ncpus;
297 295
298 296 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
299 297
300 298 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
301 299 panic("segkmap not MAXBSIZE aligned");
302 300 /*NOTREACHED*/
303 301 }
304 302
305 303 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
306 304
307 305 seg->s_data = (void *)smd;
308 306 seg->s_ops = &segmap_ops;
309 307 smd->smd_prot = a->prot;
310 308
311 309 /*
312 310 * Scale the number of smap freelists to be
313 311 * proportional to max_ncpus * number of virtual colors.
314 312 * The caller can over-ride this scaling by providing
315 313 * a non-zero a->nfreelist argument.
316 314 */
317 315 nfreelist = a->nfreelist;
318 316 if (nfreelist == 0)
319 317 nfreelist = max_ncpus;
320 318 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
321 319 cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
322 320 "%d, using %d", nfreelist, max_ncpus);
323 321 nfreelist = max_ncpus;
324 322 }
325 323 if (!ISP2(nfreelist)) {
326 324 /* round up nfreelist to the next power of two. */
327 325 nfreelist = 1 << (highbit(nfreelist));
328 326 }
329 327
330 328 /*
331 329 * Get the number of virtual colors - must be a power of 2.
332 330 */
333 331 if (a->shmsize)
334 332 smd_ncolor = a->shmsize >> MAXBSHIFT;
335 333 else
336 334 smd_ncolor = 1;
337 335 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
338 336 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
339 337 smd_colormsk = smd_ncolor - 1;
340 338 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
341 339 smd_freemsk = smd_nfree - 1;
342 340
343 341 /*
344 342 * Allocate and initialize the freelist headers.
345 343 * Note that sm_freeq[1] starts out as the release queue. This
346 344 * is known when the smap structures are initialized below.
347 345 */
348 346 smd_free = smd->smd_free =
349 347 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
350 348 for (i = 0; i < smd_nfree; i++) {
351 349 sm = &smd->smd_free[i];
352 350 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
353 351 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
354 352 sm->sm_allocq = &sm->sm_freeq[0];
355 353 sm->sm_releq = &sm->sm_freeq[1];
356 354 }
357 355
358 356 /*
359 357 * Allocate and initialize the smap hash chain headers.
360 358 * Compute hash size rounding down to the next power of two.
361 359 */
362 360 npages = MAP_PAGES(seg);
363 361 smd->smd_npages = npages;
364 362 hashsz = npages / SMAP_HASHAVELEN;
365 363 hashsz = 1 << (highbit(hashsz)-1);
366 364 smd_hashmsk = hashsz - 1;
367 365 smd_hash = smd->smd_hash =
368 366 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
369 367 #ifdef SEGMAP_HASHSTATS
370 368 smd_hash_len =
371 369 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
372 370 #endif
373 371 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
374 372 shashp->sh_hash_list = NULL;
375 373 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
376 374 }
377 375
378 376 /*
379 377 * Allocate and initialize the smap structures.
380 378 * Link all slots onto the appropriate freelist.
381 379 * The smap array is large enough to affect boot time
382 380 * on large systems, so use memory prefetching and only
383 381 * go through the array 1 time. Inline an optimized version
384 382 * of segmap_smapadd to add structures to freelists with
385 383 * knowledge that no locks are needed here.
386 384 */
387 385 smd_smap = smd->smd_sm =
388 386 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
389 387
390 388 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
391 389 smp >= smd->smd_sm; smp--) {
392 390 struct smap *smpfreelist;
393 391 struct sm_freeq *releq;
394 392
395 393 prefetch_smap_w((char *)smp);
396 394
397 395 smp->sm_vp = NULL;
398 396 smp->sm_hash = NULL;
399 397 smp->sm_off = 0;
400 398 smp->sm_bitmap = 0;
401 399 smp->sm_refcnt = 0;
402 400 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
403 401 smp->sm_free_ndx = SMP2SMF_NDX(smp);
404 402
405 403 sm = SMP2SMF(smp);
406 404 releq = sm->sm_releq;
407 405
408 406 smpfreelist = releq->smq_free;
409 407 if (smpfreelist == 0) {
410 408 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
411 409 } else {
412 410 smp->sm_next = smpfreelist;
413 411 smp->sm_prev = smpfreelist->sm_prev;
414 412 smpfreelist->sm_prev = smp;
415 413 smp->sm_prev->sm_next = smp;
416 414 releq->smq_free = smp->sm_next;
417 415 }
418 416
419 417 /*
420 418 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
421 419 */
422 420 smp->sm_flags = 0;
423 421
424 422 #ifdef SEGKPM_SUPPORT
425 423 /*
426 424 * Due to the fragile prefetch loop no
427 425 * separate function is used here.
428 426 */
429 427 smp->sm_kpme_next = NULL;
430 428 smp->sm_kpme_prev = NULL;
431 429 smp->sm_kpme_page = NULL;
432 430 #endif
433 431 }
434 432
435 433 /*
436 434 * Allocate the per color indices that distribute allocation
437 435 * requests over the free lists. Each cpu will have a private
438 436 * rotor index to spread the allocations even across the available
439 437 * smap freelists. Init the scpu_last_smap field to the first
440 438 * smap element so there is no need to check for NULL.
441 439 */
442 440 smd_cpu =
443 441 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
444 442 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
445 443 int j;
446 444 for (j = 0; j < smd_ncolor; j++)
447 445 scpu->scpu.scpu_free_ndx[j] = j;
448 446 scpu->scpu.scpu_last_smap = smd_smap;
449 447 }
450 448
451 449 vpm_init();
452 450
453 451 #ifdef DEBUG
454 452 /*
455 453 * Keep track of which colors are used more often.
456 454 */
457 455 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
458 456 #endif /* DEBUG */
459 457
460 458 return (0);
461 459 }
462 460
463 461 static void
464 462 segmap_free(seg)
465 463 struct seg *seg;
466 464 {
467 465 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
468 466 }
469 467
470 468 /*
471 469 * Do a F_SOFTUNLOCK call over the range requested.
472 470 * The range must have already been F_SOFTLOCK'ed.
473 471 */
474 472 static void
475 473 segmap_unlock(
476 474 struct hat *hat,
477 475 struct seg *seg,
478 476 caddr_t addr,
479 477 size_t len,
480 478 enum seg_rw rw,
481 479 struct smap *smp)
482 480 {
483 481 page_t *pp;
484 482 caddr_t adr;
485 483 u_offset_t off;
486 484 struct vnode *vp;
487 485 kmutex_t *smtx;
488 486
489 487 ASSERT(smp->sm_refcnt > 0);
490 488
491 489 #ifdef lint
492 490 seg = seg;
493 491 #endif
494 492
495 493 if (segmap_kpm && IS_KPM_ADDR(addr)) {
496 494
497 495 /*
498 496 * We're called only from segmap_fault and this was a
499 497 * NOP in case of a kpm based smap, so dangerous things
500 498 * must have happened in the meantime. Pages are prefaulted
501 499 * and locked in segmap_getmapflt and they will not be
502 500 * unlocked until segmap_release.
503 501 */
504 502 panic("segmap_unlock: called with kpm addr %p", (void *)addr);
505 503 /*NOTREACHED*/
506 504 }
507 505
508 506 vp = smp->sm_vp;
509 507 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
510 508
511 509 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
512 510 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
513 511 ushort_t bitmask;
514 512
515 513 /*
516 514 * Use page_find() instead of page_lookup() to
517 515 * find the page since we know that it has
518 516 * "shared" lock.
519 517 */
520 518 pp = page_find(vp, off);
521 519 if (pp == NULL) {
522 520 panic("segmap_unlock: page not found");
523 521 /*NOTREACHED*/
524 522 }
525 523
526 524 if (rw == S_WRITE) {
527 525 hat_setrefmod(pp);
528 526 } else if (rw != S_OTHER) {
529 527 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
530 528 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
531 529 hat_setref(pp);
532 530 }
533 531
534 532 /*
535 533 * Clear bitmap, if the bit corresponding to "off" is set,
536 534 * since the page and translation are being unlocked.
537 535 */
538 536 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
539 537
540 538 /*
541 539 * Large Files: Following assertion is to verify
542 540 * the correctness of the cast to (int) above.
543 541 */
544 542 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
545 543 smtx = SMAPMTX(smp);
546 544 mutex_enter(smtx);
547 545 if (smp->sm_bitmap & bitmask) {
548 546 smp->sm_bitmap &= ~bitmask;
549 547 }
550 548 mutex_exit(smtx);
551 549
552 550 page_unlock(pp);
553 551 }
554 552 }
555 553
556 554 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
557 555
558 556 /*
559 557 * This routine is called via a machine specific fault handling
560 558 * routine. It is also called by software routines wishing to
561 559 * lock or unlock a range of addresses.
562 560 *
563 561 * Note that this routine expects a page-aligned "addr".
564 562 */
565 563 faultcode_t
566 564 segmap_fault(
567 565 struct hat *hat,
568 566 struct seg *seg,
569 567 caddr_t addr,
570 568 size_t len,
571 569 enum fault_type type,
572 570 enum seg_rw rw)
573 571 {
574 572 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
575 573 struct smap *smp;
576 574 page_t *pp, **ppp;
577 575 struct vnode *vp;
578 576 u_offset_t off;
579 577 page_t *pl[MAXPPB + 1];
580 578 uint_t prot;
581 579 u_offset_t addroff;
582 580 caddr_t adr;
583 581 int err;
584 582 u_offset_t sm_off;
585 583 int hat_flag;
586 584
587 585 if (segmap_kpm && IS_KPM_ADDR(addr)) {
588 586 int newpage;
589 587 kmutex_t *smtx;
590 588
591 589 /*
592 590 * Pages are successfully prefaulted and locked in
593 591 * segmap_getmapflt and can't be unlocked until
594 592 * segmap_release. No hat mappings have to be locked
595 593 * and they also can't be unlocked as long as the
596 594 * caller owns an active kpm addr.
597 595 */
598 596 #ifndef DEBUG
599 597 if (type != F_SOFTUNLOCK)
600 598 return (0);
601 599 #endif
602 600
603 601 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
604 602 panic("segmap_fault: smap not found "
605 603 "for addr %p", (void *)addr);
606 604 /*NOTREACHED*/
607 605 }
608 606
609 607 smtx = SMAPMTX(smp);
610 608 #ifdef DEBUG
611 609 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
612 610 if (newpage) {
613 611 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
614 612 (void *)smp);
615 613 }
616 614
617 615 if (type != F_SOFTUNLOCK) {
618 616 mutex_exit(smtx);
619 617 return (0);
620 618 }
621 619 #endif
622 620 mutex_exit(smtx);
623 621 vp = smp->sm_vp;
624 622 sm_off = smp->sm_off;
625 623
626 624 if (vp == NULL)
627 625 return (FC_MAKE_ERR(EIO));
628 626
629 627 ASSERT(smp->sm_refcnt > 0);
630 628
631 629 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
632 630 if (addroff + len > MAXBSIZE)
633 631 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
634 632 (void *)(addr + len));
635 633
636 634 off = sm_off + addroff;
637 635
638 636 pp = page_find(vp, off);
639 637
640 638 if (pp == NULL)
641 639 panic("segmap_fault: softunlock page not found");
642 640
643 641 /*
644 642 * Set ref bit also here in case of S_OTHER to avoid the
645 643 * overhead of supporting other cases than F_SOFTUNLOCK
646 644 * with segkpm. We can do this because the underlying
647 645 * pages are locked anyway.
648 646 */
649 647 if (rw == S_WRITE) {
650 648 hat_setrefmod(pp);
651 649 } else {
652 650 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
653 651 "segmap_fault:pp %p vp %p offset %llx",
654 652 pp, vp, off);
655 653 hat_setref(pp);
656 654 }
657 655
658 656 return (0);
659 657 }
660 658
661 659 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
662 660 smp = GET_SMAP(seg, addr);
663 661 vp = smp->sm_vp;
664 662 sm_off = smp->sm_off;
665 663
666 664 if (vp == NULL)
667 665 return (FC_MAKE_ERR(EIO));
668 666
669 667 ASSERT(smp->sm_refcnt > 0);
670 668
671 669 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
672 670 if (addroff + len > MAXBSIZE) {
673 671 panic("segmap_fault: endaddr %p "
674 672 "exceeds MAXBSIZE chunk", (void *)(addr + len));
675 673 /*NOTREACHED*/
676 674 }
677 675 off = sm_off + addroff;
678 676
679 677 /*
680 678 * First handle the easy stuff
681 679 */
682 680 if (type == F_SOFTUNLOCK) {
683 681 segmap_unlock(hat, seg, addr, len, rw, smp);
684 682 return (0);
685 683 }
686 684
687 685 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
688 686 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
689 687 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
690 688 seg, addr, rw, CRED(), NULL);
691 689
692 690 if (err)
693 691 return (FC_MAKE_ERR(err));
694 692
695 693 prot &= smd->smd_prot;
696 694
697 695 /*
698 696 * Handle all pages returned in the pl[] array.
699 697 * This loop is coded on the assumption that if
700 698 * there was no error from the VOP_GETPAGE routine,
701 699 * that the page list returned will contain all the
702 700 * needed pages for the vp from [off..off + len].
703 701 */
704 702 ppp = pl;
705 703 while ((pp = *ppp++) != NULL) {
706 704 u_offset_t poff;
707 705 ASSERT(pp->p_vnode == vp);
708 706 hat_flag = HAT_LOAD;
709 707
710 708 /*
711 709 * Verify that the pages returned are within the range
712 710 * of this segmap region. Note that it is theoretically
713 711 * possible for pages outside this range to be returned,
714 712 * but it is not very likely. If we cannot use the
715 713 * page here, just release it and go on to the next one.
716 714 */
717 715 if (pp->p_offset < sm_off ||
718 716 pp->p_offset >= sm_off + MAXBSIZE) {
719 717 (void) page_release(pp, 1);
720 718 continue;
721 719 }
722 720
723 721 ASSERT(hat == kas.a_hat);
724 722 poff = pp->p_offset;
725 723 adr = addr + (poff - off);
726 724 if (adr >= addr && adr < addr + len) {
727 725 hat_setref(pp);
728 726 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
729 727 "segmap_fault:pp %p vp %p offset %llx",
730 728 pp, vp, poff);
731 729 if (type == F_SOFTLOCK)
732 730 hat_flag = HAT_LOAD_LOCK;
733 731 }
734 732
735 733 /*
736 734 * Deal with VMODSORT pages here. If we know this is a write
737 735 * do the setmod now and allow write protection.
738 736 * As long as it's modified or not S_OTHER, remove write
739 737 * protection. With S_OTHER it's up to the FS to deal with this.
740 738 */
741 739 if (IS_VMODSORT(vp)) {
742 740 if (rw == S_WRITE)
743 741 hat_setmod(pp);
744 742 else if (rw != S_OTHER && !hat_ismod(pp))
745 743 prot &= ~PROT_WRITE;
746 744 }
747 745
748 746 hat_memload(hat, adr, pp, prot, hat_flag);
749 747 if (hat_flag != HAT_LOAD_LOCK)
750 748 page_unlock(pp);
751 749 }
752 750 return (0);
753 751 }
754 752
755 753 /*
756 754 * This routine is used to start I/O on pages asynchronously.
757 755 */
758 756 static faultcode_t
759 757 segmap_faulta(struct seg *seg, caddr_t addr)
760 758 {
761 759 struct smap *smp;
762 760 struct vnode *vp;
763 761 u_offset_t off;
764 762 int err;
765 763
766 764 if (segmap_kpm && IS_KPM_ADDR(addr)) {
767 765 int newpage;
768 766 kmutex_t *smtx;
769 767
770 768 /*
771 769 * Pages are successfully prefaulted and locked in
772 770 * segmap_getmapflt and can't be unlocked until
773 771 * segmap_release. No hat mappings have to be locked
774 772 * and they also can't be unlocked as long as the
775 773 * caller owns an active kpm addr.
776 774 */
777 775 #ifdef DEBUG
778 776 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
779 777 panic("segmap_faulta: smap not found "
780 778 "for addr %p", (void *)addr);
781 779 /*NOTREACHED*/
782 780 }
783 781
784 782 smtx = SMAPMTX(smp);
785 783 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
786 784 mutex_exit(smtx);
787 785 if (newpage)
788 786 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
789 787 (void *)smp);
790 788 #endif
791 789 return (0);
792 790 }
793 791
794 792 segmapcnt.smp_faulta.value.ul++;
795 793 smp = GET_SMAP(seg, addr);
796 794
797 795 ASSERT(smp->sm_refcnt > 0);
798 796
799 797 vp = smp->sm_vp;
800 798 off = smp->sm_off;
801 799
802 800 if (vp == NULL) {
803 801 cmn_err(CE_WARN, "segmap_faulta - no vp");
804 802 return (FC_MAKE_ERR(EIO));
805 803 }
806 804
807 805 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
808 806 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
809 807
810 808 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
811 809 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
812 810 seg, addr, S_READ, CRED(), NULL);
813 811
814 812 if (err)
815 813 return (FC_MAKE_ERR(err));
816 814 return (0);
817 815 }
818 816
819 817 /*ARGSUSED*/
820 818 static int
821 819 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
822 820 {
823 821 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
824 822
825 823 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
826 824
827 825 /*
828 826 * Need not acquire the segment lock since
829 827 * "smd_prot" is a read-only field.
830 828 */
831 829 return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
832 830 }
833 831
834 832 static int
835 833 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
836 834 {
837 835 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
838 836 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
839 837
840 838 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
841 839
842 840 if (pgno != 0) {
843 841 do {
844 842 protv[--pgno] = smd->smd_prot;
845 843 } while (pgno != 0);
846 844 }
847 845 return (0);
848 846 }
849 847
850 848 static u_offset_t
851 849 segmap_getoffset(struct seg *seg, caddr_t addr)
852 850 {
853 851 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
854 852
855 853 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
856 854
857 855 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
858 856 }
859 857
860 858 /*ARGSUSED*/
861 859 static int
862 860 segmap_gettype(struct seg *seg, caddr_t addr)
863 861 {
864 862 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
865 863
866 864 return (MAP_SHARED);
867 865 }
868 866
869 867 /*ARGSUSED*/
870 868 static int
871 869 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
872 870 {
873 871 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
874 872
875 873 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
876 874
877 875 /* XXX - This doesn't make any sense */
878 876 *vpp = smd->smd_sm->sm_vp;
879 877 return (0);
880 878 }
881 879
882 880 /*
883 881 * Check to see if it makes sense to do kluster/read ahead to
884 882 * addr + delta relative to the mapping at addr. We assume here
885 883 * that delta is a signed PAGESIZE'd multiple (which can be negative).
886 884 *
887 885 * For segmap we always "approve" of this action from our standpoint.
888 886 */
889 887 /*ARGSUSED*/
890 888 static int
891 889 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
892 890 {
893 891 return (0);
894 892 }
895 893
896 894 /*
897 895 * Special private segmap operations
898 896 */
899 897
900 898 /*
901 899 * Add smap to the appropriate free list.
902 900 */
903 901 static void
904 902 segmap_smapadd(struct smap *smp)
905 903 {
906 904 struct smfree *sm;
907 905 struct smap *smpfreelist;
908 906 struct sm_freeq *releq;
909 907
910 908 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
911 909
912 910 if (smp->sm_refcnt != 0) {
913 911 panic("segmap_smapadd");
914 912 /*NOTREACHED*/
915 913 }
916 914
917 915 sm = &smd_free[smp->sm_free_ndx];
918 916 /*
919 917 * Add to the tail of the release queue
920 918 * Note that sm_releq and sm_allocq could toggle
921 919 * before we get the lock. This does not affect
922 920 * correctness as the 2 queues are only maintained
923 921 * to reduce lock pressure.
924 922 */
925 923 releq = sm->sm_releq;
926 924 if (releq == &sm->sm_freeq[0])
927 925 smp->sm_flags |= SM_QNDX_ZERO;
928 926 else
929 927 smp->sm_flags &= ~SM_QNDX_ZERO;
930 928 mutex_enter(&releq->smq_mtx);
931 929 smpfreelist = releq->smq_free;
932 930 if (smpfreelist == 0) {
933 931 int want;
934 932
935 933 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
936 934 /*
937 935 * Both queue mutexes held to set sm_want;
938 936 * snapshot the value before dropping releq mutex.
939 937 * If sm_want appears after the releq mutex is dropped,
940 938 * then the smap just freed is already gone.
941 939 */
942 940 want = sm->sm_want;
943 941 mutex_exit(&releq->smq_mtx);
944 942 /*
945 943 * See if there was a waiter before dropping the releq mutex
946 944 * then recheck after obtaining sm_freeq[0] mutex as
947 945 * the another thread may have already signaled.
948 946 * another thread may have already signaled.
949 947 if (want) {
950 948 mutex_enter(&sm->sm_freeq[0].smq_mtx);
951 949 if (sm->sm_want)
952 950 cv_signal(&sm->sm_free_cv);
953 951 mutex_exit(&sm->sm_freeq[0].smq_mtx);
954 952 }
955 953 } else {
956 954 smp->sm_next = smpfreelist;
957 955 smp->sm_prev = smpfreelist->sm_prev;
958 956 smpfreelist->sm_prev = smp;
959 957 smp->sm_prev->sm_next = smp;
960 958 mutex_exit(&releq->smq_mtx);
961 959 }
962 960 }
963 961
964 962
965 963 static struct smap *
966 964 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
967 965 {
968 966 struct smap **hpp;
969 967 struct smap *tmp;
970 968 kmutex_t *hmtx;
971 969
972 970 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
973 971 ASSERT(smp->sm_vp == NULL);
974 972 ASSERT(smp->sm_hash == NULL);
975 973 ASSERT(smp->sm_prev == NULL);
976 974 ASSERT(smp->sm_next == NULL);
977 975 ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
978 976
979 977 hmtx = SHASHMTX(hashid);
980 978
981 979 mutex_enter(hmtx);
982 980 /*
983 981 * First we need to verify that no one has created a smp
984 982 * with (vp,off) as its tag before us.
985 983 */
986 984 for (tmp = smd_hash[hashid].sh_hash_list;
987 985 tmp != NULL; tmp = tmp->sm_hash)
988 986 if (tmp->sm_vp == vp && tmp->sm_off == off)
989 987 break;
990 988
991 989 if (tmp == NULL) {
992 990 /*
993 991 * No one created one yet.
994 992 *
995 993 * Funniness here - we don't increment the ref count on the
996 994 * vnode * even though we have another pointer to it here.
997 995 * The reason for this is that we don't want the fact that
998 996 * a seg_map entry somewhere refers to a vnode to prevent the
999 997 * vnode * itself from going away. This is because this
1000 998 * reference to the vnode is a "soft one". In the case where
1001 999 * a mapping is being used by a rdwr [or directory routine?]
1002 1000 * there already has to be a non-zero ref count on the vnode.
1003 1001 * In the case where the vp has been freed and the smap
1004 1002 * structure is on the free list, there are no pages in memory
1005 1003 * that can refer to the vnode. Thus even if we reuse the same
1006 1004 * vnode/smap structure for a vnode which has the same
1007 1005 * address but represents a different object, we are ok.
1008 1006 */
1009 1007 smp->sm_vp = vp;
1010 1008 smp->sm_off = off;
1011 1009
1012 1010 hpp = &smd_hash[hashid].sh_hash_list;
1013 1011 smp->sm_hash = *hpp;
1014 1012 *hpp = smp;
1015 1013 #ifdef SEGMAP_HASHSTATS
1016 1014 smd_hash_len[hashid]++;
1017 1015 #endif
1018 1016 }
1019 1017 mutex_exit(hmtx);
1020 1018
1021 1019 return (tmp);
1022 1020 }
1023 1021
1024 1022 static void
1025 1023 segmap_hashout(struct smap *smp)
1026 1024 {
1027 1025 struct smap **hpp, *hp;
1028 1026 struct vnode *vp;
1029 1027 kmutex_t *mtx;
1030 1028 int hashid;
1031 1029 u_offset_t off;
1032 1030
1033 1031 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1034 1032
1035 1033 vp = smp->sm_vp;
1036 1034 off = smp->sm_off;
1037 1035
1038 1036 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1039 1037 mtx = SHASHMTX(hashid);
1040 1038 mutex_enter(mtx);
1041 1039
1042 1040 hpp = &smd_hash[hashid].sh_hash_list;
1043 1041 for (;;) {
1044 1042 hp = *hpp;
1045 1043 if (hp == NULL) {
1046 1044 panic("segmap_hashout");
1047 1045 /*NOTREACHED*/
1048 1046 }
1049 1047 if (hp == smp)
1050 1048 break;
1051 1049 hpp = &hp->sm_hash;
1052 1050 }
1053 1051
1054 1052 *hpp = smp->sm_hash;
1055 1053 smp->sm_hash = NULL;
1056 1054 #ifdef SEGMAP_HASHSTATS
1057 1055 smd_hash_len[hashid]--;
1058 1056 #endif
1059 1057 mutex_exit(mtx);
1060 1058
1061 1059 smp->sm_vp = NULL;
1062 1060 smp->sm_off = (u_offset_t)0;
1063 1061
1064 1062 }
1065 1063
1066 1064 /*
1067 1065 * Attempt to free unmodified, unmapped, and non locked segmap
1068 1066 * pages.
1069 1067 */
1070 1068 void
1071 1069 segmap_pagefree(struct vnode *vp, u_offset_t off)
1072 1070 {
1073 1071 u_offset_t pgoff;
1074 1072 page_t *pp;
1075 1073
1076 1074 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
1077 1075
1078 1076 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
1079 1077 continue;
1080 1078
1081 1079 switch (page_release(pp, 1)) {
1082 1080 case PGREL_NOTREL:
1083 1081 segmapcnt.smp_free_notfree.value.ul++;
1084 1082 break;
1085 1083 case PGREL_MOD:
1086 1084 segmapcnt.smp_free_dirty.value.ul++;
1087 1085 break;
1088 1086 case PGREL_CLEAN:
1089 1087 segmapcnt.smp_free.value.ul++;
1090 1088 break;
1091 1089 }
1092 1090 }
1093 1091 }
1094 1092
1095 1093 /*
1096 1094 * Locks held on entry: smap lock
1097 1095 * Locks held on exit : smap lock.
1098 1096 */
1099 1097
1100 1098 static void
1101 1099 grab_smp(struct smap *smp, page_t *pp)
1102 1100 {
1103 1101 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1104 1102 ASSERT(smp->sm_refcnt == 0);
1105 1103
1106 1104 if (smp->sm_vp != (struct vnode *)NULL) {
1107 1105 struct vnode *vp = smp->sm_vp;
1108 1106 u_offset_t off = smp->sm_off;
1109 1107 /*
1110 1108 * Destroy old vnode association and
1111 1109 * unload any hardware translations to
1112 1110 * the old object.
1113 1111 */
1114 1112 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
1115 1113 segmap_hashout(smp);
1116 1114
1117 1115 /*
1118 1116 * This node is off freelist and hashlist,
1119 1117 * so there is no reason to drop/reacquire sm_mtx
1120 1118 * across calls to hat_unload.
1121 1119 */
1122 1120 if (segmap_kpm) {
1123 1121 caddr_t vaddr;
1124 1122 int hat_unload_needed = 0;
1125 1123
1126 1124 /*
1127 1125 * unload kpm mapping
1128 1126 */
1129 1127 if (pp != NULL) {
1130 1128 vaddr = hat_kpm_page2va(pp, 1);
1131 1129 hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
1132 1130 page_unlock(pp);
1133 1131 }
1134 1132
1135 1133 /*
1136 1134 * Check if we have (also) the rare case of a
1137 1135 * non kpm mapping.
1138 1136 */
1139 1137 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
1140 1138 hat_unload_needed = 1;
1141 1139 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1142 1140 }
1143 1141
1144 1142 if (hat_unload_needed) {
1145 1143 hat_unload(kas.a_hat, segkmap->s_base +
1146 1144 ((smp - smd_smap) * MAXBSIZE),
1147 1145 MAXBSIZE, HAT_UNLOAD);
1148 1146 }
1149 1147
1150 1148 } else {
1151 1149 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
1152 1150 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1153 1151 hat_unload(kas.a_hat, segkmap->s_base +
1154 1152 ((smp - smd_smap) * MAXBSIZE),
1155 1153 MAXBSIZE, HAT_UNLOAD);
1156 1154 }
1157 1155 segmap_pagefree(vp, off);
1158 1156 }
1159 1157 }
1160 1158
1161 1159 static struct smap *
1162 1160 get_free_smp(int free_ndx)
1163 1161 {
1164 1162 struct smfree *sm;
1165 1163 kmutex_t *smtx;
1166 1164 struct smap *smp, *first;
1167 1165 struct sm_freeq *allocq, *releq;
1168 1166 struct kpme *kpme;
1169 1167 page_t *pp = NULL;
1170 1168 int end_ndx, page_locked = 0;
1171 1169
1172 1170 end_ndx = free_ndx;
1173 1171 sm = &smd_free[free_ndx];
1174 1172
1175 1173 retry_queue:
1176 1174 allocq = sm->sm_allocq;
1177 1175 mutex_enter(&allocq->smq_mtx);
1178 1176
1179 1177 if ((smp = allocq->smq_free) == NULL) {
1180 1178
1181 1179 skip_queue:
1182 1180 /*
1183 1181 * The alloc list is empty or this queue is being skipped;
1184 1182 * first see if the allocq toggled.
1185 1183 */
1186 1184 if (sm->sm_allocq != allocq) {
1187 1185 /* queue changed */
1188 1186 mutex_exit(&allocq->smq_mtx);
1189 1187 goto retry_queue;
1190 1188 }
1191 1189 releq = sm->sm_releq;
1192 1190 if (!mutex_tryenter(&releq->smq_mtx)) {
1193 1191 /* cannot get releq; a free smp may be there now */
1194 1192 mutex_exit(&allocq->smq_mtx);
1195 1193
1196 1194 /*
1197 1195 * This loop could spin forever if this thread has
1198 1196 * higher priority than the thread that is holding
1199 1197 * releq->smq_mtx. In order to force the other thread
1200 1198 * to run, we'll lock/unlock the mutex which is safe
1201 1199 * since we just unlocked the allocq mutex.
1202 1200 */
1203 1201 mutex_enter(&releq->smq_mtx);
1204 1202 mutex_exit(&releq->smq_mtx);
1205 1203 goto retry_queue;
1206 1204 }
1207 1205 if (releq->smq_free == NULL) {
1208 1206 /*
1209 1207 * This freelist is empty.
1210 1208 * This should not happen unless clients
1211 1209 * are failing to release the segmap
1212 1210 * window after accessing the data.
1213 1211 * Before resorting to sleeping, try
1214 1212 * the next list of the same color.
1215 1213 */
1216 1214 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
1217 1215 if (free_ndx != end_ndx) {
1218 1216 mutex_exit(&releq->smq_mtx);
1219 1217 mutex_exit(&allocq->smq_mtx);
1220 1218 sm = &smd_free[free_ndx];
1221 1219 goto retry_queue;
1222 1220 }
1223 1221 /*
1224 1222 * Tried all freelists of the same color once,
1225 1223 * wait on this list and hope something gets freed.
1226 1224 */
1227 1225 segmapcnt.smp_get_nofree.value.ul++;
1228 1226 sm->sm_want++;
1229 1227 mutex_exit(&sm->sm_freeq[1].smq_mtx);
1230 1228 cv_wait(&sm->sm_free_cv,
1231 1229 &sm->sm_freeq[0].smq_mtx);
1232 1230 sm->sm_want--;
1233 1231 mutex_exit(&sm->sm_freeq[0].smq_mtx);
1234 1232 sm = &smd_free[free_ndx];
1235 1233 goto retry_queue;
1236 1234 } else {
1237 1235 /*
1238 1236 * Something on the rele queue; flip the alloc
1239 1237 * and rele queues and retry.
1240 1238 */
1241 1239 sm->sm_allocq = releq;
1242 1240 sm->sm_releq = allocq;
1243 1241 mutex_exit(&allocq->smq_mtx);
1244 1242 mutex_exit(&releq->smq_mtx);
1245 1243 if (page_locked) {
1246 1244 delay(hz >> 2);
1247 1245 page_locked = 0;
1248 1246 }
1249 1247 goto retry_queue;
1250 1248 }
1251 1249 } else {
1252 1250 /*
1253 1251 * Fastpath the case we get the smap mutex
1254 1252 * on the first try.
1255 1253 */
1256 1254 first = smp;
1257 1255 next_smap:
1258 1256 smtx = SMAPMTX(smp);
1259 1257 if (!mutex_tryenter(smtx)) {
1260 1258 /*
1261 1259 * Another thread is trying to reclaim this slot.
1262 1260 * Skip to the next queue or smap.
1263 1261 */
1264 1262 if ((smp = smp->sm_next) == first) {
1265 1263 goto skip_queue;
1266 1264 } else {
1267 1265 goto next_smap;
1268 1266 }
1269 1267 } else {
1270 1268 /*
1271 1269 * if kpme exists, get shared lock on the page
1272 1270 */
1273 1271 if (segmap_kpm && smp->sm_vp != NULL) {
1274 1272
1275 1273 kpme = GET_KPME(smp);
1276 1274 pp = kpme->kpe_page;
1277 1275
1278 1276 if (pp != NULL) {
1279 1277 if (!page_trylock(pp, SE_SHARED)) {
1280 1278 smp = smp->sm_next;
1281 1279 mutex_exit(smtx);
1282 1280 page_locked = 1;
1283 1281
1284 1282 pp = NULL;
1285 1283
1286 1284 if (smp == first) {
1287 1285 goto skip_queue;
1288 1286 } else {
1289 1287 goto next_smap;
1290 1288 }
1291 1289 } else {
1292 1290 if (kpme->kpe_page == NULL) {
1293 1291 page_unlock(pp);
1294 1292 pp = NULL;
1295 1293 }
1296 1294 }
1297 1295 }
1298 1296 }
1299 1297
1300 1298 /*
1301 1299 * At this point, we've selected smp. Remove smp
1302 1300 * from its freelist. If smp is the first one in
1303 1301 * the freelist, update the head of the freelist.
1304 1302 */
1305 1303 if (first == smp) {
1306 1304 ASSERT(first == allocq->smq_free);
1307 1305 allocq->smq_free = smp->sm_next;
1308 1306 }
1309 1307
1310 1308 /*
1311 1309 * if the head of the freelist still points to smp,
1312 1310 * then there are no more free smaps in that list.
1313 1311 */
1314 1312 if (allocq->smq_free == smp)
1315 1313 /*
1316 1314 * Took the last one
1317 1315 */
1318 1316 allocq->smq_free = NULL;
1319 1317 else {
1320 1318 smp->sm_prev->sm_next = smp->sm_next;
1321 1319 smp->sm_next->sm_prev = smp->sm_prev;
1322 1320 }
1323 1321 mutex_exit(&allocq->smq_mtx);
1324 1322 smp->sm_prev = smp->sm_next = NULL;
1325 1323
1326 1324 /*
1327 1325 * if pp != NULL, pp must have been locked;
1328 1326 * grab_smp() unlocks pp.
1329 1327 */
1330 1328 ASSERT((pp == NULL) || PAGE_LOCKED(pp));
1331 1329 grab_smp(smp, pp);
1332 1330 /* return smp locked. */
1333 1331 ASSERT(SMAPMTX(smp) == smtx);
1334 1332 ASSERT(MUTEX_HELD(smtx));
1335 1333 return (smp);
1336 1334 }
1337 1335 }
1338 1336 }
1339 1337
1340 1338 /*
1341 1339 * Special public segmap operations
1342 1340 */
1343 1341
1344 1342 /*
1345 1343 * Create pages (without using VOP_GETPAGE) and load up translations to them.
1346 1344 * If softlock is TRUE, then set things up so that it looks like a call
1347 1345 * to segmap_fault with F_SOFTLOCK.
1348 1346 *
1349 1347 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
1350 1348 *
1351 1349 * All fields in the generic segment (struct seg) are considered to be
1352 1350 * read-only for "segmap" even though the kernel address space (kas) may
1353 1351 * not be locked, hence no lock is needed to access them.
1354 1352 */
1355 1353 int
1356 1354 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
1357 1355 {
1358 1356 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
1359 1357 page_t *pp;
1360 1358 u_offset_t off;
1361 1359 struct smap *smp;
1362 1360 struct vnode *vp;
1363 1361 caddr_t eaddr;
1364 1362 int newpage = 0;
1365 1363 uint_t prot;
1366 1364 kmutex_t *smtx;
1367 1365 int hat_flag;
1368 1366
1369 1367 ASSERT(seg->s_as == &kas);
1370 1368
1371 1369 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1372 1370 /*
1373 1371 * Pages are successfully prefaulted and locked in
1374 1372 * segmap_getmapflt and can't be unlocked until
1375 1373 * segmap_release. The SM_KPM_NEWPAGE flag is set
1376 1374 * in segmap_pagecreate_kpm when new pages are created,
1377 1375 * and it is returned as "newpage" indication here.
1378 1376 */
1379 1377 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1380 1378 panic("segmap_pagecreate: smap not found "
1381 1379 "for addr %p", (void *)addr);
1382 1380 /*NOTREACHED*/
1383 1381 }
1384 1382
1385 1383 smtx = SMAPMTX(smp);
1386 1384 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
1387 1385 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1388 1386 mutex_exit(smtx);
1389 1387
1390 1388 return (newpage);
1391 1389 }
1392 1390
1393 1391 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
1394 1392
1395 1393 eaddr = addr + len;
1396 1394 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1397 1395
1398 1396 smp = GET_SMAP(seg, addr);
1399 1397
1400 1398 /*
1401 1399 * We don't grab smp mutex here since we assume the smp
1402 1400 * has a refcnt set already which prevents the slot from
1403 1401 * changing its id.
1404 1402 */
1405 1403 ASSERT(smp->sm_refcnt > 0);
1406 1404
1407 1405 vp = smp->sm_vp;
1408 1406 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1409 1407 prot = smd->smd_prot;
1410 1408
1411 1409 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1412 1410 hat_flag = HAT_LOAD;
1413 1411 pp = page_lookup(vp, off, SE_SHARED);
1414 1412 if (pp == NULL) {
1415 1413 ushort_t bitindex;
1416 1414
1417 1415 if ((pp = page_create_va(vp, off,
1418 1416 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1419 1417 panic("segmap_pagecreate: page_create failed");
1420 1418 /*NOTREACHED*/
1421 1419 }
1422 1420 newpage = 1;
1423 1421 page_io_unlock(pp);
1424 1422
1425 1423 /*
1426 1424 * Since pages created here do not contain valid
1427 1425 * data until the caller writes into them, the
1428 1426 * "exclusive" lock will not be dropped to prevent
1429 1427 * other users from accessing the page. We also
1430 1428 * have to lock the translation to prevent a fault
1431 1429 * from occurring when the virtual address mapped by
1432 1430 * this page is written into. This is necessary to
1433 1431 * avoid a deadlock since we haven't dropped the
1434 1432 * "exclusive" lock.
1435 1433 */
1436 1434 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
1437 1435
1438 1436 /*
1439 1437 * Large Files: The following assertion is to
1440 1438 * verify the cast above.
1441 1439 */
1442 1440 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1443 1441 smtx = SMAPMTX(smp);
1444 1442 mutex_enter(smtx);
1445 1443 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
1446 1444 mutex_exit(smtx);
1447 1445
1448 1446 hat_flag = HAT_LOAD_LOCK;
1449 1447 } else if (softlock) {
1450 1448 hat_flag = HAT_LOAD_LOCK;
1451 1449 }
1452 1450
1453 1451 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
1454 1452 hat_setmod(pp);
1455 1453
1456 1454 hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
1457 1455
1458 1456 if (hat_flag != HAT_LOAD_LOCK)
1459 1457 page_unlock(pp);
1460 1458
1461 1459 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
1462 1460 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1463 1461 seg, addr, pp, vp, off);
1464 1462 }
1465 1463
1466 1464 return (newpage);
1467 1465 }
1468 1466
1469 1467 void
1470 1468 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
1471 1469 {
1472 1470 struct smap *smp;
1473 1471 ushort_t bitmask;
1474 1472 page_t *pp;
1475 1473 struct vnode *vp;
1476 1474 u_offset_t off;
1477 1475 caddr_t eaddr;
1478 1476 kmutex_t *smtx;
1479 1477
1480 1478 ASSERT(seg->s_as == &kas);
1481 1479
1482 1480 eaddr = addr + len;
1483 1481 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1484 1482
1485 1483 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1486 1484 /*
1487 1485 * Pages are successfully prefaulted and locked in
1488 1486 * segmap_getmapflt and can't be unlocked until
1489 1487 * segmap_release, so no pages or hat mappings have
1490 1488 * to be unlocked at this point.
1491 1489 */
1492 1490 #ifdef DEBUG
1493 1491 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1494 1492 panic("segmap_pageunlock: smap not found "
1495 1493 "for addr %p", (void *)addr);
1496 1494 /*NOTREACHED*/
1497 1495 }
1498 1496
1499 1497 ASSERT(smp->sm_refcnt > 0);
1500 1498 mutex_exit(SMAPMTX(smp));
1501 1499 #endif
1502 1500 return;
1503 1501 }
1504 1502
1505 1503 smp = GET_SMAP(seg, addr);
1506 1504 smtx = SMAPMTX(smp);
1507 1505
1508 1506 ASSERT(smp->sm_refcnt > 0);
1509 1507
1510 1508 vp = smp->sm_vp;
1511 1509 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1512 1510
1513 1511 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1514 1512 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
1515 1513
1516 1514 /*
1517 1515 * Large Files: Following assertion is to verify
1518 1516 * the correctness of the cast to (int) above.
1519 1517 */
1520 1518 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1521 1519
1522 1520 /*
1523 1521 * If the bit corresponding to "off" is set,
1524 1522 * clear this bit in the bitmap, unlock translations,
1525 1523 * and release the "exclusive" lock on the page.
1526 1524 */
1527 1525 if (smp->sm_bitmap & bitmask) {
1528 1526 mutex_enter(smtx);
1529 1527 smp->sm_bitmap &= ~bitmask;
1530 1528 mutex_exit(smtx);
1531 1529
1532 1530 hat_unlock(kas.a_hat, addr, PAGESIZE);
1533 1531
1534 1532 /*
1535 1533 * Use page_find() instead of page_lookup() to
1536 1534 * find the page since we know that it has
1537 1535 * "exclusive" lock.
1538 1536 */
1539 1537 pp = page_find(vp, off);
1540 1538 if (pp == NULL) {
1541 1539 panic("segmap_pageunlock: page not found");
1542 1540 /*NOTREACHED*/
1543 1541 }
1544 1542 if (rw == S_WRITE) {
1545 1543 hat_setrefmod(pp);
1546 1544 } else if (rw != S_OTHER) {
1547 1545 hat_setref(pp);
1548 1546 }
1549 1547
1550 1548 page_unlock(pp);
1551 1549 }
1552 1550 }
1553 1551 }
1554 1552
1555 1553 caddr_t
1556 1554 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
1557 1555 {
1558 1556 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
1559 1557 }
1560 1558
1561 1559 /*
1562 1560 * This is the magic virtual address that offset 0 of an ELF
1563 1561 * file gets mapped to in user space. This is used to pick
1564 1562 * the vac color on the freelist.
1565 1563 */
1566 1564 #define ELF_OFFZERO_VA (0x10000)
1567 1565 /*
1568 1566 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
1569 1567 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
1570 1568 * The return address is always MAXBSIZE aligned.
1571 1569 *
1572 1570 * If forcefault is nonzero and the MMU translations haven't yet been created,
1573 1571 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1574 1572 */
1575 1573 caddr_t
1576 1574 segmap_getmapflt(
1577 1575 struct seg *seg,
1578 1576 struct vnode *vp,
1579 1577 u_offset_t off,
1580 1578 size_t len,
1581 1579 int forcefault,
1582 1580 enum seg_rw rw)
1583 1581 {
1584 1582 struct smap *smp, *nsmp;
1585 1583 extern struct vnode *common_specvp();
1586 1584 caddr_t baseaddr; /* MAXBSIZE aligned */
1587 1585 u_offset_t baseoff;
1588 1586 int newslot;
1589 1587 caddr_t vaddr;
1590 1588 int color, hashid;
1591 1589 kmutex_t *hashmtx, *smapmtx;
1592 1590 struct smfree *sm;
1593 1591 page_t *pp;
1594 1592 struct kpme *kpme;
1595 1593 uint_t prot;
1596 1594 caddr_t base;
1597 1595 page_t *pl[MAXPPB + 1];
1598 1596 int error;
1599 1597 int is_kpm = 1;
1600 1598
1601 1599 ASSERT(seg->s_as == &kas);
1602 1600 ASSERT(seg == segkmap);
1603 1601
1604 1602 baseoff = off & (offset_t)MAXBMASK;
1605 1603 if (off + len > baseoff + MAXBSIZE) {
1606 1604 panic("segmap_getmap bad len");
1607 1605 /*NOTREACHED*/
1608 1606 }
1609 1607
1610 1608 /*
1611 1609 * If this is a block device we have to be sure to use the
1612 1610 * "common" block device vnode for the mapping.
1613 1611 */
1614 1612 if (vp->v_type == VBLK)
1615 1613 vp = common_specvp(vp);
1616 1614
1617 1615 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
1618 1616
1619 1617 if (segmap_kpm == 0 ||
1620 1618 (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
1621 1619 is_kpm = 0;
1622 1620 }
1623 1621
1624 1622 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1625 1623 hashmtx = SHASHMTX(hashid);
1626 1624
1627 1625 retry_hash:
1628 1626 mutex_enter(hashmtx);
1629 1627 for (smp = smd_hash[hashid].sh_hash_list;
1630 1628 smp != NULL; smp = smp->sm_hash)
1631 1629 if (smp->sm_vp == vp && smp->sm_off == baseoff)
1632 1630 break;
1633 1631 mutex_exit(hashmtx);
1634 1632
1635 1633 vrfy_smp:
1636 1634 if (smp != NULL) {
1637 1635
1638 1636 ASSERT(vp->v_count != 0);
1639 1637
1640 1638 /*
1641 1639 * Get smap lock and recheck its tag. The hash lock
1642 1640 * is dropped since the hash is based on (vp, off)
1643 1641 * and (vp, off) won't change when we have smap mtx.
1644 1642 */
1645 1643 smapmtx = SMAPMTX(smp);
1646 1644 mutex_enter(smapmtx);
1647 1645 if (smp->sm_vp != vp || smp->sm_off != baseoff) {
1648 1646 mutex_exit(smapmtx);
1649 1647 goto retry_hash;
1650 1648 }
1651 1649
1652 1650 if (smp->sm_refcnt == 0) {
1653 1651
1654 1652 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
1655 1653
1656 1654 /*
1657 1655 * Could still be on the free list. However, this
1658 1656 * could also be an smp that is transitioning from
1659 1657 * the free list when we have too much contention
1660 1658 * for the smapmtx's. In this case, we have an
1661 1659 * unlocked smp that is not on the free list any
1662 1660 * longer, but still has a 0 refcnt. The only way
1663 1661 * to be sure is to check the freelist pointers.
1664 1662 * Since we now have the smapmtx, we are guaranteed
1665 1663 * that the (vp, off) won't change, so we are safe
1666 1664 * to reclaim it. get_free_smp() knows that this
1667 1665 * can happen, and it will check the refcnt.
1668 1666 */
1669 1667
1670 1668 if ((smp->sm_next != NULL)) {
1671 1669 struct sm_freeq *freeq;
1672 1670
1673 1671 ASSERT(smp->sm_prev != NULL);
1674 1672 sm = &smd_free[smp->sm_free_ndx];
1675 1673
1676 1674 if (smp->sm_flags & SM_QNDX_ZERO)
1677 1675 freeq = &sm->sm_freeq[0];
1678 1676 else
1679 1677 freeq = &sm->sm_freeq[1];
1680 1678
1681 1679 mutex_enter(&freeq->smq_mtx);
1682 1680 if (freeq->smq_free != smp) {
1683 1681 /*
1684 1682 * fastpath normal case
1685 1683 */
1686 1684 smp->sm_prev->sm_next = smp->sm_next;
1687 1685 smp->sm_next->sm_prev = smp->sm_prev;
1688 1686 } else if (smp == smp->sm_next) {
1689 1687 /*
1690 1688 * Taking the last smap on freelist
1691 1689 */
1692 1690 freeq->smq_free = NULL;
1693 1691 } else {
1694 1692 /*
1695 1693 * Reclaiming 1st smap on list
1696 1694 */
1697 1695 freeq->smq_free = smp->sm_next;
1698 1696 smp->sm_prev->sm_next = smp->sm_next;
1699 1697 smp->sm_next->sm_prev = smp->sm_prev;
1700 1698 }
1701 1699 mutex_exit(&freeq->smq_mtx);
1702 1700 smp->sm_prev = smp->sm_next = NULL;
1703 1701 } else {
1704 1702 ASSERT(smp->sm_prev == NULL);
1705 1703 segmapcnt.smp_stolen.value.ul++;
1706 1704 }
1707 1705
1708 1706 } else {
1709 1707 segmapcnt.smp_get_use.value.ul++;
1710 1708 }
1711 1709 smp->sm_refcnt++; /* another user */
1712 1710
1713 1711 /*
1714 1712 * We don't invoke segmap_fault via TLB miss, so we set ref
1715 1713 * and mod bits in advance. For S_OTHER we set them in
1716 1714 * segmap_fault F_SOFTUNLOCK.
1717 1715 */
1718 1716 if (is_kpm) {
1719 1717 if (rw == S_WRITE) {
1720 1718 smp->sm_flags |= SM_WRITE_DATA;
1721 1719 } else if (rw == S_READ) {
1722 1720 smp->sm_flags |= SM_READ_DATA;
1723 1721 }
1724 1722 }
1725 1723 mutex_exit(smapmtx);
1726 1724
1727 1725 newslot = 0;
1728 1726 } else {
1729 1727
1730 1728 uint32_t free_ndx, *free_ndxp;
1731 1729 union segmap_cpu *scpu;
1732 1730
1733 1731 /*
1734 1732 * On a PAC machine or a machine with anti-alias
1735 1733 * hardware, smd_colormsk will be zero.
1736 1734 *
1737 1735 		 * On a VAC machine, pick color by offset in the file
1738 1736 		 * so we won't get VAC conflicts on elf files.
1739 1737 		 * On data files, color does not matter but we
1740 1738 		 * don't know what kind of file it is so we always
1741 1739 		 * pick color by offset. This causes the color
1742 1740 		 * corresponding to file offset zero to be used more
1743 1741 		 * heavily.
1744 1742 */
1745 1743 color = (baseoff >> MAXBSHIFT) & smd_colormsk;
1746 1744 scpu = smd_cpu+CPU->cpu_seqid;
1747 1745 free_ndxp = &scpu->scpu.scpu_free_ndx[color];
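		/*
		 * Advance this cpu's rotor for the chosen color by
		 * smd_ncolor and wrap it with smd_freemsk; the result
		 * selects the freelist to allocate a slot from.
		 */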
1748 1746 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
1749 1747 #ifdef DEBUG
1750 1748 colors_used[free_ndx]++;
1751 1749 #endif /* DEBUG */
1752 1750
1753 1751 /*
1754 1752 * Get a locked smp slot from the free list.
1755 1753 */
1756 1754 smp = get_free_smp(free_ndx);
1757 1755 smapmtx = SMAPMTX(smp);
1758 1756
1759 1757 ASSERT(smp->sm_vp == NULL);
1760 1758
1761 1759 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
1762 1760 /*
1763 1761 			 * Failed to hashin; an smap for (vp, off) exists now.
1764 1762 			 * Give the smp we just allocated back to the free list.
1765 1763 			 */
1766 1764 segmap_smapadd(smp);
1767 1765 mutex_exit(smapmtx);
1768 1766
1769 1767 smp = nsmp;
1770 1768 goto vrfy_smp;
1771 1769 }
1772 1770 smp->sm_refcnt++; /* another user */
1773 1771
1774 1772 /*
1775 1773 * We don't invoke segmap_fault via TLB miss, so we set ref
1776 1774 * and mod bits in advance. For S_OTHER we set them in
1777 1775 * segmap_fault F_SOFTUNLOCK.
1778 1776 */
1779 1777 if (is_kpm) {
1780 1778 if (rw == S_WRITE) {
1781 1779 smp->sm_flags |= SM_WRITE_DATA;
1782 1780 } else if (rw == S_READ) {
1783 1781 smp->sm_flags |= SM_READ_DATA;
1784 1782 }
1785 1783 }
1786 1784 mutex_exit(smapmtx);
1787 1785
1788 1786 newslot = 1;
1789 1787 }
1790 1788
1791 1789 if (!is_kpm)
1792 1790 goto use_segmap_range;
1793 1791
1794 1792 /*
1795 1793 * Use segkpm
1796 1794 */
1797 1795 /* Lint directive required until 6746211 is fixed */
1798 1796 /*CONSTCOND*/
1799 1797 ASSERT(PAGESIZE == MAXBSIZE);
1800 1798
1801 1799 /*
1802 1800 * remember the last smp faulted on this cpu.
1803 1801 */
1804 1802 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
1805 1803
1806 1804 if (forcefault == SM_PAGECREATE) {
1807 1805 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
1808 1806 return (baseaddr);
1809 1807 }
1810 1808
1811 1809 if (newslot == 0 &&
1812 1810 (pp = GET_KPME(smp)->kpe_page) != NULL) {
1813 1811
1814 1812 /* fastpath */
1815 1813 switch (rw) {
1816 1814 case S_READ:
1817 1815 case S_WRITE:
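			/*
			 * The kpme page pointer was read without holding
			 * any lock, so after (try)locking it, verify that
			 * it is still the page for (vp, baseoff) and not
			 * free; if not, or if the trylock fails, look the
			 * page up the normal way.
			 */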
1818 1816 if (page_trylock(pp, SE_SHARED)) {
1819 1817 if (PP_ISFREE(pp) ||
1820 1818 !(pp->p_vnode == vp &&
1821 1819 pp->p_offset == baseoff)) {
1822 1820 page_unlock(pp);
1823 1821 pp = page_lookup(vp, baseoff,
1824 1822 SE_SHARED);
1825 1823 }
1826 1824 } else {
1827 1825 pp = page_lookup(vp, baseoff, SE_SHARED);
1828 1826 }
1829 1827
1830 1828 if (pp == NULL) {
1831 1829 ASSERT(GET_KPME(smp)->kpe_page == NULL);
1832 1830 break;
1833 1831 }
1834 1832
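			/*
			 * For a write, only stay on this fastpath when the
			 * ref and mod bits are already set on the page;
			 * otherwise drop the lock and take the
			 * VOP_GETPAGE() path below.
			 */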
1835 1833 if (rw == S_WRITE &&
1836 1834 hat_page_getattr(pp, P_MOD | P_REF) !=
1837 1835 (P_MOD | P_REF)) {
1838 1836 page_unlock(pp);
1839 1837 break;
1840 1838 }
1841 1839
1842 1840 /*
1843 1841 * We have the p_selock as reader, grab_smp
1844 1842 * can't hit us, we have bumped the smap
1845 1843 * refcnt and hat_pageunload needs the
1846 1844 * p_selock exclusive.
1847 1845 */
1848 1846 kpme = GET_KPME(smp);
1849 1847 if (kpme->kpe_page == pp) {
1850 1848 baseaddr = hat_kpm_page2va(pp, 0);
1851 1849 } else if (kpme->kpe_page == NULL) {
1852 1850 baseaddr = hat_kpm_mapin(pp, kpme);
1853 1851 } else {
1854 1852 panic("segmap_getmapflt: stale "
1855 1853 "kpme page, kpme %p", (void *)kpme);
1856 1854 /*NOTREACHED*/
1857 1855 }
1858 1856
1859 1857 /*
1860 1858 * We don't invoke segmap_fault via TLB miss,
1861 1859 * so we set ref and mod bits in advance.
1862 1860 			 * For S_OTHER we set them in segmap_fault
1863 1861 * F_SOFTUNLOCK.
1864 1862 */
1865 1863 if (rw == S_READ && !hat_isref(pp))
1866 1864 hat_setref(pp);
1867 1865
1868 1866 return (baseaddr);
1869 1867 default:
1870 1868 break;
1871 1869 }
1872 1870 }
1873 1871
1874 1872 base = segkpm_create_va(baseoff);
1875 1873 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1876 1874 seg, base, rw, CRED(), NULL);
1877 1875
1878 1876 pp = pl[0];
1879 1877 if (error || pp == NULL) {
1880 1878 /*
1881 1879 * Use segmap address slot and let segmap_fault deal
1882 1880 * with the error cases. There is no error return
1883 1881 * possible here.
1884 1882 */
1885 1883 goto use_segmap_range;
1886 1884 }
1887 1885
1888 1886 ASSERT(pl[1] == NULL);
1889 1887
1890 1888 /*
1891 1889 * When prot is not returned w/ PROT_ALL the returned pages
1892 1890 * are not backed by fs blocks. For most of the segmap users
1893 1891 * this is no problem, they don't write to the pages in the
1894 1892 * same request and therefore don't rely on a following
1895 1893 	 * trap-driven segmap_fault. With SM_LOCKPROTO users it
1896 1894 	 * is safer to use segkmap addresses to allow
1897 1895 	 * protection segmap_faults.
1898 1896 */
1899 1897 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
1900 1898 /*
1901 1899 * Use segmap address slot and let segmap_fault
1902 1900 * do the error return.
1903 1901 */
1904 1902 ASSERT(rw != S_WRITE);
1905 1903 ASSERT(PAGE_LOCKED(pp));
1906 1904 page_unlock(pp);
1907 1905 forcefault = 0;
1908 1906 goto use_segmap_range;
1909 1907 }
1910 1908
1911 1909 /*
1912 1910 * We have the p_selock as reader, grab_smp can't hit us, we
1913 1911 * have bumped the smap refcnt and hat_pageunload needs the
1914 1912 * p_selock exclusive.
1915 1913 */
1916 1914 kpme = GET_KPME(smp);
1917 1915 if (kpme->kpe_page == pp) {
1918 1916 baseaddr = hat_kpm_page2va(pp, 0);
1919 1917 } else if (kpme->kpe_page == NULL) {
1920 1918 baseaddr = hat_kpm_mapin(pp, kpme);
1921 1919 } else {
1922 1920 panic("segmap_getmapflt: stale kpme page after "
1923 1921 "VOP_GETPAGE, kpme %p", (void *)kpme);
1924 1922 /*NOTREACHED*/
1925 1923 }
1926 1924
1927 1925 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
1928 1926
1929 1927 return (baseaddr);
1930 1928
1931 1929
1932 1930 use_segmap_range:
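	/*
	 * Each smap entry maps a fixed MAXBSIZE window of the segkmap
	 * virtual range; the window's address follows from the entry's
	 * index in the smd_smap array.
	 */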
1933 1931 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
1934 1932 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
1935 1933 "segmap_getmap:seg %p addr %p vp %p offset %llx",
1936 1934 seg, baseaddr, vp, baseoff);
1937 1935
1938 1936 /*
1939 1937 * Prefault the translations
1940 1938 */
1941 1939 vaddr = baseaddr + (off - baseoff);
1942 1940 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
1943 1941
1944 1942 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
1945 1943 (uintptr_t)PAGEMASK);
1946 1944
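		/*
		 * Fault in whole pages: round vaddr down to a page
		 * boundary and round the length up so that the faulted
		 * range covers [vaddr, vaddr + len).
		 */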
1947 1945 (void) segmap_fault(kas.a_hat, seg, pgaddr,
1948 1946 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
1949 1947 F_INVAL, rw);
1950 1948 }
1951 1949
1952 1950 return (baseaddr);
1953 1951 }
1954 1952
1955 1953 int
1956 1954 segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
1957 1955 {
1958 1956 struct smap *smp;
1959 1957 int error;
1960 1958 int bflags = 0;
1961 1959 struct vnode *vp;
1962 1960 u_offset_t offset;
1963 1961 kmutex_t *smtx;
1964 1962 int is_kpm = 0;
1965 1963 page_t *pp;
1966 1964
1967 1965 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1968 1966
1969 1967 if (((uintptr_t)addr & MAXBOFFSET) != 0) {
1970 1968 panic("segmap_release: addr %p not "
1971 1969 "MAXBSIZE aligned", (void *)addr);
1972 1970 /*NOTREACHED*/
1973 1971 }
1974 1972
1975 1973 if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
1976 1974 panic("segmap_release: smap not found "
1977 1975 "for addr %p", (void *)addr);
1978 1976 /*NOTREACHED*/
1979 1977 }
1980 1978
1981 1979 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
1982 1980 "segmap_relmap:seg %p addr %p smp %p",
1983 1981 seg, addr, smp);
1984 1982
1985 1983 smtx = SMAPMTX(smp);
1986 1984
1987 1985 /*
1988 1986 * For compatibility reasons segmap_pagecreate_kpm sets this
1989 1987 * flag to allow a following segmap_pagecreate to return
1990 1988 		 * this as the "newpage" flag. When segmap_pagecreate is
1991 1989 		 * not called at all, we clear it now.
1992 1990 */
1993 1991 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1994 1992 is_kpm = 1;
1995 1993 if (smp->sm_flags & SM_WRITE_DATA) {
1996 1994 hat_setrefmod(pp);
1997 1995 } else if (smp->sm_flags & SM_READ_DATA) {
1998 1996 hat_setref(pp);
1999 1997 }
2000 1998 } else {
2001 1999 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
2002 2000 ((uintptr_t)addr & MAXBOFFSET) != 0) {
2003 2001 panic("segmap_release: bad addr %p", (void *)addr);
2004 2002 /*NOTREACHED*/
2005 2003 }
2006 2004 smp = GET_SMAP(seg, addr);
2007 2005
2008 2006 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2009 2007 "segmap_relmap:seg %p addr %p smp %p",
2010 2008 seg, addr, smp);
2011 2009
2012 2010 smtx = SMAPMTX(smp);
2013 2011 mutex_enter(smtx);
2014 2012 smp->sm_flags |= SM_NOTKPM_RELEASED;
2015 2013 }
2016 2014
2017 2015 ASSERT(smp->sm_refcnt > 0);
2018 2016
2019 2017 /*
2020 2018 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2021 2019 * are set.
2022 2020 */
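	/*
	 * Translate the SM_* release flags into the B_* flags expected
	 * by VOP_PUTPAGE().
	 */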
2023 2021 if ((flags & ~SM_DONTNEED) != 0) {
2024 2022 if (flags & SM_WRITE)
2025 2023 segmapcnt.smp_rel_write.value.ul++;
2026 2024 if (flags & SM_ASYNC) {
2027 2025 bflags |= B_ASYNC;
2028 2026 segmapcnt.smp_rel_async.value.ul++;
2029 2027 }
2030 2028 if (flags & SM_INVAL) {
2031 2029 bflags |= B_INVAL;
2032 2030 segmapcnt.smp_rel_abort.value.ul++;
2033 2031 }
2034 2032 if (flags & SM_DESTROY) {
2035 2033 bflags |= (B_INVAL|B_TRUNC);
2036 2034 segmapcnt.smp_rel_abort.value.ul++;
2037 2035 }
2038 2036 if (smp->sm_refcnt == 1) {
2039 2037 /*
2040 2038 * We only bother doing the FREE and DONTNEED flags
2041 2039 * if no one else is still referencing this mapping.
2042 2040 */
2043 2041 if (flags & SM_FREE) {
2044 2042 bflags |= B_FREE;
2045 2043 segmapcnt.smp_rel_free.value.ul++;
2046 2044 }
2047 2045 if (flags & SM_DONTNEED) {
2048 2046 bflags |= B_DONTNEED;
2049 2047 segmapcnt.smp_rel_dontneed.value.ul++;
2050 2048 }
2051 2049 }
2052 2050 } else {
2053 2051 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
2054 2052 }
2055 2053
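	/*
	 * Save vp and offset while the smap mutex is still held; they
	 * are needed for the VOP_PUTPAGE() call below, which happens
	 * after the reference has been dropped and the slot may already
	 * be back on the free list.
	 */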
2056 2054 vp = smp->sm_vp;
2057 2055 offset = smp->sm_off;
2058 2056
2059 2057 if (--smp->sm_refcnt == 0) {
2060 2058
2061 2059 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2062 2060
2063 2061 if (flags & (SM_INVAL|SM_DESTROY)) {
2064 2062 segmap_hashout(smp); /* remove map info */
2065 2063 if (is_kpm) {
2066 2064 hat_kpm_mapout(pp, GET_KPME(smp), addr);
2067 2065 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
2068 2066 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2069 2067 hat_unload(kas.a_hat, segkmap->s_base +
2070 2068 ((smp - smd_smap) * MAXBSIZE),
2071 2069 MAXBSIZE, HAT_UNLOAD);
2072 2070 }
2073 2071
2074 2072 } else {
2075 2073 if (segmap_kpm)
2076 2074 segkpm_mapout_validkpme(GET_KPME(smp));
2077 2075
2078 2076 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2079 2077 hat_unload(kas.a_hat, addr, MAXBSIZE,
2080 2078 HAT_UNLOAD);
2081 2079 }
2082 2080 }
2083 2081 segmap_smapadd(smp); /* add to free list */
2084 2082 }
2085 2083
2086 2084 mutex_exit(smtx);
2087 2085
2088 2086 if (is_kpm)
2089 2087 page_unlock(pp);
2090 2088 /*
2091 2089 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2092 2090 * are set.
2093 2091 */
2094 2092 if ((flags & ~SM_DONTNEED) != 0) {
2095 2093 error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
2096 2094 bflags, CRED(), NULL);
2097 2095 } else {
2098 2096 error = 0;
2099 2097 }
2100 2098
2101 2099 return (error);
2102 2100 }
2103 2101
2104 2102 /*
2105 2103 * Dump the pages belonging to this segmap segment.
2106 2104 */
2107 2105 static void
2108 2106 segmap_dump(struct seg *seg)
2109 2107 {
2110 2108 struct segmap_data *smd;
2111 2109 struct smap *smp, *smp_end;
2112 2110 page_t *pp;
2113 2111 pfn_t pfn;
2114 2112 u_offset_t off;
2115 2113 caddr_t addr;
2116 2114
2117 2115 smd = (struct segmap_data *)seg->s_data;
2118 2116 addr = seg->s_base;
2119 2117 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
2120 2118 smp < smp_end; smp++) {
2121 2119
2122 2120 if (smp->sm_refcnt) {
2123 2121 for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
2124 2122 int we_own_it = 0;
2125 2123
2126 2124 /*
2127 2125 				 * If the nowait lookup returns NULL, the
2128 2126 				 * page either does not exist or is
2129 2127 				 * exclusively locked, so fall back to
2130 2128 				 * page_exists() to tell the two apart.
2131 2129 */
2132 2130 if ((pp = page_lookup_nowait(smp->sm_vp,
2133 2131 smp->sm_off + off, SE_SHARED)))
2134 2132 we_own_it = 1;
2135 2133 else
2136 2134 pp = page_exists(smp->sm_vp,
2137 2135 smp->sm_off + off);
2138 2136
2139 2137 if (pp) {
2140 2138 pfn = page_pptonum(pp);
2141 2139 dump_addpage(seg->s_as,
2142 2140 addr + off, pfn);
2143 2141 if (we_own_it)
2144 2142 page_unlock(pp);
2145 2143 }
2146 2144 dump_timeleft = dump_timeout;
2147 2145 }
2148 2146 }
2149 2147 addr += MAXBSIZE;
2150 2148 }
2151 2149 }
2152 2150
2153 2151 /*ARGSUSED*/
2154 2152 static int
2155 2153 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
2156 2154 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2157 2155 {
2158 2156 return (ENOTSUP);
2159 2157 }
2160 2158
2161 2159 static int
2162 2160 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2163 2161 {
2164 2162 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
2165 2163
2166 2164 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
2167 2165 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
2168 - return (0);
2169 -}
2170 -
2171 -/*ARGSUSED*/
2172 -static int
2173 -segmap_capable(struct seg *seg, segcapability_t capability)
2174 -{
2175 2166 return (0);
2176 2167 }
2177 2168
2178 2169
2179 2170 #ifdef SEGKPM_SUPPORT
2180 2171
2181 2172 /*
2182 2173 * segkpm support routines
2183 2174 */
2184 2175
2185 2176 static caddr_t
2186 2177 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2187 2178 struct smap *smp, enum seg_rw rw)
2188 2179 {
2189 2180 caddr_t base;
2190 2181 page_t *pp;
2191 2182 int newpage = 0;
2192 2183 struct kpme *kpme;
2193 2184
2194 2185 ASSERT(smp->sm_refcnt > 0);
2195 2186
2196 2187 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
2197 2188 kmutex_t *smtx;
2198 2189
2199 2190 base = segkpm_create_va(off);
2200 2191
2201 2192 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
2202 2193 seg, base)) == NULL) {
2203 2194 panic("segmap_pagecreate_kpm: "
2204 2195 "page_create failed");
2205 2196 /*NOTREACHED*/
2206 2197 }
2207 2198
2208 2199 newpage = 1;
2209 2200 page_io_unlock(pp);
2210 2201 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
2211 2202
2212 2203 /*
2213 2204 * Mark this here until the following segmap_pagecreate
2214 2205 * or segmap_release.
2215 2206 */
2216 2207 smtx = SMAPMTX(smp);
2217 2208 mutex_enter(smtx);
2218 2209 smp->sm_flags |= SM_KPM_NEWPAGE;
2219 2210 mutex_exit(smtx);
2220 2211 }
2221 2212
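	/*
	 * Reuse the existing kpm mapping when this smap's kpme already
	 * refers to the page; otherwise establish a new mapping with
	 * hat_kpm_mapin().
	 */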
2222 2213 kpme = GET_KPME(smp);
2223 2214 if (!newpage && kpme->kpe_page == pp)
2224 2215 base = hat_kpm_page2va(pp, 0);
2225 2216 else
2226 2217 base = hat_kpm_mapin(pp, kpme);
2227 2218
2228 2219 /*
2229 2220 * FS code may decide not to call segmap_pagecreate and we
2230 2221 * don't invoke segmap_fault via TLB miss, so we have to set
2231 2222 * ref and mod bits in advance.
2232 2223 */
2233 2224 if (rw == S_WRITE) {
2234 2225 hat_setrefmod(pp);
2235 2226 } else {
2236 2227 ASSERT(rw == S_READ);
2237 2228 hat_setref(pp);
2238 2229 }
2239 2230
2240 2231 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
2241 2232
2242 2233 return (base);
2243 2234 }
2244 2235
2245 2236 /*
2246 2237 * Find the smap structure corresponding to the
2247 2238 * KPM addr and return it locked.
2248 2239 */
2249 2240 struct smap *
2250 2241 get_smap_kpm(caddr_t addr, page_t **ppp)
2251 2242 {
2252 2243 struct smap *smp;
2253 2244 struct vnode *vp;
2254 2245 u_offset_t offset;
2255 2246 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
2256 2247 int hashid;
2257 2248 kmutex_t *hashmtx;
2258 2249 page_t *pp;
2259 2250 union segmap_cpu *scpu;
2260 2251
2261 2252 pp = hat_kpm_vaddr2page(baseaddr);
2262 2253
2263 2254 ASSERT(pp && !PP_ISFREE(pp));
2264 2255 ASSERT(PAGE_LOCKED(pp));
2265 2256 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
2266 2257
2267 2258 vp = pp->p_vnode;
2268 2259 offset = pp->p_offset;
2269 2260 ASSERT(vp != NULL);
2270 2261
2271 2262 /*
2272 2263 * Assume the last smap used on this cpu is the one needed.
2273 2264 */
2274 2265 scpu = smd_cpu+CPU->cpu_seqid;
2275 2266 smp = scpu->scpu.scpu_last_smap;
2276 2267 mutex_enter(&smp->sm_mtx);
2277 2268 if (smp->sm_vp == vp && smp->sm_off == offset) {
2278 2269 ASSERT(smp->sm_refcnt > 0);
2279 2270 } else {
2280 2271 /*
2281 2272 * Assumption wrong, find the smap on the hash chain.
2282 2273 */
2283 2274 mutex_exit(&smp->sm_mtx);
2284 2275 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
2285 2276 hashmtx = SHASHMTX(hashid);
2286 2277
2287 2278 mutex_enter(hashmtx);
2288 2279 smp = smd_hash[hashid].sh_hash_list;
2289 2280 for (; smp != NULL; smp = smp->sm_hash) {
2290 2281 if (smp->sm_vp == vp && smp->sm_off == offset)
2291 2282 break;
2292 2283 }
2293 2284 mutex_exit(hashmtx);
2294 2285 if (smp) {
2295 2286 mutex_enter(&smp->sm_mtx);
2296 2287 ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
2297 2288 }
2298 2289 }
2299 2290
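	/*
	 * Hand the page back to the caller only when a matching smap
	 * was found; the smap itself, when found, is returned with its
	 * sm_mtx still held.
	 */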
2300 2291 if (ppp)
2301 2292 *ppp = smp ? pp : NULL;
2302 2293
2303 2294 return (smp);
2304 2295 }
2305 2296
2306 2297 #else /* SEGKPM_SUPPORT */
2307 2298
2308 2299 /* segkpm stubs */
2309 2300
2310 2301 /*ARGSUSED*/
2311 2302 static caddr_t
2312 2303 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2313 2304 struct smap *smp, enum seg_rw rw)
2314 2305 {
2315 2306 return (NULL);
2316 2307 }
2317 2308
2318 2309 /*ARGSUSED*/
2319 2310 struct smap *
2320 2311 get_smap_kpm(caddr_t addr, page_t **ppp)
2321 2312 {
2322 2313 return (NULL);
2323 2314 }
2324 2315
2325 2316 #endif /* SEGKPM_SUPPORT */