6147 segop_getpolicy already checks for a NULL op
Reviewed by: Garrett D'Amore <garrett@damore.org>
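Context for the hunks below: the fix deletes segmap's private getpolicy hook and its entry in segmap_ops, since the generic segment dispatch already copes with a missing op. The following is a minimal sketch of that assumed dispatch behavior (illustrative only, not the verbatim illumos routine); the lgrp_mem_policy_info_t type is taken from the declaration being removed.

/*
 * Sketch (assumed behavior per the bug synopsis): the generic
 * dispatcher checks for a NULL getpolicy op before calling through
 * the ops vector, so a driver with no policy to report can simply
 * omit the op instead of supplying a stub.
 */
static lgrp_mem_policy_info_t *
segop_getpolicy(struct seg *seg, caddr_t addr)
{
	if (seg->s_ops->getpolicy == NULL)
		return (NULL);

	return (seg->s_ops->getpolicy(seg, addr));
}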
--- old/usr/src/uts/common/vm/seg_map.c
+++ new/usr/src/uts/common/vm/seg_map.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 31 * under license from the Regents of the University of California.
32 32 */
33 33
34 34 /*
35 35 * VM - generic vnode mapping segment.
36 36 *
37 37 * The segmap driver is used only by the kernel to get faster (than seg_vn)
38 38 * mappings [lower routine overhead; more persistent cache] to random
  39   39 	 * vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/param.h>
45 45 #include <sys/sysmacros.h>
46 46 #include <sys/buf.h>
47 47 #include <sys/systm.h>
48 48 #include <sys/vnode.h>
49 49 #include <sys/mman.h>
50 50 #include <sys/errno.h>
51 51 #include <sys/cred.h>
52 52 #include <sys/kmem.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/cmn_err.h>
55 55 #include <sys/debug.h>
56 56 #include <sys/thread.h>
57 57 #include <sys/dumphdr.h>
58 58 #include <sys/bitmap.h>
59 59 #include <sys/lgrp.h>
60 60
61 61 #include <vm/seg_kmem.h>
62 62 #include <vm/hat.h>
63 63 #include <vm/as.h>
64 64 #include <vm/seg.h>
65 65 #include <vm/seg_kpm.h>
66 66 #include <vm/seg_map.h>
67 67 #include <vm/page.h>
68 68 #include <vm/pvn.h>
69 69 #include <vm/rm.h>
70 70
71 71 /*
72 72 * Private seg op routines.
73 73 */
74 74 static void segmap_free(struct seg *seg);
75 75 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
76 76 size_t len, enum fault_type type, enum seg_rw rw);
77 77 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
78 78 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
79 79 uint_t prot);
80 80 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
81 81 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
82 82 uint_t *protv);
83 83 static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr);
84 84 static int segmap_gettype(struct seg *seg, caddr_t addr);
85 85 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
86 86 static void segmap_dump(struct seg *seg);
87 87 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
88 88 struct page ***ppp, enum lock_type type,
89 89 enum seg_rw rw);
90 90 static void segmap_badop(void);
91 91 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
92 -static lgrp_mem_policy_info_t *segmap_getpolicy(struct seg *seg,
93 - caddr_t addr);
94 92 static int segmap_capable(struct seg *seg, segcapability_t capability);
95 93
96 94 /* segkpm support */
97 95 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
98 96 struct smap *, enum seg_rw);
99 97 struct smap *get_smap_kpm(caddr_t, page_t **);
100 98
101 99 #define SEGMAP_BADOP(t) (t(*)())segmap_badop
102 100
103 101 static struct seg_ops segmap_ops = {
104 102 .dup = SEGMAP_BADOP(int),
105 103 .unmap = SEGMAP_BADOP(int),
106 104 .free = segmap_free,
107 105 .fault = segmap_fault,
108 106 .faulta = segmap_faulta,
109 107 .setprot = SEGMAP_BADOP(int),
110 108 .checkprot = segmap_checkprot,
111 109 .kluster = segmap_kluster,
112 110 .swapout = SEGMAP_BADOP(size_t),
113 111 .sync = SEGMAP_BADOP(int),
114 112 .incore = SEGMAP_BADOP(size_t),
115 113 .lockop = SEGMAP_BADOP(int),
116 114 .getprot = segmap_getprot,
117 115 .getoffset = segmap_getoffset,
118 116 .gettype = segmap_gettype,
119 117 .getvp = segmap_getvp,
120 118 .advise = SEGMAP_BADOP(int),
121 119 .dump = segmap_dump,
122 120 .pagelock = segmap_pagelock,
123 121 .setpagesize = SEGMAP_BADOP(int),
124 122 .getmemid = segmap_getmemid,
125 - .getpolicy = segmap_getpolicy,
126 123 .capable = segmap_capable,
127 124 };
128 125
129 126 /*
130 127 * Private segmap routines.
131 128 */
132 129 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
133 130 size_t len, enum seg_rw rw, struct smap *smp);
134 131 static void segmap_smapadd(struct smap *smp);
135 132 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
136 133 u_offset_t off, int hashid);
137 134 static void segmap_hashout(struct smap *smp);
138 135
139 136
140 137 /*
141 138 * Statistics for segmap operations.
142 139 *
143 140 * No explicit locking to protect these stats.
144 141 */
145 142 struct segmapcnt segmapcnt = {
146 143 { "fault", KSTAT_DATA_ULONG },
147 144 { "faulta", KSTAT_DATA_ULONG },
148 145 { "getmap", KSTAT_DATA_ULONG },
149 146 { "get_use", KSTAT_DATA_ULONG },
150 147 { "get_reclaim", KSTAT_DATA_ULONG },
151 148 { "get_reuse", KSTAT_DATA_ULONG },
152 149 { "get_unused", KSTAT_DATA_ULONG },
153 150 { "get_nofree", KSTAT_DATA_ULONG },
154 151 { "rel_async", KSTAT_DATA_ULONG },
155 152 { "rel_write", KSTAT_DATA_ULONG },
156 153 { "rel_free", KSTAT_DATA_ULONG },
157 154 { "rel_abort", KSTAT_DATA_ULONG },
158 155 { "rel_dontneed", KSTAT_DATA_ULONG },
159 156 { "release", KSTAT_DATA_ULONG },
160 157 { "pagecreate", KSTAT_DATA_ULONG },
161 158 { "free_notfree", KSTAT_DATA_ULONG },
162 159 { "free_dirty", KSTAT_DATA_ULONG },
163 160 { "free", KSTAT_DATA_ULONG },
164 161 { "stolen", KSTAT_DATA_ULONG },
165 162 { "get_nomtx", KSTAT_DATA_ULONG }
166 163 };
167 164
168 165 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
169 166 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
170 167
171 168 /*
172 169 * Return number of map pages in segment.
173 170 */
174 171 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
175 172
176 173 /*
177 174 * Translate addr into smap number within segment.
178 175 */
179 176 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
180 177
181 178 /*
182 179 * Translate addr in seg into struct smap pointer.
183 180 */
184 181 #define GET_SMAP(seg, addr) \
185 182 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
186 183
187 184 /*
188 185 * Bit in map (16 bit bitmap).
189 186 */
190 187 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
191 188
192 189 static int smd_colormsk = 0;
193 190 static int smd_ncolor = 0;
194 191 static int smd_nfree = 0;
195 192 static int smd_freemsk = 0;
196 193 #ifdef DEBUG
197 194 static int *colors_used;
198 195 #endif
199 196 static struct smap *smd_smap;
200 197 static struct smaphash *smd_hash;
201 198 #ifdef SEGMAP_HASHSTATS
202 199 static unsigned int *smd_hash_len;
203 200 #endif
204 201 static struct smfree *smd_free;
205 202 static ulong_t smd_hashmsk = 0;
206 203
207 204 #define SEGMAP_MAXCOLOR 2
208 205 #define SEGMAP_CACHE_PAD 64
209 206
210 207 union segmap_cpu {
211 208 struct {
212 209 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR];
213 210 struct smap *scpu_last_smap;
214 211 ulong_t scpu_getmap;
215 212 ulong_t scpu_release;
216 213 ulong_t scpu_get_reclaim;
217 214 ulong_t scpu_fault;
218 215 ulong_t scpu_pagecreate;
219 216 ulong_t scpu_get_reuse;
220 217 } scpu;
221 218 char scpu_pad[SEGMAP_CACHE_PAD];
222 219 };
223 220 static union segmap_cpu *smd_cpu;
224 221
225 222 /*
226 223 * There are three locks in seg_map:
227 224 * - per freelist mutexes
228 225 * - per hashchain mutexes
229 226 * - per smap mutexes
230 227 *
231 228 * The lock ordering is to get the smap mutex to lock down the slot
232 229 * first then the hash lock (for hash in/out (vp, off) list) or the
233 230 * freelist lock to put the slot back on the free list.
234 231 *
235 232 * The hash search is done by only holding the hashchain lock, when a wanted
236 233 * slot is found, we drop the hashchain lock then lock the slot so there
237 234 * is no overlapping of hashchain and smap locks. After the slot is
238 235 * locked, we verify again if the slot is still what we are looking
239 236 * for.
240 237 *
241 238 * Allocation of a free slot is done by holding the freelist lock,
242 239 * then locking the smap slot at the head of the freelist. This is
243 240 * in reversed lock order so mutex_tryenter() is used.
244 241 *
245 242 * The smap lock protects all fields in smap structure except for
246 243 * the link fields for hash/free lists which are protected by
247 244 * hashchain and freelist locks.
248 245 */
249 246
250 247 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx)
251 248
252 249 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk])
253 250 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk)
254 251
255 252 #define SMAPMTX(smp) (&smp->sm_mtx)
256 253
257 254 #define SMAP_HASHFUNC(vp, off, hashid) \
258 255 { \
259 256 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
260 257 ((off) >> MAXBSHIFT)) & smd_hashmsk); \
261 258 }
262 259
263 260 /*
264 261 * The most frequently updated kstat counters are kept in the
265 262 * per cpu array to avoid hot cache blocks. The update function
266 263 * sums the cpu local counters to update the global counters.
267 264 */
268 265
269 266 /* ARGSUSED */
270 267 int
271 268 segmap_kstat_update(kstat_t *ksp, int rw)
272 269 {
273 270 int i;
274 271 ulong_t getmap, release, get_reclaim;
275 272 ulong_t fault, pagecreate, get_reuse;
276 273
277 274 if (rw == KSTAT_WRITE)
278 275 return (EACCES);
279 276 getmap = release = get_reclaim = (ulong_t)0;
280 277 fault = pagecreate = get_reuse = (ulong_t)0;
281 278 for (i = 0; i < max_ncpus; i++) {
282 279 getmap += smd_cpu[i].scpu.scpu_getmap;
283 280 release += smd_cpu[i].scpu.scpu_release;
284 281 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
285 282 fault += smd_cpu[i].scpu.scpu_fault;
286 283 pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
287 284 get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
288 285 }
289 286 segmapcnt.smp_getmap.value.ul = getmap;
290 287 segmapcnt.smp_release.value.ul = release;
291 288 segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
292 289 segmapcnt.smp_fault.value.ul = fault;
293 290 segmapcnt.smp_pagecreate.value.ul = pagecreate;
294 291 segmapcnt.smp_get_reuse.value.ul = get_reuse;
295 292 return (0);
296 293 }
297 294
298 295 int
299 296 segmap_create(struct seg *seg, void *argsp)
300 297 {
301 298 struct segmap_data *smd;
302 299 struct smap *smp;
303 300 struct smfree *sm;
304 301 struct segmap_crargs *a = (struct segmap_crargs *)argsp;
305 302 struct smaphash *shashp;
306 303 union segmap_cpu *scpu;
307 304 long i, npages;
308 305 size_t hashsz;
309 306 uint_t nfreelist;
310 307 extern void prefetch_smap_w(void *);
311 308 extern int max_ncpus;
312 309
313 310 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
314 311
315 312 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
316 313 panic("segkmap not MAXBSIZE aligned");
317 314 /*NOTREACHED*/
318 315 }
319 316
320 317 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
321 318
322 319 seg->s_data = (void *)smd;
323 320 seg->s_ops = &segmap_ops;
324 321 smd->smd_prot = a->prot;
325 322
326 323 /*
327 324 * Scale the number of smap freelists to be
328 325 * proportional to max_ncpus * number of virtual colors.
329 326 * The caller can over-ride this scaling by providing
330 327 * a non-zero a->nfreelist argument.
331 328 */
332 329 nfreelist = a->nfreelist;
333 330 if (nfreelist == 0)
334 331 nfreelist = max_ncpus;
335 332 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
336 333 cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
337 334 "%d, using %d", nfreelist, max_ncpus);
338 335 nfreelist = max_ncpus;
339 336 }
340 337 if (!ISP2(nfreelist)) {
341 338 /* round up nfreelist to the next power of two. */
342 339 nfreelist = 1 << (highbit(nfreelist));
343 340 }
344 341
345 342 /*
346 343 * Get the number of virtual colors - must be a power of 2.
347 344 */
348 345 if (a->shmsize)
349 346 smd_ncolor = a->shmsize >> MAXBSHIFT;
350 347 else
351 348 smd_ncolor = 1;
352 349 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
353 350 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
354 351 smd_colormsk = smd_ncolor - 1;
355 352 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
356 353 smd_freemsk = smd_nfree - 1;
357 354
358 355 /*
359 356 * Allocate and initialize the freelist headers.
360 357 * Note that sm_freeq[1] starts out as the release queue. This
361 358 * is known when the smap structures are initialized below.
362 359 */
363 360 smd_free = smd->smd_free =
364 361 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
365 362 for (i = 0; i < smd_nfree; i++) {
366 363 sm = &smd->smd_free[i];
367 364 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
368 365 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
369 366 sm->sm_allocq = &sm->sm_freeq[0];
370 367 sm->sm_releq = &sm->sm_freeq[1];
371 368 }
372 369
373 370 /*
374 371 * Allocate and initialize the smap hash chain headers.
375 372 * Compute hash size rounding down to the next power of two.
376 373 */
377 374 npages = MAP_PAGES(seg);
378 375 smd->smd_npages = npages;
379 376 hashsz = npages / SMAP_HASHAVELEN;
380 377 hashsz = 1 << (highbit(hashsz)-1);
381 378 smd_hashmsk = hashsz - 1;
382 379 smd_hash = smd->smd_hash =
383 380 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
384 381 #ifdef SEGMAP_HASHSTATS
385 382 smd_hash_len =
386 383 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
387 384 #endif
388 385 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
389 386 shashp->sh_hash_list = NULL;
390 387 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
391 388 }
392 389
393 390 /*
394 391 * Allocate and initialize the smap structures.
395 392 * Link all slots onto the appropriate freelist.
396 393 * The smap array is large enough to affect boot time
397 394 * on large systems, so use memory prefetching and only
 398  395 	 * go through the array 1 time. Inline an optimized version
399 396 * of segmap_smapadd to add structures to freelists with
400 397 * knowledge that no locks are needed here.
401 398 */
402 399 smd_smap = smd->smd_sm =
403 400 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
404 401
405 402 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
406 403 smp >= smd->smd_sm; smp--) {
407 404 struct smap *smpfreelist;
408 405 struct sm_freeq *releq;
409 406
410 407 prefetch_smap_w((char *)smp);
411 408
412 409 smp->sm_vp = NULL;
413 410 smp->sm_hash = NULL;
414 411 smp->sm_off = 0;
415 412 smp->sm_bitmap = 0;
416 413 smp->sm_refcnt = 0;
417 414 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
418 415 smp->sm_free_ndx = SMP2SMF_NDX(smp);
419 416
420 417 sm = SMP2SMF(smp);
421 418 releq = sm->sm_releq;
422 419
423 420 smpfreelist = releq->smq_free;
424 421 if (smpfreelist == 0) {
425 422 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
426 423 } else {
427 424 smp->sm_next = smpfreelist;
428 425 smp->sm_prev = smpfreelist->sm_prev;
429 426 smpfreelist->sm_prev = smp;
430 427 smp->sm_prev->sm_next = smp;
431 428 releq->smq_free = smp->sm_next;
432 429 }
433 430
434 431 /*
435 432 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
436 433 */
437 434 smp->sm_flags = 0;
438 435
439 436 #ifdef SEGKPM_SUPPORT
440 437 /*
441 438 * Due to the fragile prefetch loop no
442 439 * separate function is used here.
443 440 */
444 441 smp->sm_kpme_next = NULL;
445 442 smp->sm_kpme_prev = NULL;
446 443 smp->sm_kpme_page = NULL;
447 444 #endif
448 445 }
449 446
450 447 /*
451 448 * Allocate the per color indices that distribute allocation
452 449 * requests over the free lists. Each cpu will have a private
453 450 * rotor index to spread the allocations even across the available
454 451 * smap freelists. Init the scpu_last_smap field to the first
455 452 * smap element so there is no need to check for NULL.
456 453 */
457 454 smd_cpu =
458 455 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
459 456 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
460 457 int j;
461 458 for (j = 0; j < smd_ncolor; j++)
462 459 scpu->scpu.scpu_free_ndx[j] = j;
463 460 scpu->scpu.scpu_last_smap = smd_smap;
464 461 }
465 462
466 463 vpm_init();
467 464
468 465 #ifdef DEBUG
469 466 /*
470 467 * Keep track of which colors are used more often.
471 468 */
472 469 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
473 470 #endif /* DEBUG */
474 471
475 472 return (0);
476 473 }
477 474
478 475 static void
479 476 segmap_free(seg)
480 477 struct seg *seg;
481 478 {
482 479 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
483 480 }
484 481
485 482 /*
486 483 * Do a F_SOFTUNLOCK call over the range requested.
487 484 * The range must have already been F_SOFTLOCK'ed.
488 485 */
489 486 static void
490 487 segmap_unlock(
491 488 struct hat *hat,
492 489 struct seg *seg,
493 490 caddr_t addr,
494 491 size_t len,
495 492 enum seg_rw rw,
496 493 struct smap *smp)
497 494 {
498 495 page_t *pp;
499 496 caddr_t adr;
500 497 u_offset_t off;
501 498 struct vnode *vp;
502 499 kmutex_t *smtx;
503 500
504 501 ASSERT(smp->sm_refcnt > 0);
505 502
506 503 #ifdef lint
507 504 seg = seg;
508 505 #endif
509 506
510 507 if (segmap_kpm && IS_KPM_ADDR(addr)) {
511 508
512 509 /*
513 510 * We're called only from segmap_fault and this was a
514 511 * NOP in case of a kpm based smap, so dangerous things
515 512 * must have happened in the meantime. Pages are prefaulted
516 513 * and locked in segmap_getmapflt and they will not be
517 514 * unlocked until segmap_release.
518 515 */
519 516 panic("segmap_unlock: called with kpm addr %p", (void *)addr);
520 517 /*NOTREACHED*/
521 518 }
522 519
523 520 vp = smp->sm_vp;
524 521 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
525 522
526 523 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
527 524 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
528 525 ushort_t bitmask;
529 526
530 527 /*
531 528 * Use page_find() instead of page_lookup() to
532 529 * find the page since we know that it has
533 530 * "shared" lock.
534 531 */
535 532 pp = page_find(vp, off);
536 533 if (pp == NULL) {
537 534 panic("segmap_unlock: page not found");
538 535 /*NOTREACHED*/
539 536 }
540 537
541 538 if (rw == S_WRITE) {
542 539 hat_setrefmod(pp);
543 540 } else if (rw != S_OTHER) {
544 541 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
545 542 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
546 543 hat_setref(pp);
547 544 }
548 545
549 546 /*
550 547 * Clear bitmap, if the bit corresponding to "off" is set,
551 548 * since the page and translation are being unlocked.
552 549 */
553 550 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
554 551
555 552 /*
556 553 * Large Files: Following assertion is to verify
557 554 * the correctness of the cast to (int) above.
558 555 */
559 556 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
560 557 smtx = SMAPMTX(smp);
561 558 mutex_enter(smtx);
562 559 if (smp->sm_bitmap & bitmask) {
563 560 smp->sm_bitmap &= ~bitmask;
564 561 }
565 562 mutex_exit(smtx);
566 563
567 564 page_unlock(pp);
568 565 }
569 566 }
570 567
571 568 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
572 569
573 570 /*
574 571 * This routine is called via a machine specific fault handling
575 572 * routine. It is also called by software routines wishing to
576 573 * lock or unlock a range of addresses.
577 574 *
578 575 * Note that this routine expects a page-aligned "addr".
579 576 */
580 577 faultcode_t
581 578 segmap_fault(
582 579 struct hat *hat,
583 580 struct seg *seg,
584 581 caddr_t addr,
585 582 size_t len,
586 583 enum fault_type type,
587 584 enum seg_rw rw)
588 585 {
589 586 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
590 587 struct smap *smp;
591 588 page_t *pp, **ppp;
592 589 struct vnode *vp;
593 590 u_offset_t off;
594 591 page_t *pl[MAXPPB + 1];
595 592 uint_t prot;
596 593 u_offset_t addroff;
597 594 caddr_t adr;
598 595 int err;
599 596 u_offset_t sm_off;
600 597 int hat_flag;
601 598
602 599 if (segmap_kpm && IS_KPM_ADDR(addr)) {
603 600 int newpage;
604 601 kmutex_t *smtx;
605 602
606 603 /*
607 604 * Pages are successfully prefaulted and locked in
608 605 * segmap_getmapflt and can't be unlocked until
609 606 * segmap_release. No hat mappings have to be locked
610 607 * and they also can't be unlocked as long as the
611 608 * caller owns an active kpm addr.
612 609 */
613 610 #ifndef DEBUG
614 611 if (type != F_SOFTUNLOCK)
615 612 return (0);
616 613 #endif
617 614
618 615 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
619 616 panic("segmap_fault: smap not found "
620 617 "for addr %p", (void *)addr);
621 618 /*NOTREACHED*/
622 619 }
623 620
624 621 smtx = SMAPMTX(smp);
625 622 #ifdef DEBUG
626 623 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
627 624 if (newpage) {
628 625 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
629 626 (void *)smp);
630 627 }
631 628
632 629 if (type != F_SOFTUNLOCK) {
633 630 mutex_exit(smtx);
634 631 return (0);
635 632 }
636 633 #endif
637 634 mutex_exit(smtx);
638 635 vp = smp->sm_vp;
639 636 sm_off = smp->sm_off;
640 637
641 638 if (vp == NULL)
642 639 return (FC_MAKE_ERR(EIO));
643 640
644 641 ASSERT(smp->sm_refcnt > 0);
645 642
646 643 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
647 644 if (addroff + len > MAXBSIZE)
648 645 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
649 646 (void *)(addr + len));
650 647
651 648 off = sm_off + addroff;
652 649
653 650 pp = page_find(vp, off);
654 651
655 652 if (pp == NULL)
656 653 panic("segmap_fault: softunlock page not found");
657 654
658 655 /*
659 656 * Set ref bit also here in case of S_OTHER to avoid the
660 657 * overhead of supporting other cases than F_SOFTUNLOCK
661 658 * with segkpm. We can do this because the underlying
662 659 * pages are locked anyway.
663 660 */
664 661 if (rw == S_WRITE) {
665 662 hat_setrefmod(pp);
666 663 } else {
667 664 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
668 665 "segmap_fault:pp %p vp %p offset %llx",
669 666 pp, vp, off);
670 667 hat_setref(pp);
671 668 }
672 669
673 670 return (0);
674 671 }
675 672
676 673 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
677 674 smp = GET_SMAP(seg, addr);
678 675 vp = smp->sm_vp;
679 676 sm_off = smp->sm_off;
680 677
681 678 if (vp == NULL)
682 679 return (FC_MAKE_ERR(EIO));
683 680
684 681 ASSERT(smp->sm_refcnt > 0);
685 682
686 683 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
687 684 if (addroff + len > MAXBSIZE) {
688 685 panic("segmap_fault: endaddr %p "
689 686 "exceeds MAXBSIZE chunk", (void *)(addr + len));
690 687 /*NOTREACHED*/
691 688 }
692 689 off = sm_off + addroff;
693 690
694 691 /*
695 692 * First handle the easy stuff
696 693 */
697 694 if (type == F_SOFTUNLOCK) {
698 695 segmap_unlock(hat, seg, addr, len, rw, smp);
699 696 return (0);
700 697 }
701 698
702 699 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
703 700 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
704 701 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
705 702 seg, addr, rw, CRED(), NULL);
706 703
707 704 if (err)
708 705 return (FC_MAKE_ERR(err));
709 706
710 707 prot &= smd->smd_prot;
711 708
712 709 /*
713 710 * Handle all pages returned in the pl[] array.
714 711 * This loop is coded on the assumption that if
715 712 * there was no error from the VOP_GETPAGE routine,
716 713 * that the page list returned will contain all the
717 714 * needed pages for the vp from [off..off + len].
718 715 */
719 716 ppp = pl;
720 717 while ((pp = *ppp++) != NULL) {
721 718 u_offset_t poff;
722 719 ASSERT(pp->p_vnode == vp);
723 720 hat_flag = HAT_LOAD;
724 721
725 722 /*
726 723 * Verify that the pages returned are within the range
727 724 * of this segmap region. Note that it is theoretically
728 725 * possible for pages outside this range to be returned,
729 726 * but it is not very likely. If we cannot use the
730 727 * page here, just release it and go on to the next one.
731 728 */
732 729 if (pp->p_offset < sm_off ||
733 730 pp->p_offset >= sm_off + MAXBSIZE) {
734 731 (void) page_release(pp, 1);
735 732 continue;
736 733 }
737 734
738 735 ASSERT(hat == kas.a_hat);
739 736 poff = pp->p_offset;
740 737 adr = addr + (poff - off);
741 738 if (adr >= addr && adr < addr + len) {
742 739 hat_setref(pp);
743 740 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
744 741 "segmap_fault:pp %p vp %p offset %llx",
745 742 pp, vp, poff);
746 743 if (type == F_SOFTLOCK)
747 744 hat_flag = HAT_LOAD_LOCK;
748 745 }
749 746
750 747 /*
751 748 * Deal with VMODSORT pages here. If we know this is a write
752 749 * do the setmod now and allow write protection.
753 750 * As long as it's modified or not S_OTHER, remove write
754 751 * protection. With S_OTHER it's up to the FS to deal with this.
755 752 */
756 753 if (IS_VMODSORT(vp)) {
757 754 if (rw == S_WRITE)
758 755 hat_setmod(pp);
759 756 else if (rw != S_OTHER && !hat_ismod(pp))
760 757 prot &= ~PROT_WRITE;
761 758 }
762 759
763 760 hat_memload(hat, adr, pp, prot, hat_flag);
764 761 if (hat_flag != HAT_LOAD_LOCK)
765 762 page_unlock(pp);
766 763 }
767 764 return (0);
768 765 }
769 766
770 767 /*
771 768 * This routine is used to start I/O on pages asynchronously.
772 769 */
773 770 static faultcode_t
774 771 segmap_faulta(struct seg *seg, caddr_t addr)
775 772 {
776 773 struct smap *smp;
777 774 struct vnode *vp;
778 775 u_offset_t off;
779 776 int err;
780 777
781 778 if (segmap_kpm && IS_KPM_ADDR(addr)) {
782 779 int newpage;
783 780 kmutex_t *smtx;
784 781
785 782 /*
786 783 * Pages are successfully prefaulted and locked in
787 784 * segmap_getmapflt and can't be unlocked until
788 785 * segmap_release. No hat mappings have to be locked
789 786 * and they also can't be unlocked as long as the
790 787 * caller owns an active kpm addr.
791 788 */
792 789 #ifdef DEBUG
793 790 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
794 791 panic("segmap_faulta: smap not found "
795 792 "for addr %p", (void *)addr);
796 793 /*NOTREACHED*/
797 794 }
798 795
799 796 smtx = SMAPMTX(smp);
800 797 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
801 798 mutex_exit(smtx);
802 799 if (newpage)
803 800 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
804 801 (void *)smp);
805 802 #endif
806 803 return (0);
807 804 }
808 805
809 806 segmapcnt.smp_faulta.value.ul++;
810 807 smp = GET_SMAP(seg, addr);
811 808
812 809 ASSERT(smp->sm_refcnt > 0);
813 810
814 811 vp = smp->sm_vp;
815 812 off = smp->sm_off;
816 813
817 814 if (vp == NULL) {
818 815 cmn_err(CE_WARN, "segmap_faulta - no vp");
819 816 return (FC_MAKE_ERR(EIO));
820 817 }
821 818
822 819 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
823 820 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
824 821
825 822 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
826 823 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
827 824 seg, addr, S_READ, CRED(), NULL);
828 825
829 826 if (err)
830 827 return (FC_MAKE_ERR(err));
831 828 return (0);
832 829 }
833 830
834 831 /*ARGSUSED*/
835 832 static int
836 833 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
837 834 {
838 835 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
839 836
840 837 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
841 838
842 839 /*
843 840 * Need not acquire the segment lock since
844 841 * "smd_prot" is a read-only field.
845 842 */
846 843 return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
847 844 }
848 845
849 846 static int
850 847 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
851 848 {
852 849 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
853 850 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
854 851
855 852 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
856 853
857 854 if (pgno != 0) {
858 855 do {
859 856 protv[--pgno] = smd->smd_prot;
860 857 } while (pgno != 0);
861 858 }
862 859 return (0);
863 860 }
864 861
865 862 static u_offset_t
866 863 segmap_getoffset(struct seg *seg, caddr_t addr)
867 864 {
868 865 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
869 866
870 867 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
871 868
872 869 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
873 870 }
874 871
875 872 /*ARGSUSED*/
876 873 static int
877 874 segmap_gettype(struct seg *seg, caddr_t addr)
878 875 {
879 876 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
880 877
881 878 return (MAP_SHARED);
882 879 }
883 880
884 881 /*ARGSUSED*/
885 882 static int
886 883 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
887 884 {
888 885 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
889 886
890 887 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
891 888
892 889 /* XXX - This doesn't make any sense */
893 890 *vpp = smd->smd_sm->sm_vp;
894 891 return (0);
895 892 }
896 893
897 894 /*
898 895 * Check to see if it makes sense to do kluster/read ahead to
899 896 * addr + delta relative to the mapping at addr. We assume here
900 897 * that delta is a signed PAGESIZE'd multiple (which can be negative).
901 898 *
902 899 * For segmap we always "approve" of this action from our standpoint.
903 900 */
904 901 /*ARGSUSED*/
905 902 static int
906 903 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
907 904 {
908 905 return (0);
909 906 }
910 907
911 908 static void
912 909 segmap_badop()
913 910 {
914 911 panic("segmap_badop");
915 912 /*NOTREACHED*/
916 913 }
917 914
918 915 /*
919 916 * Special private segmap operations
920 917 */
921 918
922 919 /*
923 920 * Add smap to the appropriate free list.
924 921 */
925 922 static void
926 923 segmap_smapadd(struct smap *smp)
927 924 {
928 925 struct smfree *sm;
929 926 struct smap *smpfreelist;
930 927 struct sm_freeq *releq;
931 928
932 929 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
933 930
934 931 if (smp->sm_refcnt != 0) {
935 932 panic("segmap_smapadd");
936 933 /*NOTREACHED*/
937 934 }
938 935
939 936 sm = &smd_free[smp->sm_free_ndx];
940 937 /*
941 938 * Add to the tail of the release queue
942 939 * Note that sm_releq and sm_allocq could toggle
943 940 * before we get the lock. This does not affect
944 941 * correctness as the 2 queues are only maintained
945 942 * to reduce lock pressure.
946 943 */
947 944 releq = sm->sm_releq;
948 945 if (releq == &sm->sm_freeq[0])
949 946 smp->sm_flags |= SM_QNDX_ZERO;
950 947 else
951 948 smp->sm_flags &= ~SM_QNDX_ZERO;
952 949 mutex_enter(&releq->smq_mtx);
953 950 smpfreelist = releq->smq_free;
954 951 if (smpfreelist == 0) {
955 952 int want;
956 953
957 954 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
958 955 /*
959 956 * Both queue mutexes held to set sm_want;
960 957 * snapshot the value before dropping releq mutex.
961 958 * If sm_want appears after the releq mutex is dropped,
962 959 * then the smap just freed is already gone.
963 960 */
964 961 want = sm->sm_want;
965 962 mutex_exit(&releq->smq_mtx);
966 963 /*
967 964 * See if there was a waiter before dropping the releq mutex
968 965 * then recheck after obtaining sm_freeq[0] mutex as
969 966 * the another thread may have already signaled.
 970  967 	 * another thread may have already signaled.
971 968 if (want) {
972 969 mutex_enter(&sm->sm_freeq[0].smq_mtx);
973 970 if (sm->sm_want)
974 971 cv_signal(&sm->sm_free_cv);
975 972 mutex_exit(&sm->sm_freeq[0].smq_mtx);
976 973 }
977 974 } else {
978 975 smp->sm_next = smpfreelist;
979 976 smp->sm_prev = smpfreelist->sm_prev;
980 977 smpfreelist->sm_prev = smp;
981 978 smp->sm_prev->sm_next = smp;
982 979 mutex_exit(&releq->smq_mtx);
983 980 }
984 981 }
985 982
986 983
987 984 static struct smap *
988 985 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
989 986 {
990 987 struct smap **hpp;
991 988 struct smap *tmp;
992 989 kmutex_t *hmtx;
993 990
994 991 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
995 992 ASSERT(smp->sm_vp == NULL);
996 993 ASSERT(smp->sm_hash == NULL);
997 994 ASSERT(smp->sm_prev == NULL);
998 995 ASSERT(smp->sm_next == NULL);
999 996 ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
1000 997
1001 998 hmtx = SHASHMTX(hashid);
1002 999
1003 1000 mutex_enter(hmtx);
1004 1001 /*
1005 1002 * First we need to verify that no one has created a smp
1006 1003 * with (vp,off) as its tag before we us.
1007 1004 	 * with (vp,off) as its tag before us.
1008 1005 for (tmp = smd_hash[hashid].sh_hash_list;
1009 1006 tmp != NULL; tmp = tmp->sm_hash)
1010 1007 if (tmp->sm_vp == vp && tmp->sm_off == off)
1011 1008 break;
1012 1009
1013 1010 if (tmp == NULL) {
1014 1011 /*
1015 1012 * No one created one yet.
1016 1013 *
1017 1014 * Funniness here - we don't increment the ref count on the
1018 1015 * vnode * even though we have another pointer to it here.
1019 1016 * The reason for this is that we don't want the fact that
1020 1017 * a seg_map entry somewhere refers to a vnode to prevent the
1021 1018 * vnode * itself from going away. This is because this
1022 1019 * reference to the vnode is a "soft one". In the case where
1023 1020 * a mapping is being used by a rdwr [or directory routine?]
1024 1021 * there already has to be a non-zero ref count on the vnode.
1025 1022 	 * In the case where the vp has been freed and the smap
1026 1023 * structure is on the free list, there are no pages in memory
1027 1024 * that can refer to the vnode. Thus even if we reuse the same
1028 1025 * vnode/smap structure for a vnode which has the same
1029 1026 * address but represents a different object, we are ok.
1030 1027 */
1031 1028 smp->sm_vp = vp;
1032 1029 smp->sm_off = off;
1033 1030
1034 1031 hpp = &smd_hash[hashid].sh_hash_list;
1035 1032 smp->sm_hash = *hpp;
1036 1033 *hpp = smp;
1037 1034 #ifdef SEGMAP_HASHSTATS
1038 1035 smd_hash_len[hashid]++;
1039 1036 #endif
1040 1037 }
1041 1038 mutex_exit(hmtx);
1042 1039
1043 1040 return (tmp);
1044 1041 }
1045 1042
1046 1043 static void
1047 1044 segmap_hashout(struct smap *smp)
1048 1045 {
1049 1046 struct smap **hpp, *hp;
1050 1047 struct vnode *vp;
1051 1048 kmutex_t *mtx;
1052 1049 int hashid;
1053 1050 u_offset_t off;
1054 1051
1055 1052 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1056 1053
1057 1054 vp = smp->sm_vp;
1058 1055 off = smp->sm_off;
1059 1056
1060 1057 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1061 1058 mtx = SHASHMTX(hashid);
1062 1059 mutex_enter(mtx);
1063 1060
1064 1061 hpp = &smd_hash[hashid].sh_hash_list;
1065 1062 for (;;) {
1066 1063 hp = *hpp;
1067 1064 if (hp == NULL) {
1068 1065 panic("segmap_hashout");
1069 1066 /*NOTREACHED*/
1070 1067 }
1071 1068 if (hp == smp)
1072 1069 break;
1073 1070 hpp = &hp->sm_hash;
1074 1071 }
1075 1072
1076 1073 *hpp = smp->sm_hash;
1077 1074 smp->sm_hash = NULL;
1078 1075 #ifdef SEGMAP_HASHSTATS
1079 1076 smd_hash_len[hashid]--;
1080 1077 #endif
1081 1078 mutex_exit(mtx);
1082 1079
1083 1080 smp->sm_vp = NULL;
1084 1081 smp->sm_off = (u_offset_t)0;
1085 1082
1086 1083 }
1087 1084
1088 1085 /*
1089 1086 * Attempt to free unmodified, unmapped, and non locked segmap
1090 1087 * pages.
1091 1088 */
1092 1089 void
1093 1090 segmap_pagefree(struct vnode *vp, u_offset_t off)
1094 1091 {
1095 1092 u_offset_t pgoff;
1096 1093 page_t *pp;
1097 1094
1098 1095 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
1099 1096
1100 1097 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
1101 1098 continue;
1102 1099
1103 1100 switch (page_release(pp, 1)) {
1104 1101 case PGREL_NOTREL:
1105 1102 segmapcnt.smp_free_notfree.value.ul++;
1106 1103 break;
1107 1104 case PGREL_MOD:
1108 1105 segmapcnt.smp_free_dirty.value.ul++;
1109 1106 break;
1110 1107 case PGREL_CLEAN:
1111 1108 segmapcnt.smp_free.value.ul++;
1112 1109 break;
1113 1110 }
1114 1111 }
1115 1112 }
1116 1113
1117 1114 /*
1118 1115 * Locks held on entry: smap lock
1119 1116 * Locks held on exit : smap lock.
1120 1117 */
1121 1118
1122 1119 static void
1123 1120 grab_smp(struct smap *smp, page_t *pp)
1124 1121 {
1125 1122 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1126 1123 ASSERT(smp->sm_refcnt == 0);
1127 1124
1128 1125 if (smp->sm_vp != (struct vnode *)NULL) {
1129 1126 struct vnode *vp = smp->sm_vp;
1130 1127 u_offset_t off = smp->sm_off;
1131 1128 /*
1132 1129 * Destroy old vnode association and
1133 1130 * unload any hardware translations to
1134 1131 * the old object.
1135 1132 */
1136 1133 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
1137 1134 segmap_hashout(smp);
1138 1135
1139 1136 /*
1140 1137 * This node is off freelist and hashlist,
1141 1138 * so there is no reason to drop/reacquire sm_mtx
1142 1139 * across calls to hat_unload.
1143 1140 */
1144 1141 if (segmap_kpm) {
1145 1142 caddr_t vaddr;
1146 1143 int hat_unload_needed = 0;
1147 1144
1148 1145 /*
1149 1146 * unload kpm mapping
1150 1147 */
1151 1148 if (pp != NULL) {
1152 1149 vaddr = hat_kpm_page2va(pp, 1);
1153 1150 hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
1154 1151 page_unlock(pp);
1155 1152 }
1156 1153
1157 1154 /*
1158 1155 * Check if we have (also) the rare case of a
1159 1156 * non kpm mapping.
1160 1157 */
1161 1158 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
1162 1159 hat_unload_needed = 1;
1163 1160 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1164 1161 }
1165 1162
1166 1163 if (hat_unload_needed) {
1167 1164 hat_unload(kas.a_hat, segkmap->s_base +
1168 1165 ((smp - smd_smap) * MAXBSIZE),
1169 1166 MAXBSIZE, HAT_UNLOAD);
1170 1167 }
1171 1168
1172 1169 } else {
1173 1170 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
1174 1171 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1175 1172 hat_unload(kas.a_hat, segkmap->s_base +
1176 1173 ((smp - smd_smap) * MAXBSIZE),
1177 1174 MAXBSIZE, HAT_UNLOAD);
1178 1175 }
1179 1176 segmap_pagefree(vp, off);
1180 1177 }
1181 1178 }
1182 1179
1183 1180 static struct smap *
1184 1181 get_free_smp(int free_ndx)
1185 1182 {
1186 1183 struct smfree *sm;
1187 1184 kmutex_t *smtx;
1188 1185 struct smap *smp, *first;
1189 1186 struct sm_freeq *allocq, *releq;
1190 1187 struct kpme *kpme;
1191 1188 page_t *pp = NULL;
1192 1189 int end_ndx, page_locked = 0;
1193 1190
1194 1191 end_ndx = free_ndx;
1195 1192 sm = &smd_free[free_ndx];
1196 1193
1197 1194 retry_queue:
1198 1195 allocq = sm->sm_allocq;
1199 1196 mutex_enter(&allocq->smq_mtx);
1200 1197
1201 1198 if ((smp = allocq->smq_free) == NULL) {
1202 1199
1203 1200 skip_queue:
1204 1201 /*
1205 1202 * The alloc list is empty or this queue is being skipped;
1206 1203 * first see if the allocq toggled.
1207 1204 */
1208 1205 if (sm->sm_allocq != allocq) {
1209 1206 /* queue changed */
1210 1207 mutex_exit(&allocq->smq_mtx);
1211 1208 goto retry_queue;
1212 1209 }
1213 1210 releq = sm->sm_releq;
1214 1211 if (!mutex_tryenter(&releq->smq_mtx)) {
1215 1212 /* cannot get releq; a free smp may be there now */
1216 1213 mutex_exit(&allocq->smq_mtx);
1217 1214
1218 1215 /*
1219 1216 * This loop could spin forever if this thread has
1220 1217 * higher priority than the thread that is holding
1221 1218 * releq->smq_mtx. In order to force the other thread
1222 1219 * to run, we'll lock/unlock the mutex which is safe
1223 1220 * since we just unlocked the allocq mutex.
1224 1221 */
1225 1222 mutex_enter(&releq->smq_mtx);
1226 1223 mutex_exit(&releq->smq_mtx);
1227 1224 goto retry_queue;
1228 1225 }
1229 1226 if (releq->smq_free == NULL) {
1230 1227 /*
1231 1228 * This freelist is empty.
1232 1229 * This should not happen unless clients
1233 1230 * are failing to release the segmap
1234 1231 * window after accessing the data.
1235 1232 * Before resorting to sleeping, try
1236 1233 * the next list of the same color.
1237 1234 */
1238 1235 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
1239 1236 if (free_ndx != end_ndx) {
1240 1237 mutex_exit(&releq->smq_mtx);
1241 1238 mutex_exit(&allocq->smq_mtx);
1242 1239 sm = &smd_free[free_ndx];
1243 1240 goto retry_queue;
1244 1241 }
1245 1242 /*
1246 1243 * Tried all freelists of the same color once,
1247 1244 * wait on this list and hope something gets freed.
1248 1245 */
1249 1246 segmapcnt.smp_get_nofree.value.ul++;
1250 1247 sm->sm_want++;
1251 1248 mutex_exit(&sm->sm_freeq[1].smq_mtx);
1252 1249 cv_wait(&sm->sm_free_cv,
1253 1250 &sm->sm_freeq[0].smq_mtx);
1254 1251 sm->sm_want--;
1255 1252 mutex_exit(&sm->sm_freeq[0].smq_mtx);
1256 1253 sm = &smd_free[free_ndx];
1257 1254 goto retry_queue;
1258 1255 } else {
1259 1256 /*
1260 1257 * Something on the rele queue; flip the alloc
1261 1258 * and rele queues and retry.
1262 1259 */
1263 1260 sm->sm_allocq = releq;
1264 1261 sm->sm_releq = allocq;
1265 1262 mutex_exit(&allocq->smq_mtx);
1266 1263 mutex_exit(&releq->smq_mtx);
1267 1264 if (page_locked) {
1268 1265 delay(hz >> 2);
1269 1266 page_locked = 0;
1270 1267 }
1271 1268 goto retry_queue;
1272 1269 }
1273 1270 } else {
1274 1271 /*
1275 1272 * Fastpath the case we get the smap mutex
1276 1273 * on the first try.
1277 1274 */
1278 1275 first = smp;
1279 1276 next_smap:
1280 1277 smtx = SMAPMTX(smp);
1281 1278 if (!mutex_tryenter(smtx)) {
1282 1279 /*
1283 1280 * Another thread is trying to reclaim this slot.
1284 1281 * Skip to the next queue or smap.
1285 1282 */
1286 1283 if ((smp = smp->sm_next) == first) {
1287 1284 goto skip_queue;
1288 1285 } else {
1289 1286 goto next_smap;
1290 1287 }
1291 1288 } else {
1292 1289 /*
1293 1290 * if kpme exists, get shared lock on the page
1294 1291 */
1295 1292 if (segmap_kpm && smp->sm_vp != NULL) {
1296 1293
1297 1294 kpme = GET_KPME(smp);
1298 1295 pp = kpme->kpe_page;
1299 1296
1300 1297 if (pp != NULL) {
1301 1298 if (!page_trylock(pp, SE_SHARED)) {
1302 1299 smp = smp->sm_next;
1303 1300 mutex_exit(smtx);
1304 1301 page_locked = 1;
1305 1302
1306 1303 pp = NULL;
1307 1304
1308 1305 if (smp == first) {
1309 1306 goto skip_queue;
1310 1307 } else {
1311 1308 goto next_smap;
1312 1309 }
1313 1310 } else {
1314 1311 if (kpme->kpe_page == NULL) {
1315 1312 page_unlock(pp);
1316 1313 pp = NULL;
1317 1314 }
1318 1315 }
1319 1316 }
1320 1317 }
1321 1318
1322 1319 /*
1323 1320 * At this point, we've selected smp. Remove smp
1324 1321 * from its freelist. If smp is the first one in
1325 1322 * the freelist, update the head of the freelist.
1326 1323 */
1327 1324 if (first == smp) {
1328 1325 ASSERT(first == allocq->smq_free);
1329 1326 allocq->smq_free = smp->sm_next;
1330 1327 }
1331 1328
1332 1329 /*
1333 1330 * if the head of the freelist still points to smp,
1334 1331 * then there are no more free smaps in that list.
1335 1332 */
1336 1333 if (allocq->smq_free == smp)
1337 1334 /*
1338 1335 * Took the last one
1339 1336 */
1340 1337 allocq->smq_free = NULL;
1341 1338 else {
1342 1339 smp->sm_prev->sm_next = smp->sm_next;
1343 1340 smp->sm_next->sm_prev = smp->sm_prev;
1344 1341 }
1345 1342 mutex_exit(&allocq->smq_mtx);
1346 1343 smp->sm_prev = smp->sm_next = NULL;
1347 1344
1348 1345 /*
1349 1346 * if pp != NULL, pp must have been locked;
1350 1347 * grab_smp() unlocks pp.
1351 1348 */
1352 1349 ASSERT((pp == NULL) || PAGE_LOCKED(pp));
1353 1350 grab_smp(smp, pp);
1354 1351 /* return smp locked. */
1355 1352 ASSERT(SMAPMTX(smp) == smtx);
1356 1353 ASSERT(MUTEX_HELD(smtx));
1357 1354 return (smp);
1358 1355 }
1359 1356 }
1360 1357 }
1361 1358
1362 1359 /*
1363 1360 * Special public segmap operations
1364 1361 */
1365 1362
1366 1363 /*
1367 1364 * Create pages (without using VOP_GETPAGE) and load up translations to them.
1368 1365 * If softlock is TRUE, then set things up so that it looks like a call
1369 1366 * to segmap_fault with F_SOFTLOCK.
1370 1367 *
1371 1368 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
1372 1369 *
1373 1370 * All fields in the generic segment (struct seg) are considered to be
1374 1371 * read-only for "segmap" even though the kernel address space (kas) may
1375 1372 * not be locked, hence no lock is needed to access them.
1376 1373 */
1377 1374 int
1378 1375 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
1379 1376 {
1380 1377 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
1381 1378 page_t *pp;
1382 1379 u_offset_t off;
1383 1380 struct smap *smp;
1384 1381 struct vnode *vp;
1385 1382 caddr_t eaddr;
1386 1383 int newpage = 0;
1387 1384 uint_t prot;
1388 1385 kmutex_t *smtx;
1389 1386 int hat_flag;
1390 1387
1391 1388 ASSERT(seg->s_as == &kas);
1392 1389
1393 1390 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1394 1391 /*
1395 1392 * Pages are successfully prefaulted and locked in
1396 1393 * segmap_getmapflt and can't be unlocked until
1397 1394 * segmap_release. The SM_KPM_NEWPAGE flag is set
1398 1395 	 * in segmap_pagecreate_kpm when new pages are created,
1399 1396 	 * and it is returned as the "newpage" indication here.
1400 1397 */
1401 1398 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1402 1399 panic("segmap_pagecreate: smap not found "
1403 1400 "for addr %p", (void *)addr);
1404 1401 /*NOTREACHED*/
1405 1402 }
1406 1403
1407 1404 smtx = SMAPMTX(smp);
1408 1405 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
1409 1406 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1410 1407 mutex_exit(smtx);
1411 1408
1412 1409 return (newpage);
1413 1410 }
1414 1411
1415 1412 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
1416 1413
1417 1414 eaddr = addr + len;
1418 1415 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1419 1416
1420 1417 smp = GET_SMAP(seg, addr);
1421 1418
1422 1419 /*
1423 1420 * We don't grab smp mutex here since we assume the smp
1424 1421 * has a refcnt set already which prevents the slot from
1425 1422 * changing its id.
1426 1423 */
1427 1424 ASSERT(smp->sm_refcnt > 0);
1428 1425
1429 1426 vp = smp->sm_vp;
1430 1427 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1431 1428 prot = smd->smd_prot;
1432 1429
1433 1430 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1434 1431 hat_flag = HAT_LOAD;
1435 1432 pp = page_lookup(vp, off, SE_SHARED);
1436 1433 if (pp == NULL) {
1437 1434 ushort_t bitindex;
1438 1435
1439 1436 if ((pp = page_create_va(vp, off,
1440 1437 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1441 1438 panic("segmap_pagecreate: page_create failed");
1442 1439 /*NOTREACHED*/
1443 1440 }
1444 1441 newpage = 1;
1445 1442 page_io_unlock(pp);
1446 1443
1447 1444 /*
1448 1445 * Since pages created here do not contain valid
1449 1446 * data until the caller writes into them, the
1450 1447 * "exclusive" lock will not be dropped to prevent
1451 1448 * other users from accessing the page. We also
1452 1449 * have to lock the translation to prevent a fault
1453 1450 * from occurring when the virtual address mapped by
1454 1451 * this page is written into. This is necessary to
1455 1452 * avoid a deadlock since we haven't dropped the
1456 1453 * "exclusive" lock.
1457 1454 */
1458 1455 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
1459 1456
1460 1457 /*
1461 1458 * Large Files: The following assertion is to
1462 1459 * verify the cast above.
1463 1460 */
1464 1461 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1465 1462 smtx = SMAPMTX(smp);
1466 1463 mutex_enter(smtx);
1467 1464 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
1468 1465 mutex_exit(smtx);
1469 1466
1470 1467 hat_flag = HAT_LOAD_LOCK;
1471 1468 } else if (softlock) {
1472 1469 hat_flag = HAT_LOAD_LOCK;
1473 1470 }
1474 1471
1475 1472 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
1476 1473 hat_setmod(pp);
1477 1474
1478 1475 hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
1479 1476
1480 1477 if (hat_flag != HAT_LOAD_LOCK)
1481 1478 page_unlock(pp);
1482 1479
1483 1480 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
1484 1481 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1485 1482 seg, addr, pp, vp, off);
1486 1483 }
1487 1484
1488 1485 return (newpage);
1489 1486 }
1490 1487
1491 1488 void
1492 1489 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
1493 1490 {
1494 1491 struct smap *smp;
1495 1492 ushort_t bitmask;
1496 1493 page_t *pp;
1497 1494 struct vnode *vp;
1498 1495 u_offset_t off;
1499 1496 caddr_t eaddr;
1500 1497 kmutex_t *smtx;
1501 1498
1502 1499 ASSERT(seg->s_as == &kas);
1503 1500
1504 1501 eaddr = addr + len;
1505 1502 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1506 1503
1507 1504 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1508 1505 /*
1509 1506 * Pages are successfully prefaulted and locked in
1510 1507 * segmap_getmapflt and can't be unlocked until
1511 1508 * segmap_release, so no pages or hat mappings have
1512 1509 * to be unlocked at this point.
1513 1510 */
1514 1511 #ifdef DEBUG
1515 1512 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1516 1513 panic("segmap_pageunlock: smap not found "
1517 1514 "for addr %p", (void *)addr);
1518 1515 /*NOTREACHED*/
1519 1516 }
1520 1517
1521 1518 ASSERT(smp->sm_refcnt > 0);
1522 1519 mutex_exit(SMAPMTX(smp));
1523 1520 #endif
1524 1521 return;
1525 1522 }
1526 1523
1527 1524 smp = GET_SMAP(seg, addr);
1528 1525 smtx = SMAPMTX(smp);
1529 1526
1530 1527 ASSERT(smp->sm_refcnt > 0);
1531 1528
1532 1529 vp = smp->sm_vp;
1533 1530 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1534 1531
1535 1532 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1536 1533 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
1537 1534
1538 1535 /*
1539 1536 * Large Files: Following assertion is to verify
1540 1537 * the correctness of the cast to (int) above.
1541 1538 */
1542 1539 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1543 1540
1544 1541 /*
1545 1542 * If the bit corresponding to "off" is set,
1546 1543 * clear this bit in the bitmap, unlock translations,
1547 1544 * and release the "exclusive" lock on the page.
1548 1545 */
1549 1546 if (smp->sm_bitmap & bitmask) {
1550 1547 mutex_enter(smtx);
1551 1548 smp->sm_bitmap &= ~bitmask;
1552 1549 mutex_exit(smtx);
1553 1550
1554 1551 hat_unlock(kas.a_hat, addr, PAGESIZE);
1555 1552
1556 1553 /*
1557 1554 * Use page_find() instead of page_lookup() to
1558 1555 * find the page since we know that it has
1559 1556 * "exclusive" lock.
1560 1557 */
1561 1558 pp = page_find(vp, off);
1562 1559 if (pp == NULL) {
1563 1560 panic("segmap_pageunlock: page not found");
1564 1561 /*NOTREACHED*/
1565 1562 }
1566 1563 if (rw == S_WRITE) {
1567 1564 hat_setrefmod(pp);
1568 1565 } else if (rw != S_OTHER) {
1569 1566 hat_setref(pp);
1570 1567 }
1571 1568
1572 1569 page_unlock(pp);
1573 1570 }
1574 1571 }
1575 1572 }
1576 1573
1577 1574 caddr_t
1578 1575 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
1579 1576 {
1580 1577 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
1581 1578 }
1582 1579
1583 1580 /*
1584 1581 * This is the magic virtual address that offset 0 of an ELF
1585 1582 * file gets mapped to in user space. This is used to pick
1586 1583 * the vac color on the freelist.
1587 1584 */
1588 1585 #define ELF_OFFZERO_VA (0x10000)
1589 1586 /*
1590 1587 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
1591 1588 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
1592 1589 * The return address is always MAXBSIZE aligned.
1593 1590 *
1594 1591 * If forcefault is nonzero and the MMU translations haven't yet been created,
1595 1592 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1596 1593 */
1597 1594 caddr_t
1598 1595 segmap_getmapflt(
1599 1596 struct seg *seg,
1600 1597 struct vnode *vp,
1601 1598 u_offset_t off,
1602 1599 size_t len,
1603 1600 int forcefault,
1604 1601 enum seg_rw rw)
1605 1602 {
1606 1603 struct smap *smp, *nsmp;
1607 1604 extern struct vnode *common_specvp();
1608 1605 caddr_t baseaddr; /* MAXBSIZE aligned */
1609 1606 u_offset_t baseoff;
1610 1607 int newslot;
1611 1608 caddr_t vaddr;
1612 1609 int color, hashid;
1613 1610 kmutex_t *hashmtx, *smapmtx;
1614 1611 struct smfree *sm;
1615 1612 page_t *pp;
1616 1613 struct kpme *kpme;
1617 1614 uint_t prot;
1618 1615 caddr_t base;
1619 1616 page_t *pl[MAXPPB + 1];
1620 1617 int error;
1621 1618 int is_kpm = 1;
1622 1619
1623 1620 ASSERT(seg->s_as == &kas);
1624 1621 ASSERT(seg == segkmap);
1625 1622
1626 1623 baseoff = off & (offset_t)MAXBMASK;
1627 1624 if (off + len > baseoff + MAXBSIZE) {
1628 1625 panic("segmap_getmap bad len");
1629 1626 /*NOTREACHED*/
1630 1627 }
1631 1628
1632 1629 /*
1633 1630 * If this is a block device we have to be sure to use the
1634 1631 * "common" block device vnode for the mapping.
1635 1632 */
1636 1633 if (vp->v_type == VBLK)
1637 1634 vp = common_specvp(vp);
1638 1635
1639 1636 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
1640 1637
1641 1638 if (segmap_kpm == 0 ||
1642 1639 (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
1643 1640 is_kpm = 0;
1644 1641 }
1645 1642
1646 1643 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1647 1644 hashmtx = SHASHMTX(hashid);
1648 1645
1649 1646 retry_hash:
1650 1647 mutex_enter(hashmtx);
1651 1648 for (smp = smd_hash[hashid].sh_hash_list;
1652 1649 smp != NULL; smp = smp->sm_hash)
1653 1650 if (smp->sm_vp == vp && smp->sm_off == baseoff)
1654 1651 break;
1655 1652 mutex_exit(hashmtx);
1656 1653
1657 1654 vrfy_smp:
1658 1655 if (smp != NULL) {
1659 1656
1660 1657 ASSERT(vp->v_count != 0);
1661 1658
1662 1659 /*
1663 1660 * Get smap lock and recheck its tag. The hash lock
1664 1661 * is dropped since the hash is based on (vp, off)
1665 1662 * and (vp, off) won't change when we have smap mtx.
1666 1663 */
1667 1664 smapmtx = SMAPMTX(smp);
1668 1665 mutex_enter(smapmtx);
1669 1666 if (smp->sm_vp != vp || smp->sm_off != baseoff) {
1670 1667 mutex_exit(smapmtx);
1671 1668 goto retry_hash;
1672 1669 }
1673 1670
1674 1671 if (smp->sm_refcnt == 0) {
1675 1672
1676 1673 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
1677 1674
1678 1675 /*
1679 1676 * Could still be on the free list. However, this
1680 1677 * could also be an smp that is transitioning from
1681 1678 * the free list when we have too much contention
1682 1679 * for the smapmtx's. In this case, we have an
1683 1680 * unlocked smp that is not on the free list any
1684 1681 * longer, but still has a 0 refcnt. The only way
1685 1682 * to be sure is to check the freelist pointers.
1686 1683 * Since we now have the smapmtx, we are guaranteed
1687 1684 * that the (vp, off) won't change, so we are safe
1688 1685 * to reclaim it. get_free_smp() knows that this
1689 1686 * can happen, and it will check the refcnt.
1690 1687 */
1691 1688
1692 1689 if ((smp->sm_next != NULL)) {
1693 1690 struct sm_freeq *freeq;
1694 1691
1695 1692 ASSERT(smp->sm_prev != NULL);
1696 1693 sm = &smd_free[smp->sm_free_ndx];
1697 1694
1698 1695 if (smp->sm_flags & SM_QNDX_ZERO)
1699 1696 freeq = &sm->sm_freeq[0];
1700 1697 else
1701 1698 freeq = &sm->sm_freeq[1];
1702 1699
1703 1700 mutex_enter(&freeq->smq_mtx);
1704 1701 if (freeq->smq_free != smp) {
1705 1702 /*
1706 1703 * fastpath normal case
1707 1704 */
1708 1705 smp->sm_prev->sm_next = smp->sm_next;
1709 1706 smp->sm_next->sm_prev = smp->sm_prev;
1710 1707 } else if (smp == smp->sm_next) {
1711 1708 /*
1712 1709 * Taking the last smap on freelist
1713 1710 */
1714 1711 freeq->smq_free = NULL;
1715 1712 } else {
1716 1713 /*
1717 1714 * Reclaiming 1st smap on list
1718 1715 */
1719 1716 freeq->smq_free = smp->sm_next;
1720 1717 smp->sm_prev->sm_next = smp->sm_next;
1721 1718 smp->sm_next->sm_prev = smp->sm_prev;
1722 1719 }
1723 1720 mutex_exit(&freeq->smq_mtx);
1724 1721 smp->sm_prev = smp->sm_next = NULL;
1725 1722 } else {
1726 1723 ASSERT(smp->sm_prev == NULL);
1727 1724 segmapcnt.smp_stolen.value.ul++;
1728 1725 }
1729 1726
1730 1727 } else {
1731 1728 segmapcnt.smp_get_use.value.ul++;
1732 1729 }
1733 1730 smp->sm_refcnt++; /* another user */
1734 1731
1735 1732 /*
1736 1733 * We don't invoke segmap_fault via TLB miss, so we set ref
1737 1734 * and mod bits in advance. For S_OTHER we set them in
1738 1735 * segmap_fault F_SOFTUNLOCK.
1739 1736 */
1740 1737 if (is_kpm) {
1741 1738 if (rw == S_WRITE) {
1742 1739 smp->sm_flags |= SM_WRITE_DATA;
1743 1740 } else if (rw == S_READ) {
1744 1741 smp->sm_flags |= SM_READ_DATA;
1745 1742 }
1746 1743 }
1747 1744 mutex_exit(smapmtx);
1748 1745
1749 1746 newslot = 0;
1750 1747 } else {
1751 1748
1752 1749 uint32_t free_ndx, *free_ndxp;
1753 1750 union segmap_cpu *scpu;
1754 1751
1755 1752 /*
1756 1753 * On a PAC machine or a machine with anti-alias
1757 1754 * hardware, smd_colormsk will be zero.
1758 1755 *
1759 1756 		 * On a VAC machine, pick color by offset in the file
1760 1757 * so we won't get VAC conflicts on elf files.
1761 1758 * On data files, color does not matter but we
1762 1759 * don't know what kind of file it is so we always
1763 1760 * pick color by offset. This causes color
1764 1761 * corresponding to file offset zero to be used more
1765 1762 * heavily.
1766 1763 */
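		/*
		 * Illustration (hypothetical values, not from this source):
		 * assuming MAXBSIZE is 8K (MAXBSHIFT == 13) and smd_colormsk
		 * is 3, a file offset of 0x16000 gives
		 * color = (0x16000 >> 13) & 3 == 3, so consecutive MAXBSIZE
		 * windows of a file rotate through the available colors.
		 */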
1767 1764 color = (baseoff >> MAXBSHIFT) & smd_colormsk;
1768 1765 scpu = smd_cpu+CPU->cpu_seqid;
1769 1766 free_ndxp = &scpu->scpu.scpu_free_ndx[color];
1770 1767 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
1771 1768 #ifdef DEBUG
1772 1769 colors_used[free_ndx]++;
1773 1770 #endif /* DEBUG */
1774 1771
1775 1772 /*
1776 1773 * Get a locked smp slot from the free list.
1777 1774 */
1778 1775 smp = get_free_smp(free_ndx);
1779 1776 smapmtx = SMAPMTX(smp);
1780 1777
1781 1778 ASSERT(smp->sm_vp == NULL);
1782 1779
1783 1780 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
1784 1781 /*
1785 1782 * Failed to hashin, there exists one now.
1786 1783 * Return the smp we just allocated.
1787 1784 */
1788 1785 segmap_smapadd(smp);
1789 1786 mutex_exit(smapmtx);
1790 1787
1791 1788 smp = nsmp;
1792 1789 goto vrfy_smp;
1793 1790 }
1794 1791 smp->sm_refcnt++; /* another user */
1795 1792
1796 1793 /*
1797 1794 * We don't invoke segmap_fault via TLB miss, so we set ref
1798 1795 * and mod bits in advance. For S_OTHER we set them in
1799 1796 * segmap_fault F_SOFTUNLOCK.
1800 1797 */
1801 1798 if (is_kpm) {
1802 1799 if (rw == S_WRITE) {
1803 1800 smp->sm_flags |= SM_WRITE_DATA;
1804 1801 } else if (rw == S_READ) {
1805 1802 smp->sm_flags |= SM_READ_DATA;
1806 1803 }
1807 1804 }
1808 1805 mutex_exit(smapmtx);
1809 1806
1810 1807 newslot = 1;
1811 1808 }
1812 1809
1813 1810 if (!is_kpm)
1814 1811 goto use_segmap_range;
1815 1812
1816 1813 /*
1817 1814 * Use segkpm
1818 1815 */
1819 1816 /* Lint directive required until 6746211 is fixed */
1820 1817 /*CONSTCOND*/
1821 1818 ASSERT(PAGESIZE == MAXBSIZE);
1822 1819
1823 1820 /*
1824 1821 * remember the last smp faulted on this cpu.
1825 1822 */
1826 1823 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
1827 1824
1828 1825 if (forcefault == SM_PAGECREATE) {
1829 1826 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
1830 1827 return (baseaddr);
1831 1828 }
1832 1829
1833 1830 if (newslot == 0 &&
1834 1831 (pp = GET_KPME(smp)->kpe_page) != NULL) {
1835 1832
1836 1833 /* fastpath */
1837 1834 switch (rw) {
1838 1835 case S_READ:
1839 1836 case S_WRITE:
1840 1837 if (page_trylock(pp, SE_SHARED)) {
1841 1838 if (PP_ISFREE(pp) ||
1842 1839 !(pp->p_vnode == vp &&
1843 1840 pp->p_offset == baseoff)) {
1844 1841 page_unlock(pp);
1845 1842 pp = page_lookup(vp, baseoff,
1846 1843 SE_SHARED);
1847 1844 }
1848 1845 } else {
1849 1846 pp = page_lookup(vp, baseoff, SE_SHARED);
1850 1847 }
1851 1848
1852 1849 if (pp == NULL) {
1853 1850 ASSERT(GET_KPME(smp)->kpe_page == NULL);
1854 1851 break;
1855 1852 }
1856 1853
1857 1854 if (rw == S_WRITE &&
1858 1855 hat_page_getattr(pp, P_MOD | P_REF) !=
1859 1856 (P_MOD | P_REF)) {
1860 1857 page_unlock(pp);
1861 1858 break;
1862 1859 }
1863 1860
1864 1861 /*
1865 1862 * We have the p_selock as reader, grab_smp
1866 1863 * can't hit us, we have bumped the smap
1867 1864 * refcnt and hat_pageunload needs the
1868 1865 * p_selock exclusive.
1869 1866 */
1870 1867 kpme = GET_KPME(smp);
1871 1868 if (kpme->kpe_page == pp) {
1872 1869 baseaddr = hat_kpm_page2va(pp, 0);
1873 1870 } else if (kpme->kpe_page == NULL) {
1874 1871 baseaddr = hat_kpm_mapin(pp, kpme);
1875 1872 } else {
1876 1873 panic("segmap_getmapflt: stale "
1877 1874 "kpme page, kpme %p", (void *)kpme);
1878 1875 /*NOTREACHED*/
1879 1876 }
1880 1877
1881 1878 /*
1882 1879 * We don't invoke segmap_fault via TLB miss,
1883 1880 * so we set ref and mod bits in advance.
1884 1881 			 * For S_OTHER we set them in segmap_fault
1885 1882 * F_SOFTUNLOCK.
1886 1883 */
1887 1884 if (rw == S_READ && !hat_isref(pp))
1888 1885 hat_setref(pp);
1889 1886
1890 1887 return (baseaddr);
1891 1888 default:
1892 1889 break;
1893 1890 }
1894 1891 }
1895 1892
1896 1893 base = segkpm_create_va(baseoff);
1897 1894 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1898 1895 seg, base, rw, CRED(), NULL);
1899 1896
1900 1897 pp = pl[0];
1901 1898 if (error || pp == NULL) {
1902 1899 /*
1903 1900 * Use segmap address slot and let segmap_fault deal
1904 1901 * with the error cases. There is no error return
1905 1902 * possible here.
1906 1903 */
1907 1904 goto use_segmap_range;
1908 1905 }
1909 1906
1910 1907 ASSERT(pl[1] == NULL);
1911 1908
1912 1909 /*
1913 1910 * When prot is not returned w/ PROT_ALL the returned pages
1914 1911 * are not backed by fs blocks. For most of the segmap users
1915 1912 	 * this is no problem; they don't write to the pages in the
1916 1913 	 * same request and therefore don't rely on a following
1917 1914 	 * trap driven segmap_fault. With SM_LOCKPROTO users it
1918 1915 	 * is more secure to use segkmap addresses to allow
1919 1916 	 * protection segmap_faults.
1920 1917 */
1921 1918 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
1922 1919 /*
1923 1920 * Use segmap address slot and let segmap_fault
1924 1921 * do the error return.
1925 1922 */
1926 1923 ASSERT(rw != S_WRITE);
1927 1924 ASSERT(PAGE_LOCKED(pp));
1928 1925 page_unlock(pp);
1929 1926 forcefault = 0;
1930 1927 goto use_segmap_range;
1931 1928 }
1932 1929
1933 1930 /*
1934 1931 * We have the p_selock as reader, grab_smp can't hit us, we
1935 1932 * have bumped the smap refcnt and hat_pageunload needs the
1936 1933 * p_selock exclusive.
1937 1934 */
1938 1935 kpme = GET_KPME(smp);
1939 1936 if (kpme->kpe_page == pp) {
1940 1937 baseaddr = hat_kpm_page2va(pp, 0);
1941 1938 } else if (kpme->kpe_page == NULL) {
1942 1939 baseaddr = hat_kpm_mapin(pp, kpme);
1943 1940 } else {
1944 1941 panic("segmap_getmapflt: stale kpme page after "
1945 1942 "VOP_GETPAGE, kpme %p", (void *)kpme);
1946 1943 /*NOTREACHED*/
1947 1944 }
1948 1945
1949 1946 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
1950 1947
1951 1948 return (baseaddr);
1952 1949
1953 1950
1954 1951 use_segmap_range:
1955 1952 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
1956 1953 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
1957 1954 "segmap_getmap:seg %p addr %p vp %p offset %llx",
1958 1955 seg, baseaddr, vp, baseoff);
1959 1956
1960 1957 /*
1961 1958 * Prefault the translations
1962 1959 */
1963 1960 vaddr = baseaddr + (off - baseoff);
1964 1961 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
1965 1962
1966 1963 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
1967 1964 (uintptr_t)PAGEMASK);
1968 1965
1969 1966 (void) segmap_fault(kas.a_hat, seg, pgaddr,
1970 1967 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
1971 1968 F_INVAL, rw);
1972 1969 }
1973 1970
1974 1971 return (baseaddr);
1975 1972 }
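A minimal sketch of how a filesystem read path typically drives segmap_getmapflt, assuming caller state (vp, off, n, uio) and a transfer that stays within one MAXBSIZE window; the error handling mirrors the common pattern rather than any particular filesystem:

	caddr_t base;
	size_t mapon = off & MAXBOFFSET;	/* offset within the MAXBSIZE window */
	int error;

	base = segmap_getmapflt(segkmap, vp, off, n, 1 /* forcefault */, S_READ);
	error = uiomove(base + mapon, n, UIO_READ, uio);
	if (error == 0)
		error = segmap_release(segkmap, base, SM_DONTNEED);
	else
		(void) segmap_release(segkmap, base, 0);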
1976 1973
1977 1974 int
1978 1975 segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
1979 1976 {
1980 1977 struct smap *smp;
1981 1978 int error;
1982 1979 int bflags = 0;
1983 1980 struct vnode *vp;
1984 1981 u_offset_t offset;
1985 1982 kmutex_t *smtx;
1986 1983 int is_kpm = 0;
1987 1984 page_t *pp;
1988 1985
1989 1986 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1990 1987
1991 1988 if (((uintptr_t)addr & MAXBOFFSET) != 0) {
1992 1989 panic("segmap_release: addr %p not "
1993 1990 "MAXBSIZE aligned", (void *)addr);
1994 1991 /*NOTREACHED*/
1995 1992 }
1996 1993
1997 1994 if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
1998 1995 panic("segmap_release: smap not found "
1999 1996 "for addr %p", (void *)addr);
2000 1997 /*NOTREACHED*/
2001 1998 }
2002 1999
2003 2000 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2004 2001 "segmap_relmap:seg %p addr %p smp %p",
2005 2002 seg, addr, smp);
2006 2003
2007 2004 smtx = SMAPMTX(smp);
2008 2005
2009 2006 /*
2010 2007 * For compatibility reasons segmap_pagecreate_kpm sets this
2011 2008 * flag to allow a following segmap_pagecreate to return
2012 2009 		 * this as the "newpage" flag. When segmap_pagecreate is not
2013 2010 		 * called at all, we clear it now.
2014 2011 */
2015 2012 smp->sm_flags &= ~SM_KPM_NEWPAGE;
2016 2013 is_kpm = 1;
2017 2014 if (smp->sm_flags & SM_WRITE_DATA) {
2018 2015 hat_setrefmod(pp);
2019 2016 } else if (smp->sm_flags & SM_READ_DATA) {
2020 2017 hat_setref(pp);
2021 2018 }
2022 2019 } else {
2023 2020 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
2024 2021 ((uintptr_t)addr & MAXBOFFSET) != 0) {
2025 2022 panic("segmap_release: bad addr %p", (void *)addr);
2026 2023 /*NOTREACHED*/
2027 2024 }
2028 2025 smp = GET_SMAP(seg, addr);
2029 2026
2030 2027 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2031 2028 "segmap_relmap:seg %p addr %p smp %p",
2032 2029 seg, addr, smp);
2033 2030
2034 2031 smtx = SMAPMTX(smp);
2035 2032 mutex_enter(smtx);
2036 2033 smp->sm_flags |= SM_NOTKPM_RELEASED;
2037 2034 }
2038 2035
2039 2036 ASSERT(smp->sm_refcnt > 0);
2040 2037
2041 2038 /*
2042 2039 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2043 2040 * are set.
2044 2041 */
2045 2042 if ((flags & ~SM_DONTNEED) != 0) {
2046 2043 if (flags & SM_WRITE)
2047 2044 segmapcnt.smp_rel_write.value.ul++;
2048 2045 if (flags & SM_ASYNC) {
2049 2046 bflags |= B_ASYNC;
2050 2047 segmapcnt.smp_rel_async.value.ul++;
2051 2048 }
2052 2049 if (flags & SM_INVAL) {
2053 2050 bflags |= B_INVAL;
2054 2051 segmapcnt.smp_rel_abort.value.ul++;
2055 2052 }
2056 2053 if (flags & SM_DESTROY) {
2057 2054 bflags |= (B_INVAL|B_TRUNC);
2058 2055 segmapcnt.smp_rel_abort.value.ul++;
2059 2056 }
2060 2057 if (smp->sm_refcnt == 1) {
2061 2058 /*
2062 2059 * We only bother doing the FREE and DONTNEED flags
2063 2060 * if no one else is still referencing this mapping.
2064 2061 */
2065 2062 if (flags & SM_FREE) {
2066 2063 bflags |= B_FREE;
2067 2064 segmapcnt.smp_rel_free.value.ul++;
2068 2065 }
2069 2066 if (flags & SM_DONTNEED) {
2070 2067 bflags |= B_DONTNEED;
2071 2068 segmapcnt.smp_rel_dontneed.value.ul++;
2072 2069 }
2073 2070 }
2074 2071 } else {
2075 2072 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
2076 2073 }
2077 2074
2078 2075 vp = smp->sm_vp;
2079 2076 offset = smp->sm_off;
2080 2077
2081 2078 if (--smp->sm_refcnt == 0) {
2082 2079
2083 2080 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2084 2081
2085 2082 if (flags & (SM_INVAL|SM_DESTROY)) {
2086 2083 segmap_hashout(smp); /* remove map info */
2087 2084 if (is_kpm) {
2088 2085 hat_kpm_mapout(pp, GET_KPME(smp), addr);
2089 2086 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
2090 2087 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2091 2088 hat_unload(kas.a_hat, segkmap->s_base +
2092 2089 ((smp - smd_smap) * MAXBSIZE),
2093 2090 MAXBSIZE, HAT_UNLOAD);
2094 2091 }
2095 2092
2096 2093 } else {
2097 2094 if (segmap_kpm)
2098 2095 segkpm_mapout_validkpme(GET_KPME(smp));
2099 2096
2100 2097 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2101 2098 hat_unload(kas.a_hat, addr, MAXBSIZE,
2102 2099 HAT_UNLOAD);
2103 2100 }
2104 2101 }
2105 2102 segmap_smapadd(smp); /* add to free list */
2106 2103 }
2107 2104
2108 2105 mutex_exit(smtx);
2109 2106
2110 2107 if (is_kpm)
2111 2108 page_unlock(pp);
2112 2109 /*
2113 2110 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2114 2111 * are set.
2115 2112 */
2116 2113 if ((flags & ~SM_DONTNEED) != 0) {
2117 2114 error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
2118 2115 bflags, CRED(), NULL);
2119 2116 } else {
2120 2117 error = 0;
2121 2118 }
2122 2119
2123 2120 return (error);
2124 2121 }
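A hedged sketch of how a write path commonly chooses release flags for the translation above; `error` and `sync_write` are assumed caller state, and real filesystems vary in the details:

	uint_t flags;

	if (error)
		flags = SM_DESTROY;	/* B_INVAL|B_TRUNC: drop the data, don't push it */
	else if (sync_write)
		flags = SM_WRITE;	/* synchronous VOP_PUTPAGE */
	else
		flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
	error = segmap_release(segkmap, base, flags);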
2125 2122
2126 2123 /*
2127 2124 * Dump the pages belonging to this segmap segment.
2128 2125 */
2129 2126 static void
2130 2127 segmap_dump(struct seg *seg)
2131 2128 {
2132 2129 struct segmap_data *smd;
2133 2130 struct smap *smp, *smp_end;
2134 2131 page_t *pp;
2135 2132 pfn_t pfn;
2136 2133 u_offset_t off;
2137 2134 caddr_t addr;
2138 2135
2139 2136 smd = (struct segmap_data *)seg->s_data;
2140 2137 addr = seg->s_base;
2141 2138 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
2142 2139 smp < smp_end; smp++) {
2143 2140
2144 2141 if (smp->sm_refcnt) {
2145 2142 for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
2146 2143 int we_own_it = 0;
2147 2144
2148 2145 /*
2149 2146 * If pp == NULL, the page either does
2150 2147 * not exist or is exclusively locked.
2151 2148 * So determine if it exists before
2152 2149 * searching for it.
2153 2150 */
2154 2151 if ((pp = page_lookup_nowait(smp->sm_vp,
2155 2152 smp->sm_off + off, SE_SHARED)))
2156 2153 we_own_it = 1;
2157 2154 else
2158 2155 pp = page_exists(smp->sm_vp,
2159 2156 smp->sm_off + off);
2160 2157
2161 2158 if (pp) {
2162 2159 pfn = page_pptonum(pp);
2163 2160 dump_addpage(seg->s_as,
2164 2161 addr + off, pfn);
2165 2162 if (we_own_it)
2166 2163 page_unlock(pp);
2167 2164 }
2168 2165 dump_timeleft = dump_timeout;
2169 2166 }
2170 2167 }
2171 2168 addr += MAXBSIZE;
2172 2169 }
2173 2170 }
2174 2171
2175 2172 /*ARGSUSED*/
2176 2173 static int
2177 2174 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
2178 2175 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2179 2176 {
2180 2177 return (ENOTSUP);
2181 2178 }
2182 2179
2183 2180 static int
2184 2181 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2185 2182 {
2186 2183 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
2187 2184
2188 2185 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
2189 2186 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
2190 2187 return (0);
2191 -}
2192 -
2193 -/*ARGSUSED*/
2194 -static lgrp_mem_policy_info_t *
2195 -segmap_getpolicy(struct seg *seg, caddr_t addr)
2196 -{
2197 - return (NULL);
2198 2188 }
2199 2189
2200 2190 /*ARGSUSED*/
2201 2191 static int
2202 2192 segmap_capable(struct seg *seg, segcapability_t capability)
2203 2193 {
2204 2194 return (0);
2205 2195 }
2206 2196
2207 2197
2208 2198 #ifdef SEGKPM_SUPPORT
2209 2199
2210 2200 /*
2211 2201 * segkpm support routines
2212 2202 */
2213 2203
2214 2204 static caddr_t
2215 2205 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2216 2206 struct smap *smp, enum seg_rw rw)
2217 2207 {
2218 2208 caddr_t base;
2219 2209 page_t *pp;
2220 2210 int newpage = 0;
2221 2211 struct kpme *kpme;
2222 2212
2223 2213 ASSERT(smp->sm_refcnt > 0);
2224 2214
2225 2215 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
2226 2216 kmutex_t *smtx;
2227 2217
2228 2218 base = segkpm_create_va(off);
2229 2219
2230 2220 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
2231 2221 seg, base)) == NULL) {
2232 2222 panic("segmap_pagecreate_kpm: "
2233 2223 "page_create failed");
2234 2224 /*NOTREACHED*/
2235 2225 }
2236 2226
2237 2227 newpage = 1;
2238 2228 page_io_unlock(pp);
2239 2229 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
2240 2230
2241 2231 /*
2242 2232 * Mark this here until the following segmap_pagecreate
2243 2233 * or segmap_release.
2244 2234 */
2245 2235 smtx = SMAPMTX(smp);
2246 2236 mutex_enter(smtx);
2247 2237 smp->sm_flags |= SM_KPM_NEWPAGE;
2248 2238 mutex_exit(smtx);
2249 2239 }
2250 2240
2251 2241 kpme = GET_KPME(smp);
2252 2242 if (!newpage && kpme->kpe_page == pp)
2253 2243 base = hat_kpm_page2va(pp, 0);
2254 2244 else
2255 2245 base = hat_kpm_mapin(pp, kpme);
2256 2246
2257 2247 /*
2258 2248 * FS code may decide not to call segmap_pagecreate and we
2259 2249 * don't invoke segmap_fault via TLB miss, so we have to set
2260 2250 * ref and mod bits in advance.
2261 2251 */
2262 2252 if (rw == S_WRITE) {
2263 2253 hat_setrefmod(pp);
2264 2254 } else {
2265 2255 ASSERT(rw == S_READ);
2266 2256 hat_setref(pp);
2267 2257 }
2268 2258
2269 2259 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
2270 2260
2271 2261 return (base);
2272 2262 }
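To illustrate the SM_KPM_NEWPAGE handshake, a hedged sketch of the caller-side sequence on a segkpm machine; segmap_pagecreate is defined elsewhere in this file and the surrounding variables are illustrative:

	/* kpm path: segmap_getmapflt calls segmap_pagecreate_kpm above */
	base = segmap_getmapflt(segkmap, vp, off, PAGESIZE, SM_PAGECREATE, S_WRITE);

	/* consumes SM_KPM_NEWPAGE and reports the page created above as new */
	newpage = segmap_pagecreate(segkmap, base, PAGESIZE, 0);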
2273 2263
2274 2264 /*
2275 2265 * Find the smap structure corresponding to the
2276 2266 * KPM addr and return it locked.
2277 2267 */
2278 2268 struct smap *
2279 2269 get_smap_kpm(caddr_t addr, page_t **ppp)
2280 2270 {
2281 2271 struct smap *smp;
2282 2272 struct vnode *vp;
2283 2273 u_offset_t offset;
2284 2274 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
2285 2275 int hashid;
2286 2276 kmutex_t *hashmtx;
2287 2277 page_t *pp;
2288 2278 union segmap_cpu *scpu;
2289 2279
2290 2280 pp = hat_kpm_vaddr2page(baseaddr);
2291 2281
2292 2282 ASSERT(pp && !PP_ISFREE(pp));
2293 2283 ASSERT(PAGE_LOCKED(pp));
2294 2284 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
2295 2285
2296 2286 vp = pp->p_vnode;
2297 2287 offset = pp->p_offset;
2298 2288 ASSERT(vp != NULL);
2299 2289
2300 2290 /*
2301 2291 * Assume the last smap used on this cpu is the one needed.
2302 2292 */
2303 2293 scpu = smd_cpu+CPU->cpu_seqid;
2304 2294 smp = scpu->scpu.scpu_last_smap;
2305 2295 mutex_enter(&smp->sm_mtx);
2306 2296 if (smp->sm_vp == vp && smp->sm_off == offset) {
2307 2297 ASSERT(smp->sm_refcnt > 0);
2308 2298 } else {
2309 2299 /*
2310 2300 * Assumption wrong, find the smap on the hash chain.
2311 2301 */
2312 2302 mutex_exit(&smp->sm_mtx);
2313 2303 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
2314 2304 hashmtx = SHASHMTX(hashid);
2315 2305
2316 2306 mutex_enter(hashmtx);
2317 2307 smp = smd_hash[hashid].sh_hash_list;
2318 2308 for (; smp != NULL; smp = smp->sm_hash) {
2319 2309 if (smp->sm_vp == vp && smp->sm_off == offset)
2320 2310 break;
2321 2311 }
2322 2312 mutex_exit(hashmtx);
2323 2313 if (smp) {
2324 2314 mutex_enter(&smp->sm_mtx);
2325 2315 ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
2326 2316 }
2327 2317 }
2328 2318
2329 2319 if (ppp)
2330 2320 *ppp = smp ? pp : NULL;
2331 2321
2332 2322 return (smp);
2333 2323 }
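The per-CPU scpu_last_smap recorded by segmap_getmapflt is what makes the common getmap/release pairing cheap here: segmap_release on a kpm address usually finds its smap with a single mutex_enter, and only falls back to the hash walk when the cached smap no longer matches the (vp, offset) of the page.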
2334 2324
2335 2325 #else /* SEGKPM_SUPPORT */
2336 2326
2337 2327 /* segkpm stubs */
2338 2328
2339 2329 /*ARGSUSED*/
2340 2330 static caddr_t
2341 2331 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2342 2332 struct smap *smp, enum seg_rw rw)
2343 2333 {
2344 2334 return (NULL);
2345 2335 }
2346 2336
2347 2337 /*ARGSUSED*/
2348 2338 struct smap *
2349 2339 get_smap_kpm(caddr_t addr, page_t **ppp)
2350 2340 {
2351 2341 return (NULL);
2352 2342 }
2353 2343
2354 2344 #endif /* SEGKPM_SUPPORT */