no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Leaving the function pointer NULL accomplishes the same thing in most
cases; in the remaining cases, a NULL function pointer results in the
proper error code being returned.
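A minimal userland sketch of the dispatch pattern this relies on (not the
kernel code itself): the call site tests the op pointer for NULL and returns
an error code instead of calling through a panicking stub. The names below
(my_ops_t, demo_unmap, call_dup) are hypothetical, chosen only to illustrate.

    #include <stdio.h>
    #include <errno.h>

    typedef struct my_ops {
            int (*dup)(void *arg);          /* optional op, may be left NULL */
            int (*unmap)(void *arg);        /* implemented op */
    } my_ops_t;

    static int
    demo_unmap(void *arg)
    {
            (void) arg;
            return (0);
    }

    /* The .dup slot is left NULL rather than pointing at a panic stub. */
    static my_ops_t demo_ops = {
            .unmap = demo_unmap,
    };

    static int
    call_dup(my_ops_t *ops, void *arg)
    {
            /* A NULL op yields a proper error code; no bad-op stub needed. */
            if (ops->dup == NULL)
                    return (ENOTSUP);
            return (ops->dup(arg));
    }

    int
    main(void)
    {
            printf("dup -> %d\n", call_dup(&demo_ops, NULL));
            return (0);
    }

Call sites that don't check for NULL fault on the call, much as the explicit
panic did, so the stubs buy nothing there either.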
--- old/usr/src/uts/common/vm/seg_spt.c
+++ new/usr/src/uts/common/vm/seg_spt.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 #include <sys/param.h>
26 26 #include <sys/user.h>
27 27 #include <sys/mman.h>
28 28 #include <sys/kmem.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/cmn_err.h>
31 31 #include <sys/systm.h>
32 32 #include <sys/tuneable.h>
33 33 #include <vm/hat.h>
34 34 #include <vm/seg.h>
35 35 #include <vm/as.h>
36 36 #include <vm/anon.h>
37 37 #include <vm/page.h>
38 38 #include <sys/buf.h>
39 39 #include <sys/swap.h>
40 40 #include <sys/atomic.h>
41 41 #include <vm/seg_spt.h>
42 42 #include <sys/debug.h>
43 43 #include <sys/vtrace.h>
44 44 #include <sys/shm.h>
45 45 #include <sys/shm_impl.h>
46 46 #include <sys/lgrp.h>
47 47 #include <sys/vmsystm.h>
48 48 #include <sys/policy.h>
49 49 #include <sys/project.h>
50 50 #include <sys/tnf_probe.h>
51 51 #include <sys/zone.h>
52 52
53 53 #define SEGSPTADDR (caddr_t)0x0
54 54
55 55 /*
56 56 * # pages used for spt
57 57 */
58 58 size_t spt_used;
59 59
60 60 /*
61 61 * segspt_minfree is the memory left for system after ISM
62 62 * locked its pages; it is set to 5% of availrmem in
63 63 * sptcreate when ISM is created. ISM should not use more
64 64 * than ~90% of availrmem; if it does, then the performance
65 65 * of the system may decrease. Machines with large memories may
66 66 * be able to use up more memory for ISM so we set the default
67 67 * segspt_minfree to 5% (which gives ISM max 95% of availrmem).
68 68 * If somebody wants even more memory for ISM (risking hanging
69 69 * the system) they can patch segspt_minfree to a smaller number.
70 70 */
71 71 pgcnt_t segspt_minfree = 0;
72 72
73 73 static int segspt_create(struct seg *seg, caddr_t argsp);
74 74 static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
75 75 static void segspt_free(struct seg *seg);
76 76 static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
77 77 static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);
78 78
79 -static void
80 -segspt_badop()
81 -{
82 - panic("segspt_badop called");
83 - /*NOTREACHED*/
84 -}
85 -
86 -#define SEGSPT_BADOP(t) (t(*)())segspt_badop
87 -
88 79 struct seg_ops segspt_ops = {
89 - .dup = SEGSPT_BADOP(int),
90 80 .unmap = segspt_unmap,
91 81 .free = segspt_free,
92 - .fault = SEGSPT_BADOP(int),
93 - .faulta = SEGSPT_BADOP(faultcode_t),
94 - .setprot = SEGSPT_BADOP(int),
95 - .checkprot = SEGSPT_BADOP(int),
96 - .kluster = SEGSPT_BADOP(int),
97 - .sync = SEGSPT_BADOP(int),
98 - .incore = SEGSPT_BADOP(size_t),
99 - .lockop = SEGSPT_BADOP(int),
100 - .getprot = SEGSPT_BADOP(int),
101 - .getoffset = SEGSPT_BADOP(u_offset_t),
102 - .gettype = SEGSPT_BADOP(int),
103 - .getvp = SEGSPT_BADOP(int),
104 - .advise = SEGSPT_BADOP(int),
105 - .dump = SEGSPT_BADOP(void),
106 - .pagelock = SEGSPT_BADOP(int),
107 - .setpagesize = SEGSPT_BADOP(int),
108 - .getmemid = SEGSPT_BADOP(int),
109 82 .getpolicy = segspt_getpolicy,
110 - .capable = SEGSPT_BADOP(int),
111 83 .inherit = seg_inherit_notsup,
112 84 };
113 85
114 86 static int segspt_shmdup(struct seg *seg, struct seg *newseg);
115 87 static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
116 88 static void segspt_shmfree(struct seg *seg);
117 89 static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
118 90 caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
119 91 static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
120 92 static int segspt_shmsetprot(register struct seg *seg, register caddr_t addr,
121 93 register size_t len, register uint_t prot);
122 94 static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
123 95 uint_t prot);
124 96 static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
125 97 static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
126 98 register char *vec);
127 99 static int segspt_shmsync(struct seg *seg, register caddr_t addr, size_t len,
128 100 int attr, uint_t flags);
129 101 static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
130 102 int attr, int op, ulong_t *lockmap, size_t pos);
131 103 static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
132 104 uint_t *protv);
133 105 static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
134 106 static int segspt_shmgettype(struct seg *seg, caddr_t addr);
135 107 static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
136 108 static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
137 109 uint_t behav);
138 110 static void segspt_shmdump(struct seg *seg);
139 111 static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
140 112 struct page ***, enum lock_type, enum seg_rw);
141 113 static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
142 114 static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
143 115 static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);
144 116 static int segspt_shmcapable(struct seg *, segcapability_t);
145 117
146 118 struct seg_ops segspt_shmops = {
147 119 .dup = segspt_shmdup,
148 120 .unmap = segspt_shmunmap,
149 121 .free = segspt_shmfree,
150 122 .fault = segspt_shmfault,
151 123 .faulta = segspt_shmfaulta,
152 124 .setprot = segspt_shmsetprot,
153 125 .checkprot = segspt_shmcheckprot,
154 126 .kluster = segspt_shmkluster,
155 127 .sync = segspt_shmsync,
156 128 .incore = segspt_shmincore,
157 129 .lockop = segspt_shmlockop,
158 130 .getprot = segspt_shmgetprot,
159 131 .getoffset = segspt_shmgetoffset,
160 132 .gettype = segspt_shmgettype,
161 133 .getvp = segspt_shmgetvp,
162 134 .advise = segspt_shmadvise,
163 135 .dump = segspt_shmdump,
164 136 .pagelock = segspt_shmpagelock,
165 137 .setpagesize = segspt_shmsetpgsz,
166 138 .getmemid = segspt_shmgetmemid,
167 139 .getpolicy = segspt_shmgetpolicy,
168 140 .capable = segspt_shmcapable,
169 141 .inherit = seg_inherit_notsup,
170 142 };
171 143
172 144 static void segspt_purge(struct seg *seg);
173 145 static int segspt_reclaim(void *, caddr_t, size_t, struct page **,
174 146 enum seg_rw, int);
175 147 static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
176 148 page_t **ppa);
177 149
178 150
179 151
180 152 /*ARGSUSED*/
181 153 int
182 154 sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
183 155 uint_t prot, uint_t flags, uint_t share_szc)
184 156 {
185 157 int err;
186 158 struct as *newas;
187 159 struct segspt_crargs sptcargs;
188 160
189 161 #ifdef DEBUG
190 162 TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
191 163 tnf_ulong, size, size );
192 164 #endif
193 165 if (segspt_minfree == 0) /* leave min 5% of availrmem */
194 166 segspt_minfree = availrmem/20; /* for the system */
195 167
196 168 if (!hat_supported(HAT_SHARED_PT, (void *)0))
197 169 return (EINVAL);
198 170
199 171 /*
200 172 * get a new as for this shared memory segment
201 173 */
202 174 newas = as_alloc();
203 175 newas->a_proc = NULL;
204 176 sptcargs.amp = amp;
205 177 sptcargs.prot = prot;
206 178 sptcargs.flags = flags;
207 179 sptcargs.szc = share_szc;
208 180 /*
209 181 * create a shared page table (spt) segment
210 182 */
211 183
212 184 if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
213 185 as_free(newas);
214 186 return (err);
215 187 }
216 188 *sptseg = sptcargs.seg_spt;
217 189 return (0);
218 190 }
219 191
220 192 void
221 193 sptdestroy(struct as *as, struct anon_map *amp)
222 194 {
223 195
224 196 #ifdef DEBUG
225 197 TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
226 198 #endif
227 199 (void) as_unmap(as, SEGSPTADDR, amp->size);
228 200 as_free(as);
229 201 }
230 202
231 203 /*
232 204 * called from seg_free().
233 205 * free (i.e., unlock, unmap, return to free list)
234 206 * all the pages in the given seg.
235 207 */
236 208 void
237 209 segspt_free(struct seg *seg)
238 210 {
239 211 struct spt_data *sptd = (struct spt_data *)seg->s_data;
240 212
241 213 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
242 214
243 215 if (sptd != NULL) {
244 216 if (sptd->spt_realsize)
245 217 segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
246 218
247 219 if (sptd->spt_ppa_lckcnt)
248 220 kmem_free(sptd->spt_ppa_lckcnt,
249 221 sizeof (*sptd->spt_ppa_lckcnt)
250 222 * btopr(sptd->spt_amp->size));
251 223 kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
252 224 cv_destroy(&sptd->spt_cv);
253 225 mutex_destroy(&sptd->spt_lock);
254 226 kmem_free(sptd, sizeof (*sptd));
255 227 }
256 228 }
257 229
258 230 /*ARGSUSED*/
259 231 static int
260 232 segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
261 233 uint_t flags)
262 234 {
263 235 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
264 236
265 237 return (0);
266 238 }
267 239
268 240 /*ARGSUSED*/
269 241 static size_t
270 242 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
271 243 {
272 244 caddr_t eo_seg;
273 245 pgcnt_t npages;
274 246 struct shm_data *shmd = (struct shm_data *)seg->s_data;
275 247 struct seg *sptseg;
276 248 struct spt_data *sptd;
277 249
278 250 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
279 251 #ifdef lint
280 252 seg = seg;
281 253 #endif
282 254 sptseg = shmd->shm_sptseg;
283 255 sptd = sptseg->s_data;
284 256
285 257 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
286 258 eo_seg = addr + len;
287 259 while (addr < eo_seg) {
288 260 /* page exists, and it's locked. */
289 261 *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
290 262 SEG_PAGE_ANON;
291 263 addr += PAGESIZE;
292 264 }
293 265 return (len);
294 266 } else {
295 267 struct anon_map *amp = shmd->shm_amp;
296 268 struct anon *ap;
297 269 page_t *pp;
298 270 pgcnt_t anon_index;
299 271 struct vnode *vp;
300 272 u_offset_t off;
301 273 ulong_t i;
302 274 int ret;
303 275 anon_sync_obj_t cookie;
304 276
305 277 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
306 278 anon_index = seg_page(seg, addr);
307 279 npages = btopr(len);
308 280 if (anon_index + npages > btopr(shmd->shm_amp->size)) {
309 281 return (EINVAL);
310 282 }
311 283 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
312 284 for (i = 0; i < npages; i++, anon_index++) {
313 285 ret = 0;
314 286 anon_array_enter(amp, anon_index, &cookie);
315 287 ap = anon_get_ptr(amp->ahp, anon_index);
316 288 if (ap != NULL) {
317 289 swap_xlate(ap, &vp, &off);
318 290 anon_array_exit(&cookie);
319 291 pp = page_lookup_nowait(vp, off, SE_SHARED);
320 292 if (pp != NULL) {
321 293 ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
322 294 page_unlock(pp);
323 295 }
324 296 } else {
325 297 anon_array_exit(&cookie);
326 298 }
327 299 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
328 300 ret |= SEG_PAGE_LOCKED;
329 301 }
330 302 *vec++ = (char)ret;
331 303 }
332 304 ANON_LOCK_EXIT(&amp->a_rwlock);
333 305 return (len);
334 306 }
335 307 }
336 308
337 309 static int
338 310 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
339 311 {
340 312 size_t share_size;
341 313
342 314 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
343 315
344 316 /*
345 317 * seg.s_size may have been rounded up to the largest page size
346 318 * in shmat().
347 319 * XXX This should be cleaned up. sptdestroy should take a length
348 320 * argument which should be the same as sptcreate. Then
349 321 * this rounding would not be needed (or could be done in shm.c).
350 322 * Only the check for the full segment would be needed.
351 323 *
352 324 * XXX -- shouldn't raddr == 0 always? These tests don't seem
353 325 * to be useful at all.
354 326 */
355 327 share_size = page_get_pagesize(seg->s_szc);
356 328 ssize = P2ROUNDUP(ssize, share_size);
357 329
358 330 if (raddr == seg->s_base && ssize == seg->s_size) {
359 331 seg_free(seg);
360 332 return (0);
361 333 } else
362 334 return (EINVAL);
363 335 }
364 336
365 337 int
366 338 segspt_create(struct seg *seg, caddr_t argsp)
367 339 {
368 340 int err;
369 341 caddr_t addr = seg->s_base;
370 342 struct spt_data *sptd;
371 343 struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
372 344 struct anon_map *amp = sptcargs->amp;
373 345 struct kshmid *sp = amp->a_sp;
374 346 struct cred *cred = CRED();
375 347 ulong_t i, j, anon_index = 0;
376 348 pgcnt_t npages = btopr(amp->size);
377 349 struct vnode *vp;
378 350 page_t **ppa;
379 351 uint_t hat_flags;
380 352 size_t pgsz;
381 353 pgcnt_t pgcnt;
382 354 caddr_t a;
383 355 pgcnt_t pidx;
384 356 size_t sz;
385 357 proc_t *procp = curproc;
386 358 rctl_qty_t lockedbytes = 0;
387 359 kproject_t *proj;
388 360
389 361 /*
390 362 * We are holding the a_lock on the underlying dummy as,
391 363 * so we can make calls to the HAT layer.
392 364 */
393 365 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
394 366 ASSERT(sp != NULL);
395 367
396 368 #ifdef DEBUG
397 369 TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
398 370 tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
399 371 #endif
400 372 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
401 373 if (err = anon_swap_adjust(npages))
402 374 return (err);
403 375 }
404 376 err = ENOMEM;
405 377
406 378 if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
407 379 goto out1;
408 380
409 381 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
410 382 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
411 383 KM_NOSLEEP)) == NULL)
412 384 goto out2;
413 385 }
414 386
415 387 mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);
416 388
417 389 if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
418 390 goto out3;
419 391
420 392 seg->s_ops = &segspt_ops;
421 393 sptd->spt_vp = vp;
422 394 sptd->spt_amp = amp;
423 395 sptd->spt_prot = sptcargs->prot;
424 396 sptd->spt_flags = sptcargs->flags;
425 397 seg->s_data = (caddr_t)sptd;
426 398 sptd->spt_ppa = NULL;
427 399 sptd->spt_ppa_lckcnt = NULL;
428 400 seg->s_szc = sptcargs->szc;
429 401 cv_init(&sptd->spt_cv, NULL, CV_DEFAULT, NULL);
430 402 sptd->spt_gen = 0;
431 403
432 404 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
433 405 if (seg->s_szc > amp->a_szc) {
434 406 amp->a_szc = seg->s_szc;
435 407 }
436 408 ANON_LOCK_EXIT(&amp->a_rwlock);
437 409
438 410 /*
439 411 * Set policy to affect initial allocation of pages in
440 412 * anon_map_createpages()
441 413 */
442 414 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
443 415 NULL, 0, ptob(npages));
444 416
445 417 if (sptcargs->flags & SHM_PAGEABLE) {
446 418 size_t share_sz;
447 419 pgcnt_t new_npgs, more_pgs;
448 420 struct anon_hdr *nahp;
449 421 zone_t *zone;
450 422
451 423 share_sz = page_get_pagesize(seg->s_szc);
452 424 if (!IS_P2ALIGNED(amp->size, share_sz)) {
453 425 /*
454 426 * We round up the size of the anon array
455 427 * to a 4 M boundary because we always create 4 M
456 428 * of page(s) when locking and faulting pages, so we
457 429 * don't have to check for all corner cases, e.g.
458 430 * whether there is enough space to allocate a 4 M
459 431 * page.
460 432 */
461 433 new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
462 434 more_pgs = new_npgs - npages;
463 435
464 436 /*
465 437 * The zone will never be NULL, as a fully created
466 438 * shm always has an owning zone.
467 439 */
468 440 zone = sp->shm_perm.ipc_zone_ref.zref_zone;
469 441 ASSERT(zone != NULL);
470 442 if (anon_resv_zone(ptob(more_pgs), zone) == 0) {
471 443 err = ENOMEM;
472 444 goto out4;
473 445 }
474 446
475 447 nahp = anon_create(new_npgs, ANON_SLEEP);
476 448 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
477 449 (void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
478 450 ANON_SLEEP);
479 451 anon_release(amp->ahp, npages);
480 452 amp->ahp = nahp;
481 453 ASSERT(amp->swresv == ptob(npages));
482 454 amp->swresv = amp->size = ptob(new_npgs);
483 455 ANON_LOCK_EXIT(&amp->a_rwlock);
484 456 npages = new_npgs;
485 457 }
486 458
487 459 sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
488 460 sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
489 461 sptd->spt_pcachecnt = 0;
490 462 sptd->spt_realsize = ptob(npages);
491 463 sptcargs->seg_spt = seg;
492 464 return (0);
493 465 }
494 466
495 467 /*
496 468 * get array of pages for each anon slot in amp
497 469 */
498 470 if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
499 471 seg, addr, S_CREATE, cred)) != 0)
500 472 goto out4;
501 473
502 474 mutex_enter(&sp->shm_mlock);
503 475
504 476 /* May be partially locked, so count bytes to charge for locking */
505 477 for (i = 0; i < npages; i++)
506 478 if (ppa[i]->p_lckcnt == 0)
507 479 lockedbytes += PAGESIZE;
508 480
509 481 proj = sp->shm_perm.ipc_proj;
510 482
511 483 if (lockedbytes > 0) {
512 484 mutex_enter(&procp->p_lock);
513 485 if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
514 486 mutex_exit(&procp->p_lock);
515 487 mutex_exit(&sp->shm_mlock);
516 488 for (i = 0; i < npages; i++)
517 489 page_unlock(ppa[i]);
518 490 err = ENOMEM;
519 491 goto out4;
520 492 }
521 493 mutex_exit(&procp->p_lock);
522 494 }
523 495
524 496 /*
525 497 * addr is the initial address corresponding to the first page on the ppa list
526 498 */
527 499 for (i = 0; i < npages; i++) {
528 500 /* attempt to lock all pages */
529 501 if (page_pp_lock(ppa[i], 0, 1) == 0) {
530 502 /*
531 503 * if unable to lock any page, unlock all
532 504 * of them and return error
533 505 */
534 506 for (j = 0; j < i; j++)
535 507 page_pp_unlock(ppa[j], 0, 1);
536 508 for (i = 0; i < npages; i++)
537 509 page_unlock(ppa[i]);
538 510 rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
539 511 mutex_exit(&sp->shm_mlock);
540 512 err = ENOMEM;
541 513 goto out4;
542 514 }
543 515 }
544 516 mutex_exit(&sp->shm_mlock);
545 517
546 518 /*
547 519 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
548 520 * for the entire life of the segment. For example, platforms
549 521 * that do not support Dynamic Reconfiguration.
550 522 */
551 523 hat_flags = HAT_LOAD_SHARE;
552 524 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
553 525 hat_flags |= HAT_LOAD_LOCK;
554 526
555 527 /*
556 528 * Load translations one large page at a time
557 529 * to make sure we don't create mappings bigger than
558 530 * segment's size code in case underlying pages
559 531 * are shared with segvn's segment that uses bigger
560 532 * size code than we do.
561 533 */
562 534 pgsz = page_get_pagesize(seg->s_szc);
563 535 pgcnt = page_get_pagecnt(seg->s_szc);
564 536 for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) {
565 537 sz = MIN(pgsz, ptob(npages - pidx));
566 538 hat_memload_array(seg->s_as->a_hat, a, sz,
567 539 &ppa[pidx], sptd->spt_prot, hat_flags);
568 540 }
569 541
570 542 /*
571 543 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
572 544 * we will leave the pages locked SE_SHARED for the life
573 545 * of the ISM segment. This will prevent any calls to
574 546 * hat_pageunload() on this ISM segment for those platforms.
575 547 */
576 548 if (!(hat_flags & HAT_LOAD_LOCK)) {
577 549 /*
578 550 * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
579 551 * we no longer need to hold the SE_SHARED lock on the pages,
580 552 * since L_PAGELOCK and F_SOFTLOCK calls will grab the
581 553 * SE_SHARED lock on the pages as necessary.
582 554 */
583 555 for (i = 0; i < npages; i++)
584 556 page_unlock(ppa[i]);
585 557 }
586 558 sptd->spt_pcachecnt = 0;
587 559 kmem_free(ppa, ((sizeof (page_t *)) * npages));
588 560 sptd->spt_realsize = ptob(npages);
589 561 atomic_add_long(&spt_used, npages);
590 562 sptcargs->seg_spt = seg;
591 563 return (0);
592 564
593 565 out4:
594 566 seg->s_data = NULL;
595 567 kmem_free(vp, sizeof (*vp));
596 568 cv_destroy(&sptd->spt_cv);
597 569 out3:
598 570 mutex_destroy(&sptd->spt_lock);
599 571 if ((sptcargs->flags & SHM_PAGEABLE) == 0)
600 572 kmem_free(ppa, (sizeof (*ppa) * npages));
601 573 out2:
602 574 kmem_free(sptd, sizeof (*sptd));
603 575 out1:
604 576 if ((sptcargs->flags & SHM_PAGEABLE) == 0)
605 577 anon_swap_restore(npages);
606 578 return (err);
607 579 }
608 580
609 581 /*ARGSUSED*/
610 582 void
611 583 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
612 584 {
613 585 struct page *pp;
614 586 struct spt_data *sptd = (struct spt_data *)seg->s_data;
615 587 pgcnt_t npages;
616 588 ulong_t anon_idx;
617 589 struct anon_map *amp;
618 590 struct anon *ap;
619 591 struct vnode *vp;
620 592 u_offset_t off;
621 593 uint_t hat_flags;
622 594 int root = 0;
623 595 pgcnt_t pgs, curnpgs = 0;
624 596 page_t *rootpp;
625 597 rctl_qty_t unlocked_bytes = 0;
626 598 kproject_t *proj;
627 599 kshmid_t *sp;
628 600
629 601 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
630 602
631 603 len = P2ROUNDUP(len, PAGESIZE);
632 604
633 605 npages = btop(len);
634 606
635 607 hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
636 608 if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
637 609 (sptd->spt_flags & SHM_PAGEABLE)) {
638 610 hat_flags = HAT_UNLOAD_UNMAP;
639 611 }
640 612
641 613 hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
642 614
643 615 amp = sptd->spt_amp;
644 616 if (sptd->spt_flags & SHM_PAGEABLE)
645 617 npages = btop(amp->size);
646 618
647 619 ASSERT(amp != NULL);
648 620
649 621 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
650 622 sp = amp->a_sp;
651 623 proj = sp->shm_perm.ipc_proj;
652 624 mutex_enter(&sp->shm_mlock);
653 625 }
654 626 for (anon_idx = 0; anon_idx < npages; anon_idx++) {
655 627 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
656 628 if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
657 629 panic("segspt_free_pages: null app");
658 630 /*NOTREACHED*/
659 631 }
660 632 } else {
661 633 if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
662 634 == NULL)
663 635 continue;
664 636 }
665 637 ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
666 638 swap_xlate(ap, &vp, &off);
667 639
668 640 /*
669 641 * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
670 642 * the pages won't be having SE_SHARED lock at this
671 643 * point.
672 644 *
673 645 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
674 646 * the pages are still held SE_SHARED locked from the
675 647 * original segspt_create()
676 648 *
677 649 * Our goal is to get SE_EXCL lock on each page, remove
678 650 * permanent lock on it and invalidate the page.
679 651 */
680 652 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
681 653 if (hat_flags == HAT_UNLOAD_UNMAP)
682 654 pp = page_lookup(vp, off, SE_EXCL);
683 655 else {
684 656 if ((pp = page_find(vp, off)) == NULL) {
685 657 panic("segspt_free_pages: "
686 658 "page not locked");
687 659 /*NOTREACHED*/
688 660 }
689 661 if (!page_tryupgrade(pp)) {
690 662 page_unlock(pp);
691 663 pp = page_lookup(vp, off, SE_EXCL);
692 664 }
693 665 }
694 666 if (pp == NULL) {
695 667 panic("segspt_free_pages: "
696 668 "page not in the system");
697 669 /*NOTREACHED*/
698 670 }
699 671 ASSERT(pp->p_lckcnt > 0);
700 672 page_pp_unlock(pp, 0, 1);
701 673 if (pp->p_lckcnt == 0)
702 674 unlocked_bytes += PAGESIZE;
703 675 } else {
704 676 if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
705 677 continue;
706 678 }
707 679 /*
708 680 * It's logical to invalidate the pages here as in most cases
709 681 * these were created by segspt.
710 682 */
711 683 if (pp->p_szc != 0) {
712 684 if (root == 0) {
713 685 ASSERT(curnpgs == 0);
714 686 root = 1;
715 687 rootpp = pp;
716 688 pgs = curnpgs = page_get_pagecnt(pp->p_szc);
717 689 ASSERT(pgs > 1);
718 690 ASSERT(IS_P2ALIGNED(pgs, pgs));
719 691 ASSERT(!(page_pptonum(pp) & (pgs - 1)));
720 692 curnpgs--;
721 693 } else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
722 694 ASSERT(curnpgs == 1);
723 695 ASSERT(page_pptonum(pp) ==
724 696 page_pptonum(rootpp) + (pgs - 1));
725 697 page_destroy_pages(rootpp);
726 698 root = 0;
727 699 curnpgs = 0;
728 700 } else {
729 701 ASSERT(curnpgs > 1);
730 702 ASSERT(page_pptonum(pp) ==
731 703 page_pptonum(rootpp) + (pgs - curnpgs));
732 704 curnpgs--;
733 705 }
734 706 } else {
735 707 if (root != 0 || curnpgs != 0) {
736 708 panic("segspt_free_pages: bad large page");
737 709 /*NOTREACHED*/
738 710 }
739 711 /*
740 712 * Before destroying the pages, we need to take care
741 713 * of the rctl locked memory accounting. For that
742 714 * we need to calculate the unlocked_bytes.
743 715 */
744 716 if (pp->p_lckcnt > 0)
745 717 unlocked_bytes += PAGESIZE;
746 718 /*LINTED: constant in conditional context */
747 719 VN_DISPOSE(pp, B_INVAL, 0, kcred);
748 720 }
749 721 }
750 722 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
751 723 if (unlocked_bytes > 0)
752 724 rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
753 725 mutex_exit(&sp->shm_mlock);
754 726 }
755 727 if (root != 0 || curnpgs != 0) {
756 728 panic("segspt_free_pages: bad large page");
757 729 /*NOTREACHED*/
758 730 }
759 731
760 732 /*
761 733 * mark that pages have been released
762 734 */
763 735 sptd->spt_realsize = 0;
764 736
765 737 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
766 738 atomic_add_long(&spt_used, -npages);
767 739 anon_swap_restore(npages);
768 740 }
769 741 }
770 742
771 743 /*
772 744 * Get memory allocation policy info for specified address in given segment
773 745 */
774 746 static lgrp_mem_policy_info_t *
775 747 segspt_getpolicy(struct seg *seg, caddr_t addr)
776 748 {
777 749 struct anon_map *amp;
778 750 ulong_t anon_index;
779 751 lgrp_mem_policy_info_t *policy_info;
780 752 struct spt_data *spt_data;
781 753
782 754 ASSERT(seg != NULL);
783 755
784 756 /*
785 757 * Get anon_map from segspt
786 758 *
787 759 * Assume that no lock needs to be held on anon_map, since
788 760 * it should be protected by its reference count which must be
789 761 * nonzero for an existing segment
790 762 * Need to grab readers lock on policy tree though
791 763 */
792 764 spt_data = (struct spt_data *)seg->s_data;
793 765 if (spt_data == NULL)
794 766 return (NULL);
795 767 amp = spt_data->spt_amp;
796 768 ASSERT(amp->refcnt != 0);
797 769
798 770 /*
799 771 * Get policy info
800 772 *
801 773 * Assume starting anon index of 0
802 774 */
803 775 anon_index = seg_page(seg, addr);
804 776 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);
805 777
806 778 return (policy_info);
807 779 }
808 780
809 781 /*
810 782 * DISM only.
811 783 * Return locked pages over a given range.
812 784 *
813 785 * We will cache all DISM locked pages and save the pplist for the
814 786 * entire segment in the ppa field of the underlying DISM segment structure.
815 787 * Later, during a call to segspt_reclaim() we will use this ppa array
816 788 * to page_unlock() all of the pages and then we will free this ppa list.
817 789 */
818 790 /*ARGSUSED*/
819 791 static int
820 792 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
821 793 struct page ***ppp, enum lock_type type, enum seg_rw rw)
822 794 {
823 795 struct shm_data *shmd = (struct shm_data *)seg->s_data;
824 796 struct seg *sptseg = shmd->shm_sptseg;
825 797 struct spt_data *sptd = sptseg->s_data;
826 798 pgcnt_t pg_idx, npages, tot_npages, npgs;
827 799 struct page **pplist, **pl, **ppa, *pp;
828 800 struct anon_map *amp;
829 801 spgcnt_t an_idx;
830 802 int ret = ENOTSUP;
831 803 uint_t pl_built = 0;
832 804 struct anon *ap;
833 805 struct vnode *vp;
834 806 u_offset_t off;
835 807 pgcnt_t claim_availrmem = 0;
836 808 uint_t szc;
837 809
838 810 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
839 811 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
840 812
841 813 /*
842 814 * We want to lock/unlock the entire ISM segment. Therefore,
843 815 * we will be using the underlying sptseg and its base address
844 816 * and length for the caching arguments.
845 817 */
846 818 ASSERT(sptseg);
847 819 ASSERT(sptd);
848 820
849 821 pg_idx = seg_page(seg, addr);
850 822 npages = btopr(len);
851 823
852 824 /*
853 825 * check if the request is larger than number of pages covered
854 826 * by amp
855 827 */
856 828 if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
857 829 *ppp = NULL;
858 830 return (ENOTSUP);
859 831 }
860 832
861 833 if (type == L_PAGEUNLOCK) {
862 834 ASSERT(sptd->spt_ppa != NULL);
863 835
864 836 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
865 837 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
866 838
867 839 /*
868 840 * If someone is blocked while unmapping, we purge
869 841 * segment page cache and thus reclaim pplist synchronously
870 842 * without waiting for seg_pasync_thread. This speeds up
871 843 * unmapping in cases where munmap(2) is called, while
872 844 * raw async i/o is still in progress or where a thread
873 845 * exits on data fault in a multithreaded application.
874 846 */
875 847 if ((sptd->spt_flags & DISM_PPA_CHANGED) ||
876 848 (AS_ISUNMAPWAIT(seg->s_as) &&
877 849 shmd->shm_softlockcnt > 0)) {
878 850 segspt_purge(seg);
879 851 }
880 852 return (0);
881 853 }
882 854
883 855 /* The L_PAGELOCK case ... */
884 856
885 857 if (sptd->spt_flags & DISM_PPA_CHANGED) {
886 858 segspt_purge(seg);
887 859 /*
888 860 * for DISM the ppa needs to be rebuilt since
889 861 * the number of locked pages could have changed
890 862 */
891 863 *ppp = NULL;
892 864 return (ENOTSUP);
893 865 }
894 866
895 867 /*
896 868 * First try to find pages in segment page cache, without
897 869 * holding the segment lock.
898 870 */
899 871 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
900 872 S_WRITE, SEGP_FORCE_WIRED);
901 873 if (pplist != NULL) {
902 874 ASSERT(sptd->spt_ppa != NULL);
903 875 ASSERT(sptd->spt_ppa == pplist);
904 876 ppa = sptd->spt_ppa;
905 877 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
906 878 if (ppa[an_idx] == NULL) {
907 879 seg_pinactive(seg, NULL, seg->s_base,
908 880 sptd->spt_amp->size, ppa,
909 881 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
910 882 *ppp = NULL;
911 883 return (ENOTSUP);
912 884 }
913 885 if ((szc = ppa[an_idx]->p_szc) != 0) {
914 886 npgs = page_get_pagecnt(szc);
915 887 an_idx = P2ROUNDUP(an_idx + 1, npgs);
916 888 } else {
917 889 an_idx++;
918 890 }
919 891 }
920 892 /*
921 893 * Since we cache the entire DISM segment, we want to
922 894 * set ppp to point to the first slot that corresponds
923 895 * to the requested addr, i.e. pg_idx.
924 896 */
925 897 *ppp = &(sptd->spt_ppa[pg_idx]);
926 898 return (0);
927 899 }
928 900
929 901 mutex_enter(&sptd->spt_lock);
930 902 /*
931 903 * try to find pages in segment page cache with mutex
932 904 */
933 905 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
934 906 S_WRITE, SEGP_FORCE_WIRED);
935 907 if (pplist != NULL) {
936 908 ASSERT(sptd->spt_ppa != NULL);
937 909 ASSERT(sptd->spt_ppa == pplist);
938 910 ppa = sptd->spt_ppa;
939 911 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
940 912 if (ppa[an_idx] == NULL) {
941 913 mutex_exit(&sptd->spt_lock);
942 914 seg_pinactive(seg, NULL, seg->s_base,
943 915 sptd->spt_amp->size, ppa,
944 916 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
945 917 *ppp = NULL;
946 918 return (ENOTSUP);
947 919 }
948 920 if ((szc = ppa[an_idx]->p_szc) != 0) {
949 921 npgs = page_get_pagecnt(szc);
950 922 an_idx = P2ROUNDUP(an_idx + 1, npgs);
951 923 } else {
952 924 an_idx++;
953 925 }
954 926 }
955 927 /*
956 928 * Since we cache the entire DISM segment, we want to
957 929 * set ppp to point to the first slot that corresponds
958 930 * to the requested addr, i.e. pg_idx.
959 931 */
960 932 mutex_exit(&sptd->spt_lock);
961 933 *ppp = &(sptd->spt_ppa[pg_idx]);
962 934 return (0);
963 935 }
964 936 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
965 937 SEGP_FORCE_WIRED) == SEGP_FAIL) {
966 938 mutex_exit(&sptd->spt_lock);
967 939 *ppp = NULL;
968 940 return (ENOTSUP);
969 941 }
970 942
971 943 /*
972 944 * No need to worry about protections because DISM pages are always rw.
973 945 */
974 946 pl = pplist = NULL;
975 947 amp = sptd->spt_amp;
976 948
977 949 /*
978 950 * Do we need to build the ppa array?
979 951 */
980 952 if (sptd->spt_ppa == NULL) {
981 953 pgcnt_t lpg_cnt = 0;
982 954
983 955 pl_built = 1;
984 956 tot_npages = btopr(sptd->spt_amp->size);
985 957
986 958 ASSERT(sptd->spt_pcachecnt == 0);
987 959 pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP);
988 960 pl = pplist;
989 961
990 962 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
991 963 for (an_idx = 0; an_idx < tot_npages; ) {
992 964 ap = anon_get_ptr(amp->ahp, an_idx);
993 965 /*
994 966 * Cache only mlocked pages. For large pages,
995 967 * if one (constituent) page is mlocked,
996 968 * all pages for that large page
997 969 * are cached also. This allows quick
998 970 * lookups of the ppa array.
999 971 */
1000 972 if ((ap != NULL) && (lpg_cnt != 0 ||
1001 973 (sptd->spt_ppa_lckcnt[an_idx] != 0))) {
1002 974
1003 975 swap_xlate(ap, &vp, &off);
1004 976 pp = page_lookup(vp, off, SE_SHARED);
1005 977 ASSERT(pp != NULL);
1006 978 if (lpg_cnt == 0) {
1007 979 lpg_cnt++;
1008 980 /*
1009 981 * For a small page, we are done --
1010 982 * lpg_count is reset to 0 below.
1011 983 *
1012 984 * For a large page, we are guaranteed
1013 985 * to find the anon structures of all
1014 986 * constituent pages and a non-zero
1015 987 * lpg_cnt ensures that we don't test
1016 988 * for mlock for these. We are done
1017 989 * when lpg_count reaches (npgs + 1).
1018 990 * If we are not the first constituent
1019 991 * page, restart at the first one.
1020 992 */
1021 993 npgs = page_get_pagecnt(pp->p_szc);
1022 994 if (!IS_P2ALIGNED(an_idx, npgs)) {
1023 995 an_idx = P2ALIGN(an_idx, npgs);
1024 996 page_unlock(pp);
1025 997 continue;
1026 998 }
1027 999 }
1028 1000 if (++lpg_cnt > npgs)
1029 1001 lpg_cnt = 0;
1030 1002
1031 1003 /*
1032 1004 * availrmem is decremented only
1033 1005 * for unlocked pages
1034 1006 */
1035 1007 if (sptd->spt_ppa_lckcnt[an_idx] == 0)
1036 1008 claim_availrmem++;
1037 1009 pplist[an_idx] = pp;
1038 1010 }
1039 1011 an_idx++;
1040 1012 }
1041 1013 ANON_LOCK_EXIT(&amp->a_rwlock);
1042 1014
1043 1015 if (claim_availrmem) {
1044 1016 mutex_enter(&freemem_lock);
1045 1017 if (availrmem < tune.t_minarmem + claim_availrmem) {
1046 1018 mutex_exit(&freemem_lock);
1047 1019 ret = ENOTSUP;
1048 1020 claim_availrmem = 0;
1049 1021 goto insert_fail;
1050 1022 } else {
1051 1023 availrmem -= claim_availrmem;
1052 1024 }
1053 1025 mutex_exit(&freemem_lock);
1054 1026 }
1055 1027
1056 1028 sptd->spt_ppa = pl;
1057 1029 } else {
1058 1030 /*
1059 1031 * We already have a valid ppa[].
1060 1032 */
1061 1033 pl = sptd->spt_ppa;
1062 1034 }
1063 1035
1064 1036 ASSERT(pl != NULL);
1065 1037
1066 1038 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
1067 1039 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
1068 1040 segspt_reclaim);
1069 1041 if (ret == SEGP_FAIL) {
1070 1042 /*
1071 1043 * seg_pinsert failed. We return
1072 1044 * ENOTSUP, so that the as_pagelock() code will
1073 1045 * then try the slower F_SOFTLOCK path.
1074 1046 */
1075 1047 if (pl_built) {
1076 1048 /*
1077 1049 * No one else has referenced the ppa[].
1078 1050 * We created it and we need to destroy it.
1079 1051 */
1080 1052 sptd->spt_ppa = NULL;
1081 1053 }
1082 1054 ret = ENOTSUP;
1083 1055 goto insert_fail;
1084 1056 }
1085 1057
1086 1058 /*
1087 1059 * In either case, we increment softlockcnt on the 'real' segment.
1088 1060 */
1089 1061 sptd->spt_pcachecnt++;
1090 1062 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1091 1063
1092 1064 ppa = sptd->spt_ppa;
1093 1065 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
1094 1066 if (ppa[an_idx] == NULL) {
1095 1067 mutex_exit(&sptd->spt_lock);
1096 1068 seg_pinactive(seg, NULL, seg->s_base,
1097 1069 sptd->spt_amp->size,
1098 1070 pl, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
1099 1071 *ppp = NULL;
1100 1072 return (ENOTSUP);
1101 1073 }
1102 1074 if ((szc = ppa[an_idx]->p_szc) != 0) {
1103 1075 npgs = page_get_pagecnt(szc);
1104 1076 an_idx = P2ROUNDUP(an_idx + 1, npgs);
1105 1077 } else {
1106 1078 an_idx++;
1107 1079 }
1108 1080 }
1109 1081 /*
1110 1082 * We can now drop the sptd->spt_lock since the ppa[]
1111 1083 * exists and we have incremented pcachecnt.
1112 1084 */
1113 1085 mutex_exit(&sptd->spt_lock);
1114 1086
1115 1087 /*
1116 1088 * Since we cache the entire segment, we want to
1117 1089 * set ppp to point to the first slot that corresponds
1118 1090 * to the requested addr, i.e. pg_idx.
1119 1091 */
1120 1092 *ppp = &(sptd->spt_ppa[pg_idx]);
1121 1093 return (0);
1122 1094
1123 1095 insert_fail:
1124 1096 /*
1125 1097 * We will only reach this code if we tried and failed.
1126 1098 *
1127 1099 * And we can drop the lock on the dummy seg, once we've failed
1128 1100 * to set up a new ppa[].
1129 1101 */
1130 1102 mutex_exit(&sptd->spt_lock);
1131 1103
1132 1104 if (pl_built) {
1133 1105 if (claim_availrmem) {
1134 1106 mutex_enter(&freemem_lock);
1135 1107 availrmem += claim_availrmem;
1136 1108 mutex_exit(&freemem_lock);
1137 1109 }
1138 1110
1139 1111 /*
1140 1112 * We created pl and we need to destroy it.
1141 1113 */
1142 1114 pplist = pl;
1143 1115 for (an_idx = 0; an_idx < tot_npages; an_idx++) {
1144 1116 if (pplist[an_idx] != NULL)
1145 1117 page_unlock(pplist[an_idx]);
1146 1118 }
1147 1119 kmem_free(pl, sizeof (page_t *) * tot_npages);
1148 1120 }
1149 1121
1150 1122 if (shmd->shm_softlockcnt <= 0) {
1151 1123 if (AS_ISUNMAPWAIT(seg->s_as)) {
1152 1124 mutex_enter(&seg->s_as->a_contents);
1153 1125 if (AS_ISUNMAPWAIT(seg->s_as)) {
1154 1126 AS_CLRUNMAPWAIT(seg->s_as);
1155 1127 cv_broadcast(&seg->s_as->a_cv);
1156 1128 }
1157 1129 mutex_exit(&seg->s_as->a_contents);
1158 1130 }
1159 1131 }
1160 1132 *ppp = NULL;
1161 1133 return (ret);
1162 1134 }
1163 1135
1164 1136
1165 1137
1166 1138 /*
1167 1139 * return locked pages over a given range.
1168 1140 *
1169 1141 * We will cache the entire ISM segment and save the pplist for the
1170 1142 * entire segment in the ppa field of the underlying ISM segment structure.
1171 1143 * Later, during a call to segspt_reclaim() we will use this ppa array
1172 1144 * to page_unlock() all of the pages and then we will free this ppa list.
1173 1145 */
1174 1146 /*ARGSUSED*/
1175 1147 static int
1176 1148 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
1177 1149 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1178 1150 {
1179 1151 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1180 1152 struct seg *sptseg = shmd->shm_sptseg;
1181 1153 struct spt_data *sptd = sptseg->s_data;
1182 1154 pgcnt_t np, page_index, npages;
1183 1155 caddr_t a, spt_base;
1184 1156 struct page **pplist, **pl, *pp;
1185 1157 struct anon_map *amp;
1186 1158 ulong_t anon_index;
1187 1159 int ret = ENOTSUP;
1188 1160 uint_t pl_built = 0;
1189 1161 struct anon *ap;
1190 1162 struct vnode *vp;
1191 1163 u_offset_t off;
1192 1164
1193 1165 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1194 1166 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
1195 1167
1196 1168
1197 1169 /*
1198 1170 * We want to lock/unlock the entire ISM segment. Therefore,
1199 1171 * we will be using the underlying sptseg and its base address
1200 1172 * and length for the caching arguments.
1201 1173 */
1202 1174 ASSERT(sptseg);
1203 1175 ASSERT(sptd);
1204 1176
1205 1177 if (sptd->spt_flags & SHM_PAGEABLE) {
1206 1178 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
1207 1179 }
1208 1180
1209 1181 page_index = seg_page(seg, addr);
1210 1182 npages = btopr(len);
1211 1183
1212 1184 /*
1213 1185 * check if the request is larger than number of pages covered
1214 1186 * by amp
1215 1187 */
1216 1188 if (page_index + npages > btopr(sptd->spt_amp->size)) {
1217 1189 *ppp = NULL;
1218 1190 return (ENOTSUP);
1219 1191 }
1220 1192
1221 1193 if (type == L_PAGEUNLOCK) {
1222 1194
1223 1195 ASSERT(sptd->spt_ppa != NULL);
1224 1196
1225 1197 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
1226 1198 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
1227 1199
1228 1200 /*
1229 1201 * If someone is blocked while unmapping, we purge
1230 1202 * segment page cache and thus reclaim pplist synchronously
1231 1203 * without waiting for seg_pasync_thread. This speeds up
1232 1204 * unmapping in cases where munmap(2) is called, while
1233 1205 * raw async i/o is still in progress or where a thread
1234 1206 * exits on data fault in a multithreaded application.
1235 1207 */
1236 1208 if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
1237 1209 segspt_purge(seg);
1238 1210 }
1239 1211 return (0);
1240 1212 }
1241 1213
1242 1214 /* The L_PAGELOCK case... */
1243 1215
1244 1216 /*
1245 1217 * First try to find pages in segment page cache, without
1246 1218 * holding the segment lock.
1247 1219 */
1248 1220 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
1249 1221 S_WRITE, SEGP_FORCE_WIRED);
1250 1222 if (pplist != NULL) {
1251 1223 ASSERT(sptd->spt_ppa == pplist);
1252 1224 ASSERT(sptd->spt_ppa[page_index]);
1253 1225 /*
1254 1226 * Since we cache the entire ISM segment, we want to
1255 1227 * set ppp to point to the first slot that corresponds
1256 1228 * to the requested addr, i.e. page_index.
1257 1229 */
1258 1230 *ppp = &(sptd->spt_ppa[page_index]);
1259 1231 return (0);
1260 1232 }
1261 1233
1262 1234 mutex_enter(&sptd->spt_lock);
1263 1235
1264 1236 /*
1265 1237 * try to find pages in segment page cache
1266 1238 */
1267 1239 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
1268 1240 S_WRITE, SEGP_FORCE_WIRED);
1269 1241 if (pplist != NULL) {
1270 1242 ASSERT(sptd->spt_ppa == pplist);
1271 1243 /*
1272 1244 * Since we cache the entire segment, we want to
1273 1245 * set ppp to point to the first slot that corresponds
1274 1246 * to the requested addr, i.e. page_index.
1275 1247 */
1276 1248 mutex_exit(&sptd->spt_lock);
1277 1249 *ppp = &(sptd->spt_ppa[page_index]);
1278 1250 return (0);
1279 1251 }
1280 1252
1281 1253 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
1282 1254 SEGP_FORCE_WIRED) == SEGP_FAIL) {
1283 1255 mutex_exit(&sptd->spt_lock);
1284 1256 *ppp = NULL;
1285 1257 return (ENOTSUP);
1286 1258 }
1287 1259
1288 1260 /*
1289 1261 * No need to worry about protections because ISM pages
1290 1262 * are always rw.
1291 1263 */
1292 1264 pl = pplist = NULL;
1293 1265
1294 1266 /*
1295 1267 * Do we need to build the ppa array?
1296 1268 */
1297 1269 if (sptd->spt_ppa == NULL) {
1298 1270 ASSERT(sptd->spt_ppa == pplist);
1299 1271
1300 1272 spt_base = sptseg->s_base;
1301 1273 pl_built = 1;
1302 1274
1303 1275 /*
1304 1276 * availrmem is decremented once during anon_swap_adjust()
1305 1277 * and is incremented during the anon_unresv(), which is
1306 1278 * called from shm_rm_amp() when the segment is destroyed.
1307 1279 */
1308 1280 amp = sptd->spt_amp;
1309 1281 ASSERT(amp != NULL);
1310 1282
1311 1283 /* pcachecnt is protected by sptd->spt_lock */
1312 1284 ASSERT(sptd->spt_pcachecnt == 0);
1313 1285 pplist = kmem_zalloc(sizeof (page_t *)
1314 1286 * btopr(sptd->spt_amp->size), KM_SLEEP);
1315 1287 pl = pplist;
1316 1288
1317 1289 anon_index = seg_page(sptseg, spt_base);
1318 1290
1319 1291 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
1320 1292 for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
1321 1293 a += PAGESIZE, anon_index++, pplist++) {
1322 1294 ap = anon_get_ptr(amp->ahp, anon_index);
1323 1295 ASSERT(ap != NULL);
1324 1296 swap_xlate(ap, &vp, &off);
1325 1297 pp = page_lookup(vp, off, SE_SHARED);
1326 1298 ASSERT(pp != NULL);
1327 1299 *pplist = pp;
1328 1300 }
1329 1301 ANON_LOCK_EXIT(&amp->a_rwlock);
1330 1302
1331 1303 if (a < (spt_base + sptd->spt_amp->size)) {
1332 1304 ret = ENOTSUP;
1333 1305 goto insert_fail;
1334 1306 }
1335 1307 sptd->spt_ppa = pl;
1336 1308 } else {
1337 1309 /*
1338 1310 * We already have a valid ppa[].
1339 1311 */
1340 1312 pl = sptd->spt_ppa;
1341 1313 }
1342 1314
1343 1315 ASSERT(pl != NULL);
1344 1316
1345 1317 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
1346 1318 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
1347 1319 segspt_reclaim);
1348 1320 if (ret == SEGP_FAIL) {
1349 1321 /*
1350 1322 * seg_pinsert failed. We return
1351 1323 * ENOTSUP, so that the as_pagelock() code will
1352 1324 * then try the slower F_SOFTLOCK path.
1353 1325 */
1354 1326 if (pl_built) {
1355 1327 /*
1356 1328 * No one else has referenced the ppa[].
1357 1329 * We created it and we need to destroy it.
1358 1330 */
1359 1331 sptd->spt_ppa = NULL;
1360 1332 }
1361 1333 ret = ENOTSUP;
1362 1334 goto insert_fail;
1363 1335 }
1364 1336
1365 1337 /*
1366 1338 * In either case, we increment softlockcnt on the 'real' segment.
1367 1339 */
1368 1340 sptd->spt_pcachecnt++;
1369 1341 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1370 1342
1371 1343 /*
1372 1344 * We can now drop the sptd->spt_lock since the ppa[]
1373 1345 * exists and we have incremented pcachecnt.
1374 1346 */
1375 1347 mutex_exit(&sptd->spt_lock);
1376 1348
1377 1349 /*
1378 1350 * Since we cache the entire segment, we want to
1379 1351 * set ppp to point to the first slot that corresponds
1380 1352 * to the requested addr, i.e. page_index.
1381 1353 */
1382 1354 *ppp = &(sptd->spt_ppa[page_index]);
1383 1355 return (0);
1384 1356
1385 1357 insert_fail:
1386 1358 /*
1387 1359 * We will only reach this code if we tried and failed.
1388 1360 *
1389 1361 * And we can drop the lock on the dummy seg, once we've failed
1390 1362 * to set up a new ppa[].
1391 1363 */
1392 1364 mutex_exit(&sptd->spt_lock);
1393 1365
1394 1366 if (pl_built) {
1395 1367 /*
1396 1368 * We created pl and we need to destroy it.
1397 1369 */
1398 1370 pplist = pl;
1399 1371 np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT);
1400 1372 while (np) {
1401 1373 page_unlock(*pplist);
1402 1374 np--;
1403 1375 pplist++;
1404 1376 }
1405 1377 kmem_free(pl, sizeof (page_t *) * btopr(sptd->spt_amp->size));
1406 1378 }
1407 1379 if (shmd->shm_softlockcnt <= 0) {
1408 1380 if (AS_ISUNMAPWAIT(seg->s_as)) {
1409 1381 mutex_enter(&seg->s_as->a_contents);
1410 1382 if (AS_ISUNMAPWAIT(seg->s_as)) {
1411 1383 AS_CLRUNMAPWAIT(seg->s_as);
1412 1384 cv_broadcast(&seg->s_as->a_cv);
1413 1385 }
1414 1386 mutex_exit(&seg->s_as->a_contents);
1415 1387 }
1416 1388 }
1417 1389 *ppp = NULL;
1418 1390 return (ret);
1419 1391 }
1420 1392
1421 1393 /*
1422 1394 * purge any cached pages in the I/O page cache
1423 1395 */
1424 1396 static void
1425 1397 segspt_purge(struct seg *seg)
1426 1398 {
1427 1399 seg_ppurge(seg, NULL, SEGP_FORCE_WIRED);
1428 1400 }
1429 1401
1430 1402 static int
1431 1403 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
1432 1404 enum seg_rw rw, int async)
1433 1405 {
1434 1406 struct seg *seg = (struct seg *)ptag;
1435 1407 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1436 1408 struct seg *sptseg;
1437 1409 struct spt_data *sptd;
1438 1410 pgcnt_t npages, i, free_availrmem = 0;
1439 1411 int done = 0;
1440 1412
1441 1413 #ifdef lint
1442 1414 addr = addr;
1443 1415 #endif
1444 1416 sptseg = shmd->shm_sptseg;
1445 1417 sptd = sptseg->s_data;
1446 1418 npages = (len >> PAGESHIFT);
1447 1419 ASSERT(npages);
1448 1420 ASSERT(sptd->spt_pcachecnt != 0);
1449 1421 ASSERT(sptd->spt_ppa == pplist);
1450 1422 ASSERT(npages == btopr(sptd->spt_amp->size));
1451 1423 ASSERT(async || AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1452 1424
1453 1425 /*
1454 1426 * Acquire the lock on the dummy seg and destroy the
1455 1427 * ppa array IF this is the last pcachecnt.
1456 1428 */
1457 1429 mutex_enter(&sptd->spt_lock);
1458 1430 if (--sptd->spt_pcachecnt == 0) {
1459 1431 for (i = 0; i < npages; i++) {
1460 1432 if (pplist[i] == NULL) {
1461 1433 continue;
1462 1434 }
1463 1435 if (rw == S_WRITE) {
1464 1436 hat_setrefmod(pplist[i]);
1465 1437 } else {
1466 1438 hat_setref(pplist[i]);
1467 1439 }
1468 1440 if ((sptd->spt_flags & SHM_PAGEABLE) &&
1469 1441 (sptd->spt_ppa_lckcnt[i] == 0))
1470 1442 free_availrmem++;
1471 1443 page_unlock(pplist[i]);
1472 1444 }
1473 1445 if ((sptd->spt_flags & SHM_PAGEABLE) && free_availrmem) {
1474 1446 mutex_enter(&freemem_lock);
1475 1447 availrmem += free_availrmem;
1476 1448 mutex_exit(&freemem_lock);
1477 1449 }
1478 1450 /*
1479 1451 * Since we want to cache/uncache the entire ISM segment,
1480 1452 * we will track the pplist in a segspt-specific field,
1481 1453 * ppa, which is initialized at the time we add an entry to
1482 1454 * the cache.
1483 1455 */
1484 1456 ASSERT(sptd->spt_pcachecnt == 0);
1485 1457 kmem_free(pplist, sizeof (page_t *) * npages);
1486 1458 sptd->spt_ppa = NULL;
1487 1459 sptd->spt_flags &= ~DISM_PPA_CHANGED;
1488 1460 sptd->spt_gen++;
1489 1461 cv_broadcast(&sptd->spt_cv);
1490 1462 done = 1;
1491 1463 }
1492 1464 mutex_exit(&sptd->spt_lock);
1493 1465
1494 1466 /*
1495 1467 * If we are the pcache async thread or called via seg_ppurge_wiredpp()
1496 1468 * we may not hold the AS lock (in this case the async argument is not 0).
1497 1469 * This means that if softlockcnt drops to 0 after the decrement below,
1498 1470 * the address space may get freed. We can't allow that, since after the
1499 1471 * softlock decrement to 0 we still need to access the as structure for a
1500 1472 * possible wakeup of unmap waiters. To prevent the disappearance of as we take
1501 1473 * this segment's shm_segfree_syncmtx. segspt_shmfree() also takes
1502 1474 * this mutex as a barrier to make sure this routine completes before
1503 1475 * segment is freed.
1504 1476 *
1505 1477 * The second complication we have to deal with in the async case is the
1506 1478 * possibility of a missed wakeup of the unmap wait thread. When we don't
1507 1479 * hold the as lock here, we may take the a_contents lock before the unmap
1508 1480 * wait thread that was first to see that softlockcnt was still not 0. As
1509 1481 * a result we'll fail to wake up the unmap wait thread. To avoid this
1510 1482 * race we set the nounmapwait flag in the as structure if we drop
1511 1483 * softlockcnt to 0 while async is not 0. The unmapwait thread
1512 1484 * will not block if this flag is set.
1513 1485 */
1514 1486 if (async)
1515 1487 mutex_enter(&shmd->shm_segfree_syncmtx);
1516 1488
1517 1489 /*
1518 1490 * Now decrement softlockcnt.
1519 1491 */
1520 1492 ASSERT(shmd->shm_softlockcnt > 0);
1521 1493 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1522 1494
1523 1495 if (shmd->shm_softlockcnt <= 0) {
1524 1496 if (async || AS_ISUNMAPWAIT(seg->s_as)) {
1525 1497 mutex_enter(&seg->s_as->a_contents);
1526 1498 if (async)
1527 1499 AS_SETNOUNMAPWAIT(seg->s_as);
1528 1500 if (AS_ISUNMAPWAIT(seg->s_as)) {
1529 1501 AS_CLRUNMAPWAIT(seg->s_as);
1530 1502 cv_broadcast(&seg->s_as->a_cv);
1531 1503 }
1532 1504 mutex_exit(&seg->s_as->a_contents);
1533 1505 }
1534 1506 }
1535 1507
1536 1508 if (async)
1537 1509 mutex_exit(&shmd->shm_segfree_syncmtx);
1538 1510
1539 1511 return (done);
1540 1512 }
1541 1513
1542 1514 /*
1543 1515 * Do a F_SOFTUNLOCK call over the range requested.
1544 1516 * The range must have already been F_SOFTLOCK'ed.
1545 1517 *
1546 1518 * The calls to acquire and release the anon map lock mutex were
1547 1519 * removed in order to avoid a deadly embrace during a DR
1548 1520 * memory delete operation. (E.g. DR blocks while waiting for an
1549 1521 * exclusive lock on a page that is being used for kaio; the
1550 1522 * thread that will complete the kaio and call segspt_softunlock
1551 1523 * blocks on the anon map lock; another thread holding the anon
1552 1524 * map lock blocks on another page lock via the segspt_shmfault
1553 1525 * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
1554 1526 *
1555 1527 * The appropriateness of the removal is based upon the following:
1556 1528 * 1. If we are holding a segment's reader lock and the page is held
1557 1529 * shared, then the corresponding element in anonmap which points to
1558 1530 * anon struct cannot change and there is no need to acquire the
1559 1531 * anonymous map lock.
1560 1532 * 2. Threads in segspt_softunlock have a reader lock on the segment
1561 1533 * and already have the shared page lock, so we are guaranteed that
1562 1534 * the anon map slot cannot change and therefore can call anon_get_ptr()
1563 1535 * without grabbing the anonymous map lock.
1564 1536 * 3. Threads that softlock a shared page break copy-on-write, even if
1565 1537 * it's a read. Thus cow faults can be ignored with respect to soft
1566 1538 * unlocking, since the breaking of cow means that the anon slot(s) will
1567 1539 * not be shared.
1568 1540 */
1569 1541 static void
1570 1542 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1571 1543 size_t len, enum seg_rw rw)
1572 1544 {
1573 1545 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1574 1546 struct seg *sptseg;
1575 1547 struct spt_data *sptd;
1576 1548 page_t *pp;
1577 1549 caddr_t adr;
1578 1550 struct vnode *vp;
1579 1551 u_offset_t offset;
1580 1552 ulong_t anon_index;
1581 1553 struct anon_map *amp; /* XXX - for locknest */
1582 1554 struct anon *ap = NULL;
1583 1555 pgcnt_t npages;
1584 1556
1585 1557 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1586 1558
1587 1559 sptseg = shmd->shm_sptseg;
1588 1560 sptd = sptseg->s_data;
1589 1561
1590 1562 /*
1591 1563 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1592 1564 * and therefore their pages are SE_SHARED locked
1593 1565 * for the entire life of the segment.
1594 1566 */
1595 1567 if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1596 1568 ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1597 1569 goto softlock_decrement;
1598 1570 }
1599 1571
1600 1572 /*
1601 1573 * Any thread is free to do a page_find and
1602 1574 * page_unlock() on the pages within this seg.
1603 1575 *
1604 1576 * We are already holding the as->a_lock on the user's
1605 1577 * real segment, but we need to hold the a_lock on the
1606 1578 * underlying dummy as. This is mostly to satisfy the
1607 1579 * underlying HAT layer.
1608 1580 */
1609 1581 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1610 1582 hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1611 1583 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1612 1584
1613 1585 amp = sptd->spt_amp;
1614 1586 ASSERT(amp != NULL);
1615 1587 anon_index = seg_page(sptseg, sptseg_addr);
1616 1588
1617 1589 for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1618 1590 ap = anon_get_ptr(amp->ahp, anon_index++);
1619 1591 ASSERT(ap != NULL);
1620 1592 swap_xlate(ap, &vp, &offset);
1621 1593
1622 1594 /*
1623 1595 * Use page_find() instead of page_lookup() to
1624 1596 * find the page since we know that it has a
1625 1597 * "shared" lock.
1626 1598 */
1627 1599 pp = page_find(vp, offset);
1628 1600 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1629 1601 if (pp == NULL) {
1630 1602 panic("segspt_softunlock: "
1631 1603 "addr %p, ap %p, vp %p, off %llx",
1632 1604 (void *)adr, (void *)ap, (void *)vp, offset);
1633 1605 /*NOTREACHED*/
1634 1606 }
1635 1607
1636 1608 if (rw == S_WRITE) {
1637 1609 hat_setrefmod(pp);
1638 1610 } else if (rw != S_OTHER) {
1639 1611 hat_setref(pp);
1640 1612 }
1641 1613 page_unlock(pp);
1642 1614 }
1643 1615
1644 1616 softlock_decrement:
1645 1617 npages = btopr(len);
1646 1618 ASSERT(shmd->shm_softlockcnt >= npages);
1647 1619 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages);
1648 1620 if (shmd->shm_softlockcnt == 0) {
1649 1621 /*
1650 1622 * All SOFTLOCKS are gone. Wakeup any waiting
1651 1623 * unmappers so they can try again to unmap.
1652 1624 * Check for waiters first without the mutex
1653 1625 * held so we don't always grab the mutex on
1654 1626 * softunlocks.
1655 1627 */
1656 1628 if (AS_ISUNMAPWAIT(seg->s_as)) {
1657 1629 mutex_enter(&seg->s_as->a_contents);
1658 1630 if (AS_ISUNMAPWAIT(seg->s_as)) {
1659 1631 AS_CLRUNMAPWAIT(seg->s_as);
1660 1632 cv_broadcast(&seg->s_as->a_cv);
1661 1633 }
1662 1634 mutex_exit(&seg->s_as->a_contents);
1663 1635 }
1664 1636 }
1665 1637 }
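
The softlock_decrement path above relies on a double-checked wakeup: it peeks at the unmap-wait flag without the mutex so the common unlock path stays cheap, then re-checks under a_contents before broadcasting, so a waiter cannot slip in between the unlocked check and the wakeup. A minimal userland sketch of the same pattern, assuming pthreads and purely illustrative names (wake_unmappers and unmap_wait are not in the kernel source):

#include <pthread.h>

static pthread_mutex_t a_contents = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t a_cv = PTHREAD_COND_INITIALIZER;
static volatile int unmap_wait;		/* stands in for AS_ISUNMAPWAIT() */

static void
wake_unmappers(void)
{
	if (unmap_wait) {			/* cheap unlocked peek */
		pthread_mutex_lock(&a_contents);
		if (unmap_wait) {		/* re-check under the lock */
			unmap_wait = 0;		/* AS_CLRUNMAPWAIT() */
			pthread_cond_broadcast(&a_cv);
		}
		pthread_mutex_unlock(&a_contents);
	}
}
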
1666 1638
1667 1639 int
1668 1640 segspt_shmattach(struct seg *seg, caddr_t *argsp)
1669 1641 {
1670 1642 struct shm_data *shmd_arg = (struct shm_data *)argsp;
1671 1643 struct shm_data *shmd;
1672 1644 struct anon_map *shm_amp = shmd_arg->shm_amp;
1673 1645 struct spt_data *sptd;
1674 1646 int error = 0;
1675 1647
1676 1648 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1677 1649
1678 1650 shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1679 1651 if (shmd == NULL)
1680 1652 return (ENOMEM);
1681 1653
1682 1654 shmd->shm_sptas = shmd_arg->shm_sptas;
1683 1655 shmd->shm_amp = shm_amp;
1684 1656 shmd->shm_sptseg = shmd_arg->shm_sptseg;
1685 1657
1686 1658 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1687 1659 NULL, 0, seg->s_size);
1688 1660
1689 1661 mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);
1690 1662
1691 1663 seg->s_data = (void *)shmd;
1692 1664 seg->s_ops = &segspt_shmops;
1693 1665 seg->s_szc = shmd->shm_sptseg->s_szc;
1694 1666 sptd = shmd->shm_sptseg->s_data;
1695 1667
1696 1668 if (sptd->spt_flags & SHM_PAGEABLE) {
1697 1669 if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size),
1698 1670 KM_NOSLEEP)) == NULL) {
1699 1671 seg->s_data = (void *)NULL;
1700 1672 kmem_free(shmd, (sizeof (*shmd)));
1701 1673 return (ENOMEM);
1702 1674 }
1703 1675 shmd->shm_lckpgs = 0;
1704 1676 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
1705 1677 if ((error = hat_share(seg->s_as->a_hat, seg->s_base,
1706 1678 shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1707 1679 seg->s_size, seg->s_szc)) != 0) {
1708 1680 kmem_free(shmd->shm_vpage,
1709 1681 btopr(shm_amp->size));
1710 1682 }
1711 1683 }
1712 1684 } else {
1713 1685 error = hat_share(seg->s_as->a_hat, seg->s_base,
1714 1686 shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1715 1687 seg->s_size, seg->s_szc);
1716 1688 }
1717 1689 if (error) {
1718 1690 seg->s_szc = 0;
1719 1691 seg->s_data = (void *)NULL;
1720 1692 kmem_free(shmd, (sizeof (*shmd)));
1721 1693 } else {
1722 1694 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1723 1695 shm_amp->refcnt++;
1724 1696 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1725 1697 }
1726 1698 return (error);
1727 1699 }
1728 1700
1729 1701 int
1730 1702 segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
1731 1703 {
1732 1704 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1733 1705 int reclaim = 1;
1734 1706
1735 1707 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1736 1708 retry:
1737 1709 if (shmd->shm_softlockcnt > 0) {
1738 1710 if (reclaim == 1) {
1739 1711 segspt_purge(seg);
1740 1712 reclaim = 0;
1741 1713 goto retry;
1742 1714 }
1743 1715 return (EAGAIN);
1744 1716 }
1745 1717
1746 1718 if (ssize != seg->s_size) {
1747 1719 #ifdef DEBUG
1748 1720 cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
1749 1721 ssize, seg->s_size);
1750 1722 #endif
1751 1723 return (EINVAL);
1752 1724 }
1753 1725
1754 1726 (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
1755 1727 NULL, 0);
1756 1728 hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);
1757 1729
1758 1730 seg_free(seg);
1759 1731
1760 1732 return (0);
1761 1733 }
1762 1734
1763 1735 void
1764 1736 segspt_shmfree(struct seg *seg)
1765 1737 {
1766 1738 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1767 1739 struct anon_map *shm_amp = shmd->shm_amp;
1768 1740
1769 1741 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1770 1742
1771 1743 (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
1772 1744 MC_UNLOCK, NULL, 0);
1773 1745
1774 1746 /*
1775 1747 * Need to increment refcnt when attaching
1776 1748 * and decrement when detaching because of dup().
1777 1749 */
1778 1750 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1779 1751 shm_amp->refcnt--;
1780 1752 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1781 1753
1782 1754 if (shmd->shm_vpage) { /* only for DISM */
1783 1755 kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
1784 1756 shmd->shm_vpage = NULL;
1785 1757 }
1786 1758
1787 1759 /*
1788 1760 * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
1789 1761 * still working with this segment without holding as lock.
1790 1762 */
1791 1763 ASSERT(shmd->shm_softlockcnt == 0);
1792 1764 mutex_enter(&shmd->shm_segfree_syncmtx);
1793 1765 mutex_destroy(&shmd->shm_segfree_syncmtx);
1794 1766
1795 1767 kmem_free(shmd, sizeof (*shmd));
1796 1768 }
1797 1769
1798 1770 /*ARGSUSED*/
1799 1771 int
1800 1772 segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1801 1773 {
1802 1774 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1803 1775
1804 1776 /*
1805 1777 * Shared page table is more than shared mapping.
1806 1778 * Individual process sharing page tables can't change prot
1807 1779 * because there is only one set of page tables.
1808 1780 * This will be allowed after private page table is
1809 1781 * supported.
1810 1782 */
1811 1783 /* need to return correct status error? */
1812 1784 return (0);
1813 1785 }
1814 1786
1815 1787
1816 1788 faultcode_t
1817 1789 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1818 1790 size_t len, enum fault_type type, enum seg_rw rw)
1819 1791 {
1820 1792 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1821 1793 struct seg *sptseg = shmd->shm_sptseg;
1822 1794 struct as *curspt = shmd->shm_sptas;
1823 1795 struct spt_data *sptd = sptseg->s_data;
1824 1796 pgcnt_t npages;
1825 1797 size_t size;
1826 1798 caddr_t segspt_addr, shm_addr;
1827 1799 page_t **ppa;
1828 1800 int i;
1829 1801 ulong_t an_idx = 0;
1830 1802 int err = 0;
1831 1803 int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
1832 1804 size_t pgsz;
1833 1805 pgcnt_t pgcnt;
1834 1806 caddr_t a;
1835 1807 pgcnt_t pidx;
1836 1808
1837 1809 #ifdef lint
1838 1810 hat = hat;
1839 1811 #endif
1840 1812 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1841 1813
1842 1814 /*
1843 1815 	 * Because of the way spt is implemented,
1844 1816 	 * the realsize of the segment does not have to be
1845 1817 	 * equal to the segment size itself. The segment size is
1846 1818 	 * often in multiples of a page size larger than PAGESIZE.
1847 1819 	 * The realsize is rounded up to the nearest PAGESIZE
1848 1820 	 * based on what the user requested. This is a bit of
1849 1821 	 * ugliness that is historical but not easily fixed
1850 1822 * without re-designing the higher levels of ISM.
1851 1823 */
1852 1824 ASSERT(addr >= seg->s_base);
1853 1825 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1854 1826 return (FC_NOMAP);
1855 1827 /*
1856 1828 * For all of the following cases except F_PROT, we need to
1857 1829 * make any necessary adjustments to addr and len
1858 1830 * and get all of the necessary page_t's into an array called ppa[].
1859 1831 *
1860 1832 * The code in shmat() forces base addr and len of ISM segment
1861 1833 * to be aligned to largest page size supported. Therefore,
1862 1834 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
1863 1835 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
1864 1836 * in large pagesize chunks, or else we will screw up the HAT
1865 1837 * layer by calling hat_memload_array() with differing page sizes
1866 1838 * over a given virtual range.
1867 1839 */
1868 1840 pgsz = page_get_pagesize(sptseg->s_szc);
1869 1841 pgcnt = page_get_pagecnt(sptseg->s_szc);
1870 1842 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
1871 1843 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
1872 1844 npages = btopr(size);
1873 1845
1874 1846 /*
1875 1847 * Now we need to convert from addr in segshm to addr in segspt.
1876 1848 */
1877 1849 an_idx = seg_page(seg, shm_addr);
1878 1850 segspt_addr = sptseg->s_base + ptob(an_idx);
1879 1851
1880 1852 ASSERT((segspt_addr + ptob(npages)) <=
1881 1853 (sptseg->s_base + sptd->spt_realsize));
1882 1854 ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size));
1883 1855
1884 1856 switch (type) {
1885 1857
1886 1858 case F_SOFTLOCK:
1887 1859
1888 1860 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
1889 1861 /*
1890 1862 * Fall through to the F_INVAL case to load up the hat layer
1891 1863 * entries with the HAT_LOAD_LOCK flag.
1892 1864 */
1893 1865 /* FALLTHRU */
1894 1866 case F_INVAL:
1895 1867
1896 1868 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1897 1869 return (FC_NOMAP);
1898 1870
1899 1871 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1900 1872
1901 1873 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1902 1874 if (err != 0) {
1903 1875 if (type == F_SOFTLOCK) {
1904 1876 atomic_add_long((ulong_t *)(
1905 1877 &(shmd->shm_softlockcnt)), -npages);
1906 1878 }
1907 1879 goto dism_err;
1908 1880 }
1909 1881 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1910 1882 a = segspt_addr;
1911 1883 pidx = 0;
1912 1884 if (type == F_SOFTLOCK) {
1913 1885
1914 1886 /*
1915 1887 * Load up the translation keeping it
1916 1888 * locked and don't unlock the page.
1917 1889 */
1918 1890 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1919 1891 hat_memload_array(sptseg->s_as->a_hat,
1920 1892 a, pgsz, &ppa[pidx], sptd->spt_prot,
1921 1893 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1922 1894 }
1923 1895 } else {
1924 1896 /*
1925 1897 * Migrate pages marked for migration
1926 1898 */
1927 1899 if (lgrp_optimizations())
1928 1900 page_migrate(seg, shm_addr, ppa, npages);
1929 1901
1930 1902 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1931 1903 hat_memload_array(sptseg->s_as->a_hat,
1932 1904 a, pgsz, &ppa[pidx],
1933 1905 sptd->spt_prot,
1934 1906 HAT_LOAD_SHARE);
1935 1907 }
1936 1908
1937 1909 /*
1938 1910 * And now drop the SE_SHARED lock(s).
1939 1911 */
1940 1912 if (dyn_ism_unmap) {
1941 1913 for (i = 0; i < npages; i++) {
1942 1914 page_unlock(ppa[i]);
1943 1915 }
1944 1916 }
1945 1917 }
1946 1918
1947 1919 if (!dyn_ism_unmap) {
1948 1920 if (hat_share(seg->s_as->a_hat, shm_addr,
1949 1921 curspt->a_hat, segspt_addr, ptob(npages),
1950 1922 seg->s_szc) != 0) {
1951 1923 panic("hat_share err in DISM fault");
1952 1924 /* NOTREACHED */
1953 1925 }
1954 1926 if (type == F_INVAL) {
1955 1927 for (i = 0; i < npages; i++) {
1956 1928 page_unlock(ppa[i]);
1957 1929 }
1958 1930 }
1959 1931 }
1960 1932 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1961 1933 dism_err:
1962 1934 kmem_free(ppa, npages * sizeof (page_t *));
1963 1935 return (err);
1964 1936
1965 1937 case F_SOFTUNLOCK:
1966 1938
1967 1939 /*
1968 1940 	 * This is a bit ugly: we pass in the real seg pointer,
1969 1941 * but the segspt_addr is the virtual address within the
1970 1942 * dummy seg.
1971 1943 */
1972 1944 segspt_softunlock(seg, segspt_addr, size, rw);
1973 1945 return (0);
1974 1946
1975 1947 case F_PROT:
1976 1948
1977 1949 /*
1978 1950 * This takes care of the unusual case where a user
1979 1951 * allocates a stack in shared memory and a register
1980 1952 * window overflow is written to that stack page before
1981 1953 * it is otherwise modified.
1982 1954 *
1983 1955 * We can get away with this because ISM segments are
1984 1956 * always rw. Other than this unusual case, there
1985 1957 * should be no instances of protection violations.
1986 1958 */
1987 1959 return (0);
1988 1960
1989 1961 default:
1990 1962 #ifdef DEBUG
1991 1963 panic("segspt_dismfault default type?");
1992 1964 #else
1993 1965 return (FC_NOMAP);
1994 1966 #endif
1995 1967 }
1996 1968 }
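
To make the alignment arithmetic in the F_SOFTLOCK/F_INVAL path concrete, here is a small standalone sketch of how shm_addr and size are derived, assuming a 4 MB large page and made-up addresses; the macro definitions mirror the usual sys/sysmacros.h forms:

#include <stdio.h>
#include <stdint.h>

#define	P2ALIGN(x, a)	((x) & -(a))
#define	P2ROUNDUP(x, a)	(-(-(x) & -(a)))

int
main(void)
{
	uintptr_t pgsz = 4UL * 1024 * 1024;	/* page_get_pagesize(szc) */
	uintptr_t addr = 0x40123000;		/* faulting address */
	uintptr_t len = 0x2000;			/* fault length */

	uintptr_t shm_addr = P2ALIGN(addr, pgsz);
	uintptr_t size = P2ROUNDUP(addr + len - shm_addr, pgsz);

	/* prints shm_addr 0x40000000 size 0x400000: one 4 MB chunk */
	printf("shm_addr %#lx size %#lx\n",
	    (unsigned long)shm_addr, (unsigned long)size);
	return (0);
}
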
1997 1969
1998 1970
1999 1971 faultcode_t
2000 1972 segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
2001 1973 size_t len, enum fault_type type, enum seg_rw rw)
2002 1974 {
2003 1975 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2004 1976 struct seg *sptseg = shmd->shm_sptseg;
2005 1977 struct as *curspt = shmd->shm_sptas;
2006 1978 struct spt_data *sptd = sptseg->s_data;
2007 1979 pgcnt_t npages;
2008 1980 size_t size;
2009 1981 caddr_t sptseg_addr, shm_addr;
2010 1982 page_t *pp, **ppa;
2011 1983 int i;
2012 1984 u_offset_t offset;
2013 1985 ulong_t anon_index = 0;
2014 1986 struct vnode *vp;
2015 1987 struct anon_map *amp; /* XXX - for locknest */
2016 1988 struct anon *ap = NULL;
2017 1989 size_t pgsz;
2018 1990 pgcnt_t pgcnt;
2019 1991 caddr_t a;
2020 1992 pgcnt_t pidx;
2021 1993 size_t sz;
2022 1994
2023 1995 #ifdef lint
2024 1996 hat = hat;
2025 1997 #endif
2026 1998
2027 1999 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2028 2000
2029 2001 if (sptd->spt_flags & SHM_PAGEABLE) {
2030 2002 return (segspt_dismfault(hat, seg, addr, len, type, rw));
2031 2003 }
2032 2004
2033 2005 /*
2034 2006 	 * Because of the way spt is implemented,
2035 2007 	 * the realsize of the segment does not have to be
2036 2008 	 * equal to the segment size itself. The segment size is
2037 2009 	 * often in multiples of a page size larger than PAGESIZE.
2038 2010 	 * The realsize is rounded up to the nearest PAGESIZE
2039 2011 	 * based on what the user requested. This is a bit of
2040 2012 	 * ugliness that is historical but not easily fixed
2041 2013 * without re-designing the higher levels of ISM.
2042 2014 */
2043 2015 ASSERT(addr >= seg->s_base);
2044 2016 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
2045 2017 return (FC_NOMAP);
2046 2018 /*
2047 2019 * For all of the following cases except F_PROT, we need to
2048 2020 * make any necessary adjustments to addr and len
2049 2021 * and get all of the necessary page_t's into an array called ppa[].
2050 2022 *
2051 2023 * The code in shmat() forces base addr and len of ISM segment
2052 2024 * to be aligned to largest page size supported. Therefore,
2053 2025 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
2054 2026 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
2055 2027 * in large pagesize chunks, or else we will screw up the HAT
2056 2028 * layer by calling hat_memload_array() with differing page sizes
2057 2029 * over a given virtual range.
2058 2030 */
2059 2031 pgsz = page_get_pagesize(sptseg->s_szc);
2060 2032 pgcnt = page_get_pagecnt(sptseg->s_szc);
2061 2033 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
2062 2034 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
2063 2035 npages = btopr(size);
2064 2036
2065 2037 /*
2066 2038 * Now we need to convert from addr in segshm to addr in segspt.
2067 2039 */
2068 2040 anon_index = seg_page(seg, shm_addr);
2069 2041 sptseg_addr = sptseg->s_base + ptob(anon_index);
2070 2042
2071 2043 /*
2072 2044 * And now we may have to adjust npages downward if we have
2073 2045 * exceeded the realsize of the segment or initial anon
2074 2046 * allocations.
2075 2047 */
2076 2048 if ((sptseg_addr + ptob(npages)) >
2077 2049 (sptseg->s_base + sptd->spt_realsize))
2078 2050 size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr;
2079 2051
2080 2052 npages = btopr(size);
2081 2053
2082 2054 ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size));
2083 2055 ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0);
2084 2056
2085 2057 switch (type) {
2086 2058
2087 2059 case F_SOFTLOCK:
2088 2060
2089 2061 /*
2090 2062 * availrmem is decremented once during anon_swap_adjust()
2091 2063 * and is incremented during the anon_unresv(), which is
2092 2064 * called from shm_rm_amp() when the segment is destroyed.
2093 2065 */
2094 2066 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
2095 2067 /*
2096 2068 * Some platforms assume that ISM pages are SE_SHARED
2097 2069 * locked for the entire life of the segment.
2098 2070 */
2099 2071 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0))
2100 2072 return (0);
2101 2073 /*
2102 2074 * Fall through to the F_INVAL case to load up the hat layer
2103 2075 * entries with the HAT_LOAD_LOCK flag.
2104 2076 */
2105 2077
2106 2078 /* FALLTHRU */
2107 2079 case F_INVAL:
2108 2080
2109 2081 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
2110 2082 return (FC_NOMAP);
2111 2083
2112 2084 /*
2113 2085 * Some platforms that do NOT support DYNAMIC_ISM_UNMAP
2114 2086 * may still rely on this call to hat_share(). That
2115 2087 		 * would imply that those hats can fault on a
2116 2088 * HAT_LOAD_LOCK translation, which would seem
2117 2089 * contradictory.
2118 2090 */
2119 2091 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2120 2092 if (hat_share(seg->s_as->a_hat, seg->s_base,
2121 2093 curspt->a_hat, sptseg->s_base,
2122 2094 sptseg->s_size, sptseg->s_szc) != 0) {
2123 2095 panic("hat_share error in ISM fault");
2124 2096 /*NOTREACHED*/
2125 2097 }
2126 2098 return (0);
2127 2099 }
2128 2100 ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP);
2129 2101
2130 2102 /*
2131 2103 	 * I see no need to lock the real seg
2132 2104 * here, because all of our work will be on the underlying
2133 2105 * dummy seg.
2134 2106 *
2135 2107 * sptseg_addr and npages now account for large pages.
2136 2108 */
2137 2109 amp = sptd->spt_amp;
2138 2110 ASSERT(amp != NULL);
2139 2111 anon_index = seg_page(sptseg, sptseg_addr);
2140 2112
2141 2113 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2142 2114 for (i = 0; i < npages; i++) {
2143 2115 ap = anon_get_ptr(amp->ahp, anon_index++);
2144 2116 ASSERT(ap != NULL);
2145 2117 swap_xlate(ap, &vp, &offset);
2146 2118 pp = page_lookup(vp, offset, SE_SHARED);
2147 2119 ASSERT(pp != NULL);
2148 2120 ppa[i] = pp;
2149 2121 }
2150 2122 		ANON_LOCK_EXIT(&amp->a_rwlock);
2151 2123 ASSERT(i == npages);
2152 2124
2153 2125 /*
2154 2126 * We are already holding the as->a_lock on the user's
2155 2127 * real segment, but we need to hold the a_lock on the
2156 2128 * underlying dummy as. This is mostly to satisfy the
2157 2129 * underlying HAT layer.
2158 2130 */
2159 2131 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
2160 2132 a = sptseg_addr;
2161 2133 pidx = 0;
2162 2134 if (type == F_SOFTLOCK) {
2163 2135 /*
2164 2136 * Load up the translation keeping it
2165 2137 * locked and don't unlock the page.
2166 2138 */
2167 2139 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2168 2140 sz = MIN(pgsz, ptob(npages - pidx));
2169 2141 hat_memload_array(sptseg->s_as->a_hat, a,
2170 2142 sz, &ppa[pidx], sptd->spt_prot,
2171 2143 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2172 2144 }
2173 2145 } else {
2174 2146 /*
2175 2147 * Migrate pages marked for migration.
2176 2148 */
2177 2149 if (lgrp_optimizations())
2178 2150 page_migrate(seg, shm_addr, ppa, npages);
2179 2151
2180 2152 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2181 2153 sz = MIN(pgsz, ptob(npages - pidx));
2182 2154 hat_memload_array(sptseg->s_as->a_hat,
2183 2155 a, sz, &ppa[pidx],
2184 2156 sptd->spt_prot, HAT_LOAD_SHARE);
2185 2157 }
2186 2158
2187 2159 /*
2188 2160 * And now drop the SE_SHARED lock(s).
2189 2161 */
2190 2162 for (i = 0; i < npages; i++)
2191 2163 page_unlock(ppa[i]);
2192 2164 }
2193 2165 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
2194 2166
2195 2167 kmem_free(ppa, sizeof (page_t *) * npages);
2196 2168 return (0);
2197 2169 case F_SOFTUNLOCK:
2198 2170
2199 2171 		 * This is a bit ugly: we pass in the real seg pointer,
2200 2172 * This is a bit ugly, we pass in the real seg pointer,
2201 2173 * but the sptseg_addr is the virtual address within the
2202 2174 * dummy seg.
2203 2175 */
2204 2176 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2205 2177 return (0);
2206 2178
2207 2179 case F_PROT:
2208 2180
2209 2181 /*
2210 2182 * This takes care of the unusual case where a user
2211 2183 * allocates a stack in shared memory and a register
2212 2184 * window overflow is written to that stack page before
2213 2185 * it is otherwise modified.
2214 2186 *
2215 2187 * We can get away with this because ISM segments are
2216 2188 * always rw. Other than this unusual case, there
2217 2189 * should be no instances of protection violations.
2218 2190 */
2219 2191 return (0);
2220 2192
2221 2193 default:
2222 2194 #ifdef DEBUG
2223 2195 cmn_err(CE_WARN, "segspt_shmfault default type?");
2224 2196 #endif
2225 2197 return (FC_NOMAP);
2226 2198 }
2227 2199 }
2228 2200
2229 2201 /*ARGSUSED*/
2230 2202 static faultcode_t
2231 2203 segspt_shmfaulta(struct seg *seg, caddr_t addr)
2232 2204 {
2233 2205 return (0);
2234 2206 }
2235 2207
2236 2208 /*ARGSUSED*/
2237 2209 static int
2238 2210 segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta)
2239 2211 {
2240 2212 return (0);
2241 2213 }
2242 2214
2243 2215 /*
2244 2216 * duplicate the shared page tables
2245 2217 */
2246 2218 int
2247 2219 segspt_shmdup(struct seg *seg, struct seg *newseg)
2248 2220 {
2249 2221 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2250 2222 struct anon_map *amp = shmd->shm_amp;
2251 2223 struct shm_data *shmd_new;
2252 2224 struct seg *spt_seg = shmd->shm_sptseg;
2253 2225 struct spt_data *sptd = spt_seg->s_data;
2254 2226 int error = 0;
2255 2227
2256 2228 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
2257 2229
2258 2230 shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2259 2231 newseg->s_data = (void *)shmd_new;
2260 2232 shmd_new->shm_sptas = shmd->shm_sptas;
2261 2233 shmd_new->shm_amp = amp;
2262 2234 shmd_new->shm_sptseg = shmd->shm_sptseg;
2263 2235 newseg->s_ops = &segspt_shmops;
2264 2236 newseg->s_szc = seg->s_szc;
2265 2237 ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2266 2238
2267 2239 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
2268 2240 	amp->refcnt++;
2269 2241 	ANON_LOCK_EXIT(&amp->a_rwlock);
2270 2242
2271 2243 if (sptd->spt_flags & SHM_PAGEABLE) {
2272 2244 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2273 2245 shmd_new->shm_lckpgs = 0;
2274 2246 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2275 2247 if ((error = hat_share(newseg->s_as->a_hat,
2276 2248 newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
2277 2249 seg->s_size, seg->s_szc)) != 0) {
2278 2250 kmem_free(shmd_new->shm_vpage,
2279 2251 btopr(amp->size));
2280 2252 }
2281 2253 }
2282 2254 return (error);
2283 2255 } else {
2284 2256 return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2285 2257 shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2286 2258 seg->s_szc));
2287 2259
2288 2260 }
2289 2261 }
2290 2262
2291 2263 /*ARGSUSED*/
2292 2264 int
2293 2265 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2294 2266 {
2295 2267 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2296 2268 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2297 2269
2298 2270 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2299 2271
2300 2272 /*
2301 2273 * ISM segment is always rw.
2302 2274 */
2303 2275 return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2304 2276 }
2305 2277
2306 2278 /*
2307 2279 * Return an array of locked large pages, for empty slots allocate
2308 2280 * private zero-filled anon pages.
2309 2281 */
2310 2282 static int
2311 2283 spt_anon_getpages(
2312 2284 struct seg *sptseg,
2313 2285 caddr_t sptaddr,
2314 2286 size_t len,
2315 2287 page_t *ppa[])
2316 2288 {
2317 2289 struct spt_data *sptd = sptseg->s_data;
2318 2290 struct anon_map *amp = sptd->spt_amp;
2319 2291 enum seg_rw rw = sptd->spt_prot;
2320 2292 uint_t szc = sptseg->s_szc;
2321 2293 size_t pg_sz, share_sz = page_get_pagesize(szc);
2322 2294 pgcnt_t lp_npgs;
2323 2295 caddr_t lp_addr, e_sptaddr;
2324 2296 uint_t vpprot, ppa_szc = 0;
2325 2297 struct vpage *vpage = NULL;
2326 2298 ulong_t j, ppa_idx;
2327 2299 int err, ierr = 0;
2328 2300 pgcnt_t an_idx;
2329 2301 anon_sync_obj_t cookie;
2330 2302 int anon_locked = 0;
2331 2303 pgcnt_t amp_pgs;
2332 2304
2333 2305
2334 2306 ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz));
2335 2307 ASSERT(len != 0);
2336 2308
2337 2309 pg_sz = share_sz;
2338 2310 lp_npgs = btop(pg_sz);
2339 2311 lp_addr = sptaddr;
2340 2312 e_sptaddr = sptaddr + len;
2341 2313 an_idx = seg_page(sptseg, sptaddr);
2342 2314 ppa_idx = 0;
2343 2315
2344 2316 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2345 2317
2346 2318 amp_pgs = page_get_pagecnt(amp->a_szc);
2347 2319
2348 2320 /*CONSTCOND*/
2349 2321 while (1) {
2350 2322 for (; lp_addr < e_sptaddr;
2351 2323 an_idx += lp_npgs, lp_addr += pg_sz, ppa_idx += lp_npgs) {
2352 2324
2353 2325 /*
2354 2326 * If we're currently locked, and we get to a new
2355 2327 * page, unlock our current anon chunk.
2356 2328 */
2357 2329 if (anon_locked && P2PHASE(an_idx, amp_pgs) == 0) {
2358 2330 anon_array_exit(&cookie);
2359 2331 anon_locked = 0;
2360 2332 }
2361 2333 if (!anon_locked) {
2362 2334 anon_array_enter(amp, an_idx, &cookie);
2363 2335 anon_locked = 1;
2364 2336 }
2365 2337 ppa_szc = (uint_t)-1;
2366 2338 ierr = anon_map_getpages(amp, an_idx, szc, sptseg,
2367 2339 lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx],
2368 2340 &ppa_szc, vpage, rw, 0, segvn_anypgsz, 0, kcred);
2369 2341
2370 2342 if (ierr != 0) {
2371 2343 if (ierr > 0) {
2372 2344 err = FC_MAKE_ERR(ierr);
2373 2345 goto lpgs_err;
2374 2346 }
2375 2347 break;
2376 2348 }
2377 2349 }
2378 2350 if (lp_addr == e_sptaddr) {
2379 2351 break;
2380 2352 }
2381 2353 ASSERT(lp_addr < e_sptaddr);
2382 2354
2383 2355 /*
2384 2356 * ierr == -1 means we failed to allocate a large page.
2385 2357 * so do a size down operation.
2386 2358 *
2387 2359 * ierr == -2 means some other process that privately shares
2388 2360 * pages with this process has allocated a larger page and we
2389 2361 * need to retry with larger pages. So do a size up
2390 2362 * operation. This relies on the fact that large pages are
2391 2363 * never partially shared i.e. if we share any constituent
2392 2364 * page of a large page with another process we must share the
2393 2365 * entire large page. Note this cannot happen for SOFTLOCK
2394 2366 		 * case, unless current address (lp_addr) is at the beginning
2395 2367 * of the next page size boundary because the other process
2396 2368 * couldn't have relocated locked pages.
2397 2369 */
2398 2370 ASSERT(ierr == -1 || ierr == -2);
2399 2371 if (segvn_anypgsz) {
2400 2372 ASSERT(ierr == -2 || szc != 0);
2401 2373 ASSERT(ierr == -1 || szc < sptseg->s_szc);
2402 2374 szc = (ierr == -1) ? szc - 1 : szc + 1;
2403 2375 } else {
2404 2376 /*
2405 2377 * For faults and segvn_anypgsz == 0
2406 2378 * we need to be careful not to loop forever
2407 2379 * if existing page is found with szc other
2408 2380 * than 0 or seg->s_szc. This could be due
2409 2381 * to page relocations on behalf of DR or
2410 2382 * more likely large page creation. For this
2411 2383 * case simply re-size to existing page's szc
2412 2384 * if returned by anon_map_getpages().
2413 2385 */
2414 2386 if (ppa_szc == (uint_t)-1) {
2415 2387 szc = (ierr == -1) ? 0 : sptseg->s_szc;
2416 2388 } else {
2417 2389 ASSERT(ppa_szc <= sptseg->s_szc);
2418 2390 ASSERT(ierr == -2 || ppa_szc < szc);
2419 2391 ASSERT(ierr == -1 || ppa_szc > szc);
2420 2392 szc = ppa_szc;
2421 2393 }
2422 2394 }
2423 2395 pg_sz = page_get_pagesize(szc);
2424 2396 lp_npgs = btop(pg_sz);
2425 2397 ASSERT(IS_P2ALIGNED(lp_addr, pg_sz));
2426 2398 }
2427 2399 if (anon_locked) {
2428 2400 anon_array_exit(&cookie);
2429 2401 }
2430 2402 	ANON_LOCK_EXIT(&amp->a_rwlock);
2431 2403 return (0);
2432 2404
2433 2405 lpgs_err:
2434 2406 if (anon_locked) {
2435 2407 anon_array_exit(&cookie);
2436 2408 }
2437 2409 	ANON_LOCK_EXIT(&amp->a_rwlock);
2438 2410 for (j = 0; j < ppa_idx; j++)
2439 2411 page_unlock(ppa[j]);
2440 2412 return (err);
2441 2413 }
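
The size-up/size-down retry logic above can be condensed into a short sketch; next_szc is a hypothetical helper, not in the source, with anypgsz standing in for segvn_anypgsz. An ierr of -1 (large-page allocation failed) steps the size class down, -2 (another process already shares a larger page) steps it up, and when anypgsz is clear the size instead snaps to the existing page's class reported in ppa_szc:

static unsigned int
next_szc(int ierr, unsigned int szc, unsigned int ppa_szc,
    unsigned int seg_szc, int anypgsz)
{
	if (anypgsz)
		return ((ierr == -1) ? szc - 1 : szc + 1);
	if (ppa_szc == (unsigned int)-1)	/* no existing page found */
		return ((ierr == -1) ? 0 : seg_szc);
	return (ppa_szc);			/* match the existing page */
}
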
2442 2414
2443 2415 /*
2444 2416 * count the number of bytes in a set of spt pages that are currently not
2445 2417 * locked
2446 2418 */
2447 2419 static rctl_qty_t
2448 2420 spt_unlockedbytes(pgcnt_t npages, page_t **ppa)
2449 2421 {
2450 2422 ulong_t i;
2451 2423 rctl_qty_t unlocked = 0;
2452 2424
2453 2425 for (i = 0; i < npages; i++) {
2454 2426 if (ppa[i]->p_lckcnt == 0)
2455 2427 unlocked += PAGESIZE;
2456 2428 }
2457 2429 return (unlocked);
2458 2430 }
2459 2431
2460 2432 extern u_longlong_t randtick(void);
2461 2433 /* number of locks to reserve/skip by spt_lockpages() and spt_unlockpages() */
2462 2434 #define NLCK (NCPU_P2)
2463 2435 /* Random number with a range [0, n-1], n must be power of two */
2464 2436 #define RAND_P2(n) \
2465 2437 ((((long)curthread >> PTR24_LSB) ^ (long)randtick()) & ((n) - 1))
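
As spt_lockpages() below uses them, these two macros produce a per-refill batch size in the range [NLCK, 2*NLCK), randomized per thread so that competing threads exhaust and refill their freemem_lock reservations at staggered times (NCPU_P2 keeps NLCK a power of two, which RAND_P2 requires). A sketch of the computation as a hypothetical helper, not part of the source:

static pgcnt_t
lock_batch_size(pgcnt_t pages_left)
{
	pgcnt_t nlck = NLCK + RAND_P2(NLCK);	/* NLCK <= nlck < 2*NLCK */

	return (MIN(nlck, pages_left));		/* never over-reserve */
}
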
2466 2438
2467 2439 int
2468 2440 spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
2469 2441 page_t **ppa, ulong_t *lockmap, size_t pos,
2470 2442 rctl_qty_t *locked)
2471 2443 {
2472 2444 struct shm_data *shmd = seg->s_data;
2473 2445 struct spt_data *sptd = shmd->shm_sptseg->s_data;
2474 2446 ulong_t i;
2475 2447 int kernel;
2476 2448 pgcnt_t nlck = 0;
2477 2449 int rv = 0;
2478 2450 int use_reserved = 1;
2479 2451
2480 2452 /* return the number of bytes actually locked */
2481 2453 *locked = 0;
2482 2454
2483 2455 /*
2484 2456 * To avoid contention on freemem_lock, availrmem and pages_locked
2485 2457 * global counters are updated only every nlck locked pages instead of
2486 2458 * every time. Reserve nlck locks up front and deduct from this
2487 2459 * reservation for each page that requires a lock. When the reservation
2488 2460 * is consumed, reserve again. nlck is randomized, so the competing
2489 2461 * threads do not fall into a cyclic lock contention pattern. When
2490 2462 * memory is low, the lock ahead is disabled, and instead page_pp_lock()
2491 2463 * is used to lock pages.
2492 2464 */
2493 2465 for (i = 0; i < npages; anon_index++, pos++, i++) {
2494 2466 if (nlck == 0 && use_reserved == 1) {
2495 2467 nlck = NLCK + RAND_P2(NLCK);
2496 2468 /* if fewer loops left, decrease nlck */
2497 2469 nlck = MIN(nlck, npages - i);
2498 2470 /*
2499 2471 * Reserve nlck locks up front and deduct from this
2500 2472 * reservation for each page that requires a lock. When
2501 2473 * the reservation is consumed, reserve again.
2502 2474 */
2503 2475 mutex_enter(&freemem_lock);
2504 2476 if ((availrmem - nlck) < pages_pp_maximum) {
2505 2477 /* Do not do advance memory reserves */
2506 2478 use_reserved = 0;
2507 2479 } else {
2508 2480 availrmem -= nlck;
2509 2481 pages_locked += nlck;
2510 2482 }
2511 2483 mutex_exit(&freemem_lock);
2512 2484 }
2513 2485 if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) {
2514 2486 if (sptd->spt_ppa_lckcnt[anon_index] <
2515 2487 (ushort_t)DISM_LOCK_MAX) {
2516 2488 if (++sptd->spt_ppa_lckcnt[anon_index] ==
2517 2489 (ushort_t)DISM_LOCK_MAX) {
2518 2490 cmn_err(CE_WARN,
2519 2491 "DISM page lock limit "
2520 2492 "reached on DISM offset 0x%lx\n",
2521 2493 anon_index << PAGESHIFT);
2522 2494 }
2523 2495 kernel = (sptd->spt_ppa &&
2524 2496 sptd->spt_ppa[anon_index]);
2525 2497 if (!page_pp_lock(ppa[i], 0, kernel ||
2526 2498 use_reserved)) {
2527 2499 sptd->spt_ppa_lckcnt[anon_index]--;
2528 2500 rv = EAGAIN;
2529 2501 break;
2530 2502 }
2531 2503 /* if this is a newly locked page, count it */
2532 2504 if (ppa[i]->p_lckcnt == 1) {
2533 2505 if (kernel == 0 && use_reserved == 1)
2534 2506 nlck--;
2535 2507 *locked += PAGESIZE;
2536 2508 }
2537 2509 shmd->shm_lckpgs++;
2538 2510 shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED;
2539 2511 if (lockmap != NULL)
2540 2512 BT_SET(lockmap, pos);
2541 2513 }
2542 2514 }
2543 2515 }
2544 2516 /* Return unused lock reservation */
2545 2517 if (nlck != 0 && use_reserved == 1) {
2546 2518 mutex_enter(&freemem_lock);
2547 2519 availrmem += nlck;
2548 2520 pages_locked -= nlck;
2549 2521 mutex_exit(&freemem_lock);
2550 2522 }
2551 2523
2552 2524 return (rv);
2553 2525 }
2554 2526
2555 2527 int
2556 2528 spt_unlockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
2557 2529 rctl_qty_t *unlocked)
2558 2530 {
2559 2531 struct shm_data *shmd = seg->s_data;
2560 2532 struct spt_data *sptd = shmd->shm_sptseg->s_data;
2561 2533 struct anon_map *amp = sptd->spt_amp;
2562 2534 struct anon *ap;
2563 2535 struct vnode *vp;
2564 2536 u_offset_t off;
2565 2537 struct page *pp;
2566 2538 int kernel;
2567 2539 anon_sync_obj_t cookie;
2568 2540 ulong_t i;
2569 2541 pgcnt_t nlck = 0;
2570 2542 pgcnt_t nlck_limit = NLCK;
2571 2543
2572 2544 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2573 2545 for (i = 0; i < npages; i++, anon_index++) {
2574 2546 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
2575 2547 anon_array_enter(amp, anon_index, &cookie);
2576 2548 ap = anon_get_ptr(amp->ahp, anon_index);
2577 2549 ASSERT(ap);
2578 2550
2579 2551 swap_xlate(ap, &vp, &off);
2580 2552 anon_array_exit(&cookie);
2581 2553 pp = page_lookup(vp, off, SE_SHARED);
2582 2554 ASSERT(pp);
2583 2555 /*
2584 2556 			 * availrmem is decremented only for pages which are not
2585 2557 			 * in seg pcache; for pages in seg pcache, availrmem was
2586 2558 			 * decremented in _dismpagelock()
2587 2559 */
2588 2560 kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]);
2589 2561 ASSERT(pp->p_lckcnt > 0);
2590 2562
2591 2563 /*
2592 2564 			 * unlock page but do not change availrmem; we do it
2593 2565 * ourselves every nlck loops.
2594 2566 */
2595 2567 page_pp_unlock(pp, 0, 1);
2596 2568 if (pp->p_lckcnt == 0) {
2597 2569 if (kernel == 0)
2598 2570 nlck++;
2599 2571 *unlocked += PAGESIZE;
2600 2572 }
2601 2573 page_unlock(pp);
2602 2574 shmd->shm_vpage[anon_index] &= ~DISM_PG_LOCKED;
2603 2575 sptd->spt_ppa_lckcnt[anon_index]--;
2604 2576 shmd->shm_lckpgs--;
2605 2577 }
2606 2578
2607 2579 /*
2608 2580 * To reduce freemem_lock contention, do not update availrmem
2609 2581 * until at least NLCK pages have been unlocked.
2610 2582 * 1. No need to update if nlck is zero
2611 2583 * 2. Always update if the last iteration
2612 2584 */
2613 2585 if (nlck > 0 && (nlck == nlck_limit || i == npages - 1)) {
2614 2586 mutex_enter(&freemem_lock);
2615 2587 availrmem += nlck;
2616 2588 pages_locked -= nlck;
2617 2589 mutex_exit(&freemem_lock);
2618 2590 nlck = 0;
2619 2591 nlck_limit = NLCK + RAND_P2(NLCK);
2620 2592 }
2621 2593 }
2622 2594 	ANON_LOCK_EXIT(&amp->a_rwlock);
2623 2595
2624 2596 return (0);
2625 2597 }
2626 2598
2627 2599 /*ARGSUSED*/
2628 2600 static int
2629 2601 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2630 2602 int attr, int op, ulong_t *lockmap, size_t pos)
2631 2603 {
2632 2604 struct shm_data *shmd = seg->s_data;
2633 2605 struct seg *sptseg = shmd->shm_sptseg;
2634 2606 struct spt_data *sptd = sptseg->s_data;
2635 2607 struct kshmid *sp = sptd->spt_amp->a_sp;
2636 2608 pgcnt_t npages, a_npages;
2637 2609 page_t **ppa;
2638 2610 pgcnt_t an_idx, a_an_idx, ppa_idx;
2639 2611 caddr_t spt_addr, a_addr; /* spt and aligned address */
2640 2612 size_t a_len; /* aligned len */
2641 2613 size_t share_sz;
2642 2614 ulong_t i;
2643 2615 int sts = 0;
2644 2616 rctl_qty_t unlocked = 0;
2645 2617 rctl_qty_t locked = 0;
2646 2618 struct proc *p = curproc;
2647 2619 kproject_t *proj;
2648 2620
2649 2621 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2650 2622 ASSERT(sp != NULL);
2651 2623
2652 2624 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2653 2625 return (0);
2654 2626 }
2655 2627
2656 2628 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2657 2629 an_idx = seg_page(seg, addr);
2658 2630 npages = btopr(len);
2659 2631
2660 2632 if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2661 2633 return (ENOMEM);
2662 2634 }
2663 2635
2664 2636 /*
2665 2637 * A shm's project never changes, so no lock needed.
2666 2638 * The shm has a hold on the project, so it will not go away.
2667 2639 * Since we have a mapping to shm within this zone, we know
2668 2640 * that the zone will not go away.
2669 2641 */
2670 2642 proj = sp->shm_perm.ipc_proj;
2671 2643
2672 2644 if (op == MC_LOCK) {
2673 2645
2674 2646 /*
2675 2647 * Need to align addr and size request if they are not
2676 2648 		 * aligned so we can always allocate large page(s); however,
2677 2649 		 * we only lock what was requested in the initial request.
2678 2650 */
2679 2651 share_sz = page_get_pagesize(sptseg->s_szc);
2680 2652 a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
2681 2653 a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)),
2682 2654 share_sz);
2683 2655 a_npages = btop(a_len);
2684 2656 a_an_idx = seg_page(seg, a_addr);
2685 2657 spt_addr = sptseg->s_base + ptob(a_an_idx);
2686 2658 ppa_idx = an_idx - a_an_idx;
2687 2659
2688 2660 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages),
2689 2661 KM_NOSLEEP)) == NULL) {
2690 2662 return (ENOMEM);
2691 2663 }
2692 2664
2693 2665 /*
2694 2666 * Don't cache any new pages for IO and
2695 2667 * flush any cached pages.
2696 2668 */
2697 2669 mutex_enter(&sptd->spt_lock);
2698 2670 if (sptd->spt_ppa != NULL)
2699 2671 sptd->spt_flags |= DISM_PPA_CHANGED;
2700 2672
2701 2673 sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa);
2702 2674 if (sts != 0) {
2703 2675 mutex_exit(&sptd->spt_lock);
2704 2676 kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2705 2677 return (sts);
2706 2678 }
2707 2679
2708 2680 mutex_enter(&sp->shm_mlock);
2709 2681 /* enforce locked memory rctl */
2710 2682 unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]);
2711 2683
2712 2684 mutex_enter(&p->p_lock);
2713 2685 if (rctl_incr_locked_mem(p, proj, unlocked, 0)) {
2714 2686 mutex_exit(&p->p_lock);
2715 2687 sts = EAGAIN;
2716 2688 } else {
2717 2689 mutex_exit(&p->p_lock);
2718 2690 sts = spt_lockpages(seg, an_idx, npages,
2719 2691 &ppa[ppa_idx], lockmap, pos, &locked);
2720 2692
2721 2693 /*
2722 2694 * correct locked count if not all pages could be
2723 2695 * locked
2724 2696 */
2725 2697 if ((unlocked - locked) > 0) {
2726 2698 rctl_decr_locked_mem(NULL, proj,
2727 2699 (unlocked - locked), 0);
2728 2700 }
2729 2701 }
2730 2702 /*
2731 2703 * unlock pages
2732 2704 */
2733 2705 for (i = 0; i < a_npages; i++)
2734 2706 page_unlock(ppa[i]);
2735 2707 if (sptd->spt_ppa != NULL)
2736 2708 sptd->spt_flags |= DISM_PPA_CHANGED;
2737 2709 mutex_exit(&sp->shm_mlock);
2738 2710 mutex_exit(&sptd->spt_lock);
2739 2711
2740 2712 kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2741 2713
2742 2714 } else if (op == MC_UNLOCK) { /* unlock */
2743 2715 page_t **ppa;
2744 2716
2745 2717 mutex_enter(&sptd->spt_lock);
2746 2718 if (shmd->shm_lckpgs == 0) {
2747 2719 mutex_exit(&sptd->spt_lock);
2748 2720 return (0);
2749 2721 }
2750 2722 /*
2751 2723 * Don't cache new IO pages.
2752 2724 */
2753 2725 if (sptd->spt_ppa != NULL)
2754 2726 sptd->spt_flags |= DISM_PPA_CHANGED;
2755 2727
2756 2728 mutex_enter(&sp->shm_mlock);
2757 2729 sts = spt_unlockpages(seg, an_idx, npages, &unlocked);
2758 2730 if ((ppa = sptd->spt_ppa) != NULL)
2759 2731 sptd->spt_flags |= DISM_PPA_CHANGED;
2760 2732 mutex_exit(&sptd->spt_lock);
2761 2733
2762 2734 rctl_decr_locked_mem(NULL, proj, unlocked, 0);
2763 2735 mutex_exit(&sp->shm_mlock);
2764 2736
2765 2737 if (ppa != NULL)
2766 2738 seg_ppurge_wiredpp(ppa);
2767 2739 }
2768 2740 return (sts);
2769 2741 }
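
For context, segspt_shmlockop() is normally reached from userland when a process locks a DISM segment. A minimal sketch of that path, assuming a Solaris-style sys/shm.h where SHM_PAGEABLE is the shmat() flag that requests DISM; error handling is abbreviated:

#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	size_t len = 8 * 1024 * 1024;
	int id = shmget(IPC_PRIVATE, len, IPC_CREAT | 0600);

	if (id == -1) {
		perror("shmget");
		return (1);
	}
	/* SHM_PAGEABLE asks for DISM rather than classic ISM */
	void *p = shmat(id, NULL, SHM_PAGEABLE);
	if (p == (void *)-1) {
		perror("shmat");
		return (1);
	}
	/* drives as_ctl(MC_LOCK) and ends up in segspt_shmlockop() */
	if (mlock(p, len) != 0)
		perror("mlock");
	(void) shmctl(id, IPC_RMID, NULL);
	return (0);
}
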
2770 2742
2771 2743 /*ARGSUSED*/
2772 2744 int
2773 2745 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2774 2746 {
2775 2747 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2776 2748 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2777 2749 spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2778 2750
2779 2751 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2780 2752
2781 2753 /*
2782 2754 * ISM segment is always rw.
2783 2755 */
2784 2756 while (--pgno >= 0)
2785 2757 *protv++ = sptd->spt_prot;
2786 2758 return (0);
2787 2759 }
2788 2760
2789 2761 /*ARGSUSED*/
2790 2762 u_offset_t
2791 2763 segspt_shmgetoffset(struct seg *seg, caddr_t addr)
2792 2764 {
2793 2765 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2794 2766
2795 2767 /* Offset does not matter in ISM memory */
2796 2768
2797 2769 return ((u_offset_t)0);
2798 2770 }
2799 2771
2800 2772 /* ARGSUSED */
2801 2773 int
2802 2774 segspt_shmgettype(struct seg *seg, caddr_t addr)
2803 2775 {
2804 2776 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2805 2777 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2806 2778
2807 2779 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2808 2780
2809 2781 /*
2810 2782 	 * The shared memory mapping is always MAP_SHARED; swap is only
2811 2783 	 * reserved for DISM.
2812 2784 */
2813 2785 return (MAP_SHARED |
2814 2786 ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2815 2787 }
2816 2788
2817 2789 /*ARGSUSED*/
2818 2790 int
2819 2791 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2820 2792 {
2821 2793 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2822 2794 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2823 2795
2824 2796 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2825 2797
2826 2798 *vpp = sptd->spt_vp;
2827 2799 return (0);
2828 2800 }
2829 2801
2830 2802 /*
2831 2803 * We need to wait for pending IO to complete to a DISM segment in order for
2832 2804 * pages to get kicked out of the seg_pcache. 120 seconds should be more
2833 2805 * than enough time to wait.
2834 2806 */
2835 2807 static clock_t spt_pcache_wait = 120;
2836 2808
2837 2809 /*ARGSUSED*/
2838 2810 static int
2839 2811 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2840 2812 {
2841 2813 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2842 2814 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2843 2815 struct anon_map *amp;
2844 2816 pgcnt_t pg_idx;
2845 2817 ushort_t gen;
2846 2818 clock_t end_lbolt;
2847 2819 int writer;
2848 2820 page_t **ppa;
2849 2821
2850 2822 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2851 2823
2852 2824 if (behav == MADV_FREE) {
2853 2825 if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2854 2826 return (0);
2855 2827
2856 2828 amp = sptd->spt_amp;
2857 2829 pg_idx = seg_page(seg, addr);
2858 2830
2859 2831 mutex_enter(&sptd->spt_lock);
2860 2832 if ((ppa = sptd->spt_ppa) == NULL) {
2861 2833 mutex_exit(&sptd->spt_lock);
2862 2834 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2863 2835 			anon_disclaim(amp, pg_idx, len);
2864 2836 			ANON_LOCK_EXIT(&amp->a_rwlock);
2865 2837 return (0);
2866 2838 }
2867 2839
2868 2840 sptd->spt_flags |= DISM_PPA_CHANGED;
2869 2841 gen = sptd->spt_gen;
2870 2842
2871 2843 mutex_exit(&sptd->spt_lock);
2872 2844
2873 2845 /*
2874 2846 * Purge all DISM cached pages
2875 2847 */
2876 2848 seg_ppurge_wiredpp(ppa);
2877 2849
2878 2850 /*
2879 2851 * Drop the AS_LOCK so that other threads can grab it
2880 2852 * in the as_pageunlock path and hopefully get the segment
2881 2853 * kicked out of the seg_pcache. We bump the shm_softlockcnt
2882 2854 * to keep this segment resident.
2883 2855 */
2884 2856 writer = AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock);
2885 2857 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2886 2858 AS_LOCK_EXIT(seg->s_as, &seg->s_as->a_lock);
2887 2859
2888 2860 mutex_enter(&sptd->spt_lock);
2889 2861
2890 2862 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);
2891 2863
2892 2864 /*
2893 2865 * Try to wait for pages to get kicked out of the seg_pcache.
2894 2866 */
2895 2867 while (sptd->spt_gen == gen &&
2896 2868 (sptd->spt_flags & DISM_PPA_CHANGED) &&
2897 2869 ddi_get_lbolt() < end_lbolt) {
2898 2870 if (!cv_timedwait_sig(&sptd->spt_cv,
2899 2871 &sptd->spt_lock, end_lbolt)) {
2900 2872 break;
2901 2873 }
2902 2874 }
2903 2875
2904 2876 mutex_exit(&sptd->spt_lock);
2905 2877
2906 2878 /* Regrab the AS_LOCK and release our hold on the segment */
2907 2879 AS_LOCK_ENTER(seg->s_as, &seg->s_as->a_lock,
2908 2880 writer ? RW_WRITER : RW_READER);
2909 2881 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2910 2882 if (shmd->shm_softlockcnt <= 0) {
2911 2883 if (AS_ISUNMAPWAIT(seg->s_as)) {
2912 2884 mutex_enter(&seg->s_as->a_contents);
2913 2885 if (AS_ISUNMAPWAIT(seg->s_as)) {
2914 2886 AS_CLRUNMAPWAIT(seg->s_as);
2915 2887 cv_broadcast(&seg->s_as->a_cv);
2916 2888 }
2917 2889 mutex_exit(&seg->s_as->a_contents);
2918 2890 }
2919 2891 }
2920 2892
2921 2893 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2922 2894 		anon_disclaim(amp, pg_idx, len);
2923 2895 		ANON_LOCK_EXIT(&amp->a_rwlock);
2924 2896 } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2925 2897 behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2926 2898 int already_set;
2927 2899 ulong_t anon_index;
2928 2900 lgrp_mem_policy_t policy;
2929 2901 caddr_t shm_addr;
2930 2902 size_t share_size;
2931 2903 size_t size;
2932 2904 struct seg *sptseg = shmd->shm_sptseg;
2933 2905 caddr_t sptseg_addr;
2934 2906
2935 2907 /*
2936 2908 * Align address and length to page size of underlying segment
2937 2909 */
2938 2910 share_size = page_get_pagesize(shmd->shm_sptseg->s_szc);
2939 2911 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
2940 2912 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)),
2941 2913 share_size);
2942 2914
2943 2915 amp = shmd->shm_amp;
2944 2916 anon_index = seg_page(seg, shm_addr);
2945 2917
2946 2918 /*
2947 2919 * And now we may have to adjust size downward if we have
2948 2920 * exceeded the realsize of the segment or initial anon
2949 2921 * allocations.
2950 2922 */
2951 2923 sptseg_addr = sptseg->s_base + ptob(anon_index);
2952 2924 if ((sptseg_addr + size) >
2953 2925 (sptseg->s_base + sptd->spt_realsize))
2954 2926 size = (sptseg->s_base + sptd->spt_realsize) -
2955 2927 sptseg_addr;
2956 2928
2957 2929 /*
2958 2930 * Set memory allocation policy for this segment
2959 2931 */
2960 2932 policy = lgrp_madv_to_policy(behav, len, MAP_SHARED);
2961 2933 already_set = lgrp_shm_policy_set(policy, amp, anon_index,
2962 2934 NULL, 0, len);
2963 2935
2964 2936 /*
2965 2937 		 * If a random memory allocation policy is set already,
2966 2938 		 * don't bother reapplying it.
2967 2939 */
2968 2940 if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy))
2969 2941 return (0);
2970 2942
2971 2943 /*
2972 2944 		 * Mark any existing pages in the given range for
2973 2945 		 * migration, flush the I/O page cache, and use the
2974 2946 		 * underlying segment to calculate the anon index and
2975 2947 		 * to get the anonmap and vnode pointer.
2976 2948 */
2977 2949 if (shmd->shm_softlockcnt > 0)
2978 2950 segspt_purge(seg);
2979 2951
2980 2952 page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0);
2981 2953 }
2982 2954
2983 2955 return (0);
2984 2956 }
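
The MADV_FREE path above bounds its wait with end_lbolt and re-evaluates DISM_PPA_CHANGED after every wakeup, giving up early on a signal or once spt_pcache_wait seconds have elapsed. A userland sketch of the same bounded-wait shape, with pthreads standing in for kernel condition variables and illustrative names only:

#include <pthread.h>
#include <time.h>

static pthread_mutex_t spt_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t spt_cv = PTHREAD_COND_INITIALIZER;
static int ppa_changed;		/* stands in for DISM_PPA_CHANGED */

static void
wait_for_purge(time_t timeout_sec)
{
	struct timespec end;

	/* absolute deadline, analogous to end_lbolt above */
	clock_gettime(CLOCK_REALTIME, &end);
	end.tv_sec += timeout_sec;

	pthread_mutex_lock(&spt_lock);
	while (ppa_changed &&
	    pthread_cond_timedwait(&spt_cv, &spt_lock, &end) == 0)
		;	/* woken but flag still set: keep waiting */
	pthread_mutex_unlock(&spt_lock);
}
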
2985 2957
2986 2958 /*ARGSUSED*/
2987 2959 void
2988 2960 segspt_shmdump(struct seg *seg)
2989 2961 {
2990 2962 /* no-op for ISM segment */
2991 2963 }
2992 2964
2993 2965 /*ARGSUSED*/
2994 2966 static faultcode_t
2995 2967 segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
2996 2968 {
2997 2969 return (ENOTSUP);
2998 2970 }
2999 2971
3000 2972 /*
3001 2973 * get a memory ID for an addr in a given segment
3002 2974 */
3003 2975 static int
3004 2976 segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
3005 2977 {
3006 2978 struct shm_data *shmd = (struct shm_data *)seg->s_data;
3007 2979 struct anon *ap;
3008 2980 size_t anon_index;
3009 2981 struct anon_map *amp = shmd->shm_amp;
3010 2982 struct spt_data *sptd = shmd->shm_sptseg->s_data;
3011 2983 struct seg *sptseg = shmd->shm_sptseg;
3012 2984 anon_sync_obj_t cookie;
3013 2985
3014 2986 anon_index = seg_page(seg, addr);
3015 2987
3016 2988 if (addr > (seg->s_base + sptd->spt_realsize)) {
3017 2989 return (EFAULT);
3018 2990 }
3019 2991
3020 2992 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
3021 2993 anon_array_enter(amp, anon_index, &cookie);
3022 2994 ap = anon_get_ptr(amp->ahp, anon_index);
3023 2995 if (ap == NULL) {
3024 2996 struct page *pp;
3025 2997 caddr_t spt_addr = sptseg->s_base + ptob(anon_index);
3026 2998
3027 2999 pp = anon_zero(sptseg, spt_addr, &ap, kcred);
3028 3000 if (pp == NULL) {
3029 3001 anon_array_exit(&cookie);
3030 3002 			ANON_LOCK_EXIT(&amp->a_rwlock);
3031 3003 return (ENOMEM);
3032 3004 }
3033 3005 (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP);
3034 3006 page_unlock(pp);
3035 3007 }
3036 3008 anon_array_exit(&cookie);
3037 3009 	ANON_LOCK_EXIT(&amp->a_rwlock);
3038 3010 memidp->val[0] = (uintptr_t)ap;
3039 3011 memidp->val[1] = (uintptr_t)addr & PAGEOFFSET;
3040 3012 return (0);
3041 3013 }
3042 3014
3043 3015 /*
3044 3016 * Get memory allocation policy info for specified address in given segment
3045 3017 */
3046 3018 static lgrp_mem_policy_info_t *
3047 3019 segspt_shmgetpolicy(struct seg *seg, caddr_t addr)
3048 3020 {
3049 3021 struct anon_map *amp;
3050 3022 ulong_t anon_index;
3051 3023 lgrp_mem_policy_info_t *policy_info;
3052 3024 struct shm_data *shm_data;
3053 3025
3054 3026 ASSERT(seg != NULL);
3055 3027
3056 3028 /*
3057 3029 * Get anon_map from segshm
3058 3030 *
3059 3031 	 * Assume that no lock needs to be held on the anon_map, since
3060 3032 	 * it should be protected by its reference count, which must be
3061 3033 	 * nonzero for an existing segment.
3062 3034 	 * Need to grab the readers lock on the policy tree, though.
3063 3035 */
3064 3036 shm_data = (struct shm_data *)seg->s_data;
3065 3037 if (shm_data == NULL)
3066 3038 return (NULL);
3067 3039 amp = shm_data->shm_amp;
3068 3040 ASSERT(amp->refcnt != 0);
3069 3041
3070 3042 /*
3071 3043 * Get policy info
3072 3044 *
3073 3045 * Assume starting anon index of 0
3074 3046 */
3075 3047 anon_index = seg_page(seg, addr);
3076 3048 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);
3077 3049
3078 3050 return (policy_info);
3079 3051 }
3080 3052
3081 3053 /*ARGSUSED*/
3082 3054 static int
3083 3055 segspt_shmcapable(struct seg *seg, segcapability_t capability)
3084 3056 {
3085 3057 return (0);
3086 3058 }