patch lower-case-segops
--- old/usr/src/uts/common/vm/vm_seg.c
+++ new/usr/src/uts/common/vm/vm_seg.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright (c) 2015, Joyent, Inc.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 /*
31 31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 32 * The Regents of the University of California
33 33 * All Rights Reserved
34 34 *
35 35 * University Acknowledgment- Portions of this document are derived from
36 36 * software developed by the University of California, Berkeley, and its
37 37 * contributors.
38 38 */
39 39
40 40 /*
41 41 * VM - segment management.
42 42 */
43 43
44 44 #include <sys/types.h>
45 45 #include <sys/inttypes.h>
46 46 #include <sys/t_lock.h>
47 47 #include <sys/param.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/sysmacros.h>
51 51 #include <sys/vmsystm.h>
52 52 #include <sys/tuneable.h>
53 53 #include <sys/debug.h>
54 54 #include <sys/fs/swapnode.h>
55 55 #include <sys/cmn_err.h>
56 56 #include <sys/callb.h>
57 57 #include <sys/mem_config.h>
58 58 #include <sys/mman.h>
59 59
60 60 #include <vm/hat.h>
61 61 #include <vm/as.h>
62 62 #include <vm/seg.h>
63 63 #include <vm/seg_kmem.h>
64 64 #include <vm/seg_spt.h>
65 65 #include <vm/seg_vn.h>
66 66 #include <vm/anon.h>
67 67
68 68 /*
69 69 * kstats for segment advise
70 70 */
71 71 segadvstat_t segadvstat = {
72 72 { "MADV_FREE_hit", KSTAT_DATA_ULONG },
73 73 { "MADV_FREE_miss", KSTAT_DATA_ULONG },
74 74 };
75 75
76 76 kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
77 77 uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);
78 78
79 79 /*
80 80 * entry in the segment page cache
81 81 */
82 82 struct seg_pcache {
83 83 struct seg_pcache *p_hnext; /* list for hashed blocks */
84 84 struct seg_pcache *p_hprev;
85 85 pcache_link_t p_plink; /* per segment/amp list */
86 86 void *p_htag0; /* segment/amp pointer */
87 87 caddr_t p_addr; /* base address/anon_idx */
88 88 size_t p_len; /* total bytes */
 89 89 size_t p_wlen; /* writable bytes at p_addr */
90 90 struct page **p_pp; /* pp shadow list */
91 91 seg_preclaim_cbfunc_t p_callback; /* reclaim callback function */
92 92 clock_t p_lbolt; /* lbolt from last use */
93 93 struct seg_phash *p_hashp; /* our pcache hash bucket */
94 94 uint_t p_active; /* active count */
95 95 uchar_t p_write; /* true if S_WRITE */
96 96 uchar_t p_ref; /* reference byte */
97 97 ushort_t p_flags; /* bit flags */
98 98 };
99 99
100 100 struct seg_phash {
101 101 struct seg_pcache *p_hnext; /* list for hashed blocks */
102 102 struct seg_pcache *p_hprev;
103 103 kmutex_t p_hmutex; /* protects hash bucket */
104 104 pcache_link_t p_halink[2]; /* active bucket linkages */
105 105 };
106 106
107 107 struct seg_phash_wired {
108 108 struct seg_pcache *p_hnext; /* list for hashed blocks */
109 109 struct seg_pcache *p_hprev;
110 110 kmutex_t p_hmutex; /* protects hash bucket */
111 111 };
112 112
113 113 /*
114 114 * A parameter to control a maximum number of bytes that can be
115 115 * purged from pcache at a time.
116 116 */
117 117 #define P_MAX_APURGE_BYTES (1024 * 1024 * 1024)
118 118
119 119 /*
120 120 * log2(fraction of pcache to reclaim at a time).
121 121 */
122 122 #define P_SHRINK_SHFT (5)
123 123
124 124 /*
125 125 * The following variables can be tuned via /etc/system.
126 126 */
127 127
128 128 int segpcache_enabled = 1; /* if 1, shadow lists are cached */
129 129 pgcnt_t segpcache_maxwindow = 0; /* max # of pages that can be cached */
130 130 ulong_t segpcache_hashsize_win = 0; /* # of non wired buckets */
131 131 ulong_t segpcache_hashsize_wired = 0; /* # of wired buckets */
132 132 int segpcache_reap_sec = 1; /* reap check rate in secs */
133 133 clock_t segpcache_reap_ticks = 0; /* reap interval in ticks */
134 134 int segpcache_pcp_maxage_sec = 1; /* pcp max age in secs */
135 135 clock_t segpcache_pcp_maxage_ticks = 0; /* pcp max age in ticks */
136 136 int segpcache_shrink_shift = P_SHRINK_SHFT; /* log2 reap fraction */
137 137 pgcnt_t segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES; /* max purge bytes */
138 138
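/*
 * Illustrative note, not part of this patch: the tunables above are plain
 * kernel globals, so they can be overridden from /etc/system before the
 * cache is sized in seg_pinit().  A hypothetical example:
 *
 *	set segpcache_enabled = 1
 *	set segpcache_maxwindow = 0x20000	(cap the window at 128K pages)
 *	set segpcache_reap_sec = 5		(reap check every 5 seconds)
 *
 * A value of 0 for the sizing tunables means "let seg_pinit() pick a
 * default based on physmem/availrmem", as the code further below shows.
 */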
139 139 static kmutex_t seg_pcache_mtx; /* protects seg_pdisabled counter */
140 140 static kmutex_t seg_pasync_mtx; /* protects async thread scheduling */
141 141 static kcondvar_t seg_pasync_cv;
142 142
143 143 #pragma align 64(pctrl1)
144 144 #pragma align 64(pctrl2)
145 145 #pragma align 64(pctrl3)
146 146
147 147 /*
148 148 * Keep frequently used variables together in one cache line.
149 149 */
150 150 static struct p_ctrl1 {
151 151 uint_t p_disabled; /* if not 0, caching temporarily off */
152 152 pgcnt_t p_maxwin; /* max # of pages that can be cached */
153 153 size_t p_hashwin_sz; /* # of non wired buckets */
154 154 struct seg_phash *p_htabwin; /* hash table for non wired entries */
155 155 size_t p_hashwired_sz; /* # of wired buckets */
156 156 struct seg_phash_wired *p_htabwired; /* hash table for wired entries */
157 157 kmem_cache_t *p_kmcache; /* kmem cache for seg_pcache structs */
158 158 #ifdef _LP64
159 159 ulong_t pad[1];
160 160 #endif /* _LP64 */
161 161 } pctrl1;
162 162
163 163 static struct p_ctrl2 {
164 164 kmutex_t p_mem_mtx; /* protects window counter and p_halinks */
165 165 pgcnt_t p_locked_win; /* # pages from window */
166 166 pgcnt_t p_locked; /* # of pages cached by pagelock */
167 167 uchar_t p_ahcur; /* current active links for insert/delete */
168 168 uchar_t p_athr_on; /* async reclaim thread is running. */
169 169 pcache_link_t p_ahhead[2]; /* active buckets linkages */
170 170 } pctrl2;
171 171
172 172 static struct p_ctrl3 {
173 173 clock_t p_pcp_maxage; /* max pcp age in ticks */
174 174 ulong_t p_athr_empty_ahb; /* athread walk stats */
175 175 ulong_t p_athr_full_ahb; /* athread walk stats */
176 176 pgcnt_t p_maxapurge_npages; /* max pages to purge at a time */
177 177 int p_shrink_shft; /* reap shift factor */
178 178 #ifdef _LP64
179 179 ulong_t pad[3];
180 180 #endif /* _LP64 */
181 181 } pctrl3;
182 182
183 183 #define seg_pdisabled pctrl1.p_disabled
184 184 #define seg_pmaxwindow pctrl1.p_maxwin
185 185 #define seg_phashsize_win pctrl1.p_hashwin_sz
186 186 #define seg_phashtab_win pctrl1.p_htabwin
187 187 #define seg_phashsize_wired pctrl1.p_hashwired_sz
188 188 #define seg_phashtab_wired pctrl1.p_htabwired
189 189 #define seg_pkmcache pctrl1.p_kmcache
190 190 #define seg_pmem_mtx pctrl2.p_mem_mtx
191 191 #define seg_plocked_window pctrl2.p_locked_win
192 192 #define seg_plocked pctrl2.p_locked
193 193 #define seg_pahcur pctrl2.p_ahcur
194 194 #define seg_pathr_on pctrl2.p_athr_on
195 195 #define seg_pahhead pctrl2.p_ahhead
196 196 #define seg_pmax_pcpage pctrl3.p_pcp_maxage
197 197 #define seg_pathr_empty_ahb pctrl3.p_athr_empty_ahb
198 198 #define seg_pathr_full_ahb pctrl3.p_athr_full_ahb
199 199 #define seg_pshrink_shift pctrl3.p_shrink_shft
200 200 #define seg_pmaxapurge_npages pctrl3.p_maxapurge_npages
201 201
202 202 #define P_HASHWIN_MASK (seg_phashsize_win - 1)
203 203 #define P_HASHWIRED_MASK (seg_phashsize_wired - 1)
204 204 #define P_BASESHIFT (6)
205 205
206 206 kthread_t *seg_pasync_thr;
207 207
208 208 extern struct seg_ops segvn_ops;
209 209 extern struct seg_ops segspt_shmops;
210 210
211 211 #define IS_PFLAGS_WIRED(flags) ((flags) & SEGP_FORCE_WIRED)
212 212 #define IS_PCP_WIRED(pcp) IS_PFLAGS_WIRED((pcp)->p_flags)
213 213
214 214 #define LBOLT_DELTA(t) ((ulong_t)(ddi_get_lbolt() - (t)))
215 215
216 216 #define PCP_AGE(pcp) LBOLT_DELTA((pcp)->p_lbolt)
217 217
218 218 /*
219 219 * htag0 argument can be a seg or amp pointer.
220 220 */
221 221 #define P_HASHBP(seg, htag0, addr, flags) \
222 222 (IS_PFLAGS_WIRED((flags)) ? \
223 223 ((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK & \
224 224 ((uintptr_t)(htag0) >> P_BASESHIFT)]) : \
225 225 (&seg_phashtab_win[P_HASHWIN_MASK & \
226 226 (((uintptr_t)(htag0) >> 3) ^ \
227 227 ((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ? \
228 228 (flags >> 16) : page_get_shift((seg)->s_szc))))]))
229 229
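/*
 * Illustrative sketch, not part of this patch: a stand-alone model of the
 * P_HASHBP() index arithmetic above, with hypothetical bucket counts
 * (seg_pinit() guarantees the real table sizes are powers of two).  Wired
 * entries hash on the seg/amp pointer alone; non-wired entries also fold
 * in the shadow list base address, scaled by the segment's page shift, so
 * entries from one segment spread across buckets.
 */
#define	EX_HASHSIZE_WIN		1024UL	/* hypothetical seg_phashsize_win */
#define	EX_HASHSIZE_WIRED	256UL	/* hypothetical seg_phashsize_wired */
#define	EX_BASESHIFT		6	/* mirrors P_BASESHIFT */

static unsigned long
ex_bucket_win(uintptr_t htag0, uintptr_t addr, unsigned int pgshift)
{
	/* non-wired arm of P_HASHBP(): (htag0 >> 3) ^ (addr >> pgshift) */
	return (((htag0 >> 3) ^ (addr >> pgshift)) & (EX_HASHSIZE_WIN - 1));
}

static unsigned long
ex_bucket_wired(uintptr_t htag0)
{
	/* wired arm of P_HASHBP(): htag0 >> P_BASESHIFT */
	return ((htag0 >> EX_BASESHIFT) & (EX_HASHSIZE_WIRED - 1));
}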
230 230 /*
231 231 * htag0 argument can be a seg or amp pointer.
232 232 */
233 233 #define P_MATCH(pcp, htag0, addr, len) \
234 234 ((pcp)->p_htag0 == (htag0) && \
235 235 (pcp)->p_addr == (addr) && \
236 236 (pcp)->p_len >= (len))
237 237
238 238 #define P_MATCH_PP(pcp, htag0, addr, len, pp) \
239 239 ((pcp)->p_pp == (pp) && \
240 240 (pcp)->p_htag0 == (htag0) && \
241 241 (pcp)->p_addr == (addr) && \
242 242 (pcp)->p_len >= (len))
243 243
244 244 #define plink2pcache(pl) ((struct seg_pcache *)((uintptr_t)(pl) - \
245 245 offsetof(struct seg_pcache, p_plink)))
246 246
247 247 #define hlink2phash(hl, l) ((struct seg_phash *)((uintptr_t)(hl) - \
248 248 offsetof(struct seg_phash, p_halink[l])))
249 249
250 250 /*
251 251 * seg_padd_abuck()/seg_premove_abuck() link and unlink hash buckets from
252 252 * active hash bucket lists. We maintain active bucket lists to reduce the
253 253 * overhead of finding active buckets during asynchronous purging since there
254 254 * can be 10s of millions of buckets on a large system but only a small subset
255 255 * of them in actual use.
256 256 *
257 257 * There're 2 active bucket lists. Current active list (as per seg_pahcur) is
258 258 * used by seg_pinsert()/seg_pinactive()/seg_ppurge() to add and delete
259 259 * buckets. The other list is used by asynchronous purge thread. This allows
260 260 * the purge thread to walk its active list without holding seg_pmem_mtx for a
261 261 * long time. When asynchronous thread is done with its list it switches to
262 262 * current active list and makes the list it just finished processing as
263 263 * current active list.
264 264 *
265 265 * seg_padd_abuck() only adds the bucket to current list if the bucket is not
266 266 * yet on any list. seg_premove_abuck() may remove the bucket from either
267 267 * list. If the bucket is on current list it will be always removed. Otherwise
268 268 * the bucket is only removed if asynchronous purge thread is not currently
269 269 * running or seg_premove_abuck() is called by asynchronous purge thread
270 270 * itself. A given bucket can only be on one of active lists at a time. These
271 271 * routines should be called with per bucket lock held. The routines use
272 272 * seg_pmem_mtx to protect list updates. seg_padd_abuck() must be called after
273 273 * the first entry is added to the bucket chain and seg_premove_abuck() must
274 274 * be called after the last pcp entry is deleted from its chain. Per bucket
275 275 * lock should be held by the callers. This avoids a potential race condition
276 276 * when seg_premove_abuck() removes a bucket after pcp entries are added to
277 277 * its list after the caller checked that the bucket has no entries. (this
278 278 * race would cause a loss of an active bucket from the active lists).
279 279 *
280 280 * Both lists are circular doubly linked lists anchored at seg_pahhead heads.
281 281 * New entries are added to the end of the list since LRU is used as the
282 282 * purging policy.
283 283 */
284 284 static void
285 285 seg_padd_abuck(struct seg_phash *hp)
286 286 {
287 287 int lix;
288 288
289 289 ASSERT(MUTEX_HELD(&hp->p_hmutex));
290 290 ASSERT((struct seg_phash *)hp->p_hnext != hp);
291 291 ASSERT((struct seg_phash *)hp->p_hprev != hp);
292 292 ASSERT(hp->p_hnext == hp->p_hprev);
293 293 ASSERT(!IS_PCP_WIRED(hp->p_hnext));
294 294 ASSERT(hp->p_hnext->p_hnext == (struct seg_pcache *)hp);
295 295 ASSERT(hp->p_hprev->p_hprev == (struct seg_pcache *)hp);
296 296 ASSERT(hp >= seg_phashtab_win &&
297 297 hp < &seg_phashtab_win[seg_phashsize_win]);
298 298
299 299 /*
300 300 * This bucket can already be on one of active lists
301 301 * since seg_premove_abuck() may have failed to remove it
302 302 * before.
303 303 */
304 304 mutex_enter(&seg_pmem_mtx);
305 305 lix = seg_pahcur;
306 306 ASSERT(lix >= 0 && lix <= 1);
307 307 if (hp->p_halink[lix].p_lnext != NULL) {
308 308 ASSERT(hp->p_halink[lix].p_lprev != NULL);
309 309 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
310 310 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
311 311 mutex_exit(&seg_pmem_mtx);
312 312 return;
313 313 }
314 314 ASSERT(hp->p_halink[lix].p_lprev == NULL);
315 315
316 316 /*
317 317 * If this bucket is still on list !lix async thread can't yet remove
318 318 * it since we hold here per bucket lock. In this case just return
319 319 * since async thread will eventually find and process this bucket.
320 320 */
321 321 if (hp->p_halink[!lix].p_lnext != NULL) {
322 322 ASSERT(hp->p_halink[!lix].p_lprev != NULL);
323 323 mutex_exit(&seg_pmem_mtx);
324 324 return;
325 325 }
326 326 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
327 327 /*
328 328 * This bucket is not on any active bucket list yet.
329 329 * Add the bucket to the tail of current active list.
330 330 */
331 331 hp->p_halink[lix].p_lnext = &seg_pahhead[lix];
332 332 hp->p_halink[lix].p_lprev = seg_pahhead[lix].p_lprev;
333 333 seg_pahhead[lix].p_lprev->p_lnext = &hp->p_halink[lix];
334 334 seg_pahhead[lix].p_lprev = &hp->p_halink[lix];
335 335 mutex_exit(&seg_pmem_mtx);
336 336 }
337 337
338 338 static void
339 339 seg_premove_abuck(struct seg_phash *hp, int athr)
340 340 {
341 341 int lix;
342 342
343 343 ASSERT(MUTEX_HELD(&hp->p_hmutex));
344 344 ASSERT((struct seg_phash *)hp->p_hnext == hp);
345 345 ASSERT((struct seg_phash *)hp->p_hprev == hp);
346 346 ASSERT(hp >= seg_phashtab_win &&
347 347 hp < &seg_phashtab_win[seg_phashsize_win]);
348 348
349 349 if (athr) {
350 350 ASSERT(seg_pathr_on);
351 351 ASSERT(seg_pahcur <= 1);
352 352 /*
353 353 * We are called by asynchronous thread that found this bucket
354 354 * on not currently active (i.e. !seg_pahcur) list. Remove it
355 355 * from there. Per bucket lock we are holding makes sure
356 356 * seg_pinsert() can't sneak in and add pcp entries to this
357 357 * bucket right before we remove the bucket from its list.
358 358 */
359 359 lix = !seg_pahcur;
360 360 ASSERT(hp->p_halink[lix].p_lnext != NULL);
361 361 ASSERT(hp->p_halink[lix].p_lprev != NULL);
362 362 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
363 363 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
364 364 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
365 365 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
366 366 hp->p_halink[lix].p_lnext = NULL;
367 367 hp->p_halink[lix].p_lprev = NULL;
368 368 return;
369 369 }
370 370
371 371 mutex_enter(&seg_pmem_mtx);
372 372 lix = seg_pahcur;
373 373 ASSERT(lix >= 0 && lix <= 1);
374 374
375 375 /*
376 376 * If the bucket is on currently active list just remove it from
377 377 * there.
378 378 */
379 379 if (hp->p_halink[lix].p_lnext != NULL) {
380 380 ASSERT(hp->p_halink[lix].p_lprev != NULL);
381 381 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
382 382 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
383 383 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
384 384 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
385 385 hp->p_halink[lix].p_lnext = NULL;
386 386 hp->p_halink[lix].p_lprev = NULL;
387 387 mutex_exit(&seg_pmem_mtx);
388 388 return;
389 389 }
390 390 ASSERT(hp->p_halink[lix].p_lprev == NULL);
391 391
392 392 /*
393 393 * If asynchronous thread is not running we can remove the bucket from
394 394 * not currently active list. The bucket must be on this list since we
395 395 * already checked that it's not on the other list and the bucket from
396 396 * which we just deleted the last pcp entry must be still on one of the
397 397 * active bucket lists.
398 398 */
399 399 lix = !lix;
400 400 ASSERT(hp->p_halink[lix].p_lnext != NULL);
401 401 ASSERT(hp->p_halink[lix].p_lprev != NULL);
402 402
403 403 if (!seg_pathr_on) {
404 404 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
405 405 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
406 406 hp->p_halink[lix].p_lnext = NULL;
407 407 hp->p_halink[lix].p_lprev = NULL;
408 408 }
409 409 mutex_exit(&seg_pmem_mtx);
410 410 }
411 411
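/*
 * Illustrative sketch, not part of this patch: the p_halink manipulation
 * in seg_padd_abuck()/seg_premove_abuck() above is ordinary circular
 * doubly linked list insertion/removal around a dummy head, shown here in
 * isolation with a simplified link type.  NULL links mean "not on a list".
 */
struct ex_link {
	struct ex_link *p_lnext;
	struct ex_link *p_lprev;
};

static void
ex_insert_tail(struct ex_link *head, struct ex_link *lp)
{
	lp->p_lnext = head;		/* new tail points back at the head */
	lp->p_lprev = head->p_lprev;
	head->p_lprev->p_lnext = lp;
	head->p_lprev = lp;
}

static void
ex_remove(struct ex_link *lp)
{
	lp->p_lnext->p_lprev = lp->p_lprev;
	lp->p_lprev->p_lnext = lp->p_lnext;
	lp->p_lnext = NULL;
	lp->p_lprev = NULL;
}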
412 412 /*
413 413 * Check if bucket pointed by hp already has a pcp entry that matches request
414 414 * htag0, addr and len. Set *found to 1 if match is found and to 0 otherwise.
415 415 * Also delete matching entries that cover smaller address range but start
416 416 * at the same address as addr argument. Return the list of deleted entries if
417 417 * any. This is an internal helper function called from seg_pinsert() only
418 418 * for non wired shadow lists. The caller already holds a per seg/amp list
419 419 * lock.
420 420 */
421 421 static struct seg_pcache *
422 422 seg_plookup_checkdup(struct seg_phash *hp, void *htag0,
423 423 caddr_t addr, size_t len, int *found)
424 424 {
425 425 struct seg_pcache *pcp;
426 426 struct seg_pcache *delcallb_list = NULL;
427 427
428 428 ASSERT(MUTEX_HELD(&hp->p_hmutex));
429 429
430 430 *found = 0;
431 431 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
432 432 pcp = pcp->p_hnext) {
433 433 ASSERT(pcp->p_hashp == hp);
434 434 if (pcp->p_htag0 == htag0 && pcp->p_addr == addr) {
435 435 ASSERT(!IS_PCP_WIRED(pcp));
436 436 if (pcp->p_len < len) {
437 437 pcache_link_t *plinkp;
438 438 if (pcp->p_active) {
439 439 continue;
440 440 }
441 441 plinkp = &pcp->p_plink;
442 442 plinkp->p_lprev->p_lnext = plinkp->p_lnext;
443 443 plinkp->p_lnext->p_lprev = plinkp->p_lprev;
444 444 pcp->p_hprev->p_hnext = pcp->p_hnext;
445 445 pcp->p_hnext->p_hprev = pcp->p_hprev;
446 446 pcp->p_hprev = delcallb_list;
447 447 delcallb_list = pcp;
448 448 } else {
449 449 *found = 1;
450 450 break;
451 451 }
452 452 }
453 453 }
454 454 return (delcallb_list);
455 455 }
456 456
457 457 /*
458 458 * lookup an address range in pagelock cache. Return shadow list and bump up
459 459 * active count. If amp is not NULL use amp as a lookup tag otherwise use seg
460 460 * as a lookup tag.
461 461 */
462 462 struct page **
463 463 seg_plookup(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
464 464 enum seg_rw rw, uint_t flags)
465 465 {
466 466 struct seg_pcache *pcp;
467 467 struct seg_phash *hp;
468 468 void *htag0;
469 469
470 470 ASSERT(seg != NULL);
471 471 ASSERT(rw == S_READ || rw == S_WRITE);
472 472
473 473 /*
474 474 * Skip pagelock cache, while DR is in progress or
475 475 * seg_pcache is off.
476 476 */
477 477 if (seg_pdisabled) {
478 478 return (NULL);
479 479 }
480 480 ASSERT(seg_phashsize_win != 0);
481 481
482 482 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
483 483 hp = P_HASHBP(seg, htag0, addr, flags);
484 484 mutex_enter(&hp->p_hmutex);
485 485 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
486 486 pcp = pcp->p_hnext) {
487 487 ASSERT(pcp->p_hashp == hp);
488 488 if (P_MATCH(pcp, htag0, addr, len)) {
489 489 ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
490 490 /*
491 491 * If this request wants to write pages
492 492 * but write permissions starting from
493 493 * addr don't cover the entire length len
494 494 * return lookup failure back to the caller.
495 495 * It will check protections and fail this
496 496 * pagelock operation with EACCESS error.
497 497 */
498 498 if (rw == S_WRITE && pcp->p_wlen < len) {
499 499 break;
500 500 }
501 501 if (pcp->p_active == UINT_MAX) {
502 502 break;
503 503 }
504 504 pcp->p_active++;
505 505 if (rw == S_WRITE && !pcp->p_write) {
506 506 pcp->p_write = 1;
507 507 }
508 508 mutex_exit(&hp->p_hmutex);
509 509 return (pcp->p_pp);
510 510 }
511 511 }
512 512 mutex_exit(&hp->p_hmutex);
513 513 return (NULL);
514 514 }
515 515
516 516 /*
517 517 * mark address range inactive. If the cache is off or the address range is
518 518 * not in the cache or another shadow list that covers bigger range is found
519 519 * we call the segment driver to reclaim the pages. Otherwise just decrement
520 520 * active count and set ref bit. If amp is not NULL use amp as a lookup tag
521 521 * otherwise use seg as a lookup tag.
522 522 */
523 523 void
524 524 seg_pinactive(struct seg *seg, struct anon_map *amp, caddr_t addr,
525 525 size_t len, struct page **pp, enum seg_rw rw, uint_t flags,
526 526 seg_preclaim_cbfunc_t callback)
527 527 {
528 528 struct seg_pcache *pcp;
529 529 struct seg_phash *hp;
530 530 kmutex_t *pmtx = NULL;
531 531 pcache_link_t *pheadp;
532 532 void *htag0;
533 533 pgcnt_t npages = 0;
534 534 int keep = 0;
535 535
536 536 ASSERT(seg != NULL);
537 537 ASSERT(rw == S_READ || rw == S_WRITE);
538 538
539 539 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
540 540
541 541 /*
542 542 * Skip lookup if pcache is not configured.
543 543 */
544 544 if (seg_phashsize_win == 0) {
545 545 goto out;
546 546 }
547 547
548 548 /*
549 549 * Grab per seg/amp lock before hash lock if we are going to remove
550 550 * inactive entry from pcache.
551 551 */
552 552 if (!IS_PFLAGS_WIRED(flags) && seg_pdisabled) {
553 553 if (amp == NULL) {
554 554 pheadp = &seg->s_phead;
555 555 pmtx = &seg->s_pmtx;
556 556 } else {
 557 557 			pheadp = &amp->a_phead;
 558 558 			pmtx = &amp->a_pmtx;
559 559 }
560 560 mutex_enter(pmtx);
561 561 }
562 562
563 563 hp = P_HASHBP(seg, htag0, addr, flags);
564 564 mutex_enter(&hp->p_hmutex);
565 565 again:
566 566 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
567 567 pcp = pcp->p_hnext) {
568 568 ASSERT(pcp->p_hashp == hp);
569 569 if (P_MATCH_PP(pcp, htag0, addr, len, pp)) {
570 570 ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
571 571 ASSERT(pcp->p_active);
572 572 if (keep) {
573 573 /*
574 574 * Don't remove this pcp entry
575 575 * if we didn't find duplicate
576 576 * shadow lists on second search.
577 577 * Somebody removed those duplicates
578 578 * since we dropped hash lock after first
579 579 * search.
580 580 */
581 581 ASSERT(pmtx != NULL);
582 582 ASSERT(!IS_PFLAGS_WIRED(flags));
583 583 mutex_exit(pmtx);
584 584 pmtx = NULL;
585 585 }
586 586 pcp->p_active--;
587 587 if (pcp->p_active == 0 && (pmtx != NULL ||
588 588 (seg_pdisabled && IS_PFLAGS_WIRED(flags)))) {
589 589
590 590 /*
591 591 * This entry is no longer active. Remove it
592 592 * now either because pcaching is temporarily
593 593 * disabled or there're other pcp entries that
594 594 * can match this pagelock request (i.e. this
595 595 * entry is a duplicate).
596 596 */
597 597
598 598 ASSERT(callback == pcp->p_callback);
599 599 if (pmtx != NULL) {
600 600 pcache_link_t *plinkp = &pcp->p_plink;
601 601 ASSERT(!IS_PCP_WIRED(pcp));
602 602 ASSERT(pheadp->p_lnext != pheadp);
603 603 ASSERT(pheadp->p_lprev != pheadp);
604 604 plinkp->p_lprev->p_lnext =
605 605 plinkp->p_lnext;
606 606 plinkp->p_lnext->p_lprev =
607 607 plinkp->p_lprev;
608 608 }
609 609 pcp->p_hprev->p_hnext = pcp->p_hnext;
610 610 pcp->p_hnext->p_hprev = pcp->p_hprev;
611 611 if (!IS_PCP_WIRED(pcp) &&
612 612 hp->p_hnext == (struct seg_pcache *)hp) {
613 613 /*
614 614 * We removed the last entry from this
615 615 * bucket. Now remove the bucket from
616 616 * its active list.
617 617 */
618 618 seg_premove_abuck(hp, 0);
619 619 }
620 620 mutex_exit(&hp->p_hmutex);
621 621 if (pmtx != NULL) {
622 622 mutex_exit(pmtx);
623 623 }
624 624 len = pcp->p_len;
625 625 npages = btop(len);
626 626 if (rw != S_WRITE && pcp->p_write) {
627 627 rw = S_WRITE;
628 628 }
629 629 kmem_cache_free(seg_pkmcache, pcp);
630 630 goto out;
631 631 } else {
632 632 /*
633 633 * We found a matching pcp entry but will not
634 634 * free it right away even if it's no longer
635 635 * active.
636 636 */
637 637 if (!pcp->p_active && !IS_PCP_WIRED(pcp)) {
638 638 /*
639 639 * Set the reference bit and mark the
640 640 * time of last access to this pcp
641 641 * so that asynchronous thread doesn't
642 642 * free it immediately since
643 643 * it may be reactivated very soon.
644 644 */
645 645 pcp->p_lbolt = ddi_get_lbolt();
646 646 pcp->p_ref = 1;
647 647 }
648 648 mutex_exit(&hp->p_hmutex);
649 649 if (pmtx != NULL) {
650 650 mutex_exit(pmtx);
651 651 }
652 652 return;
653 653 }
654 654 } else if (!IS_PFLAGS_WIRED(flags) &&
655 655 P_MATCH(pcp, htag0, addr, len)) {
656 656 /*
657 657 * This is a duplicate pcp entry. This situation may
658 658 * happen if a bigger shadow list that covers our
659 659 * range was added while our entry was still active.
660 660 * Now we can free our pcp entry if it becomes
661 661 * inactive.
662 662 */
663 663 if (!pcp->p_active) {
664 664 /*
665 665 * Mark this entry as referenced just in case
666 666 * we'll free our own pcp entry soon.
667 667 */
668 668 pcp->p_lbolt = ddi_get_lbolt();
669 669 pcp->p_ref = 1;
670 670 }
671 671 if (pmtx != NULL) {
672 672 /*
673 673 * we are already holding pmtx and found a
674 674 * duplicate. Don't keep our own pcp entry.
675 675 */
676 676 keep = 0;
677 677 continue;
678 678 }
679 679 /*
680 680 * We have to use mutex_tryenter to attempt to lock
681 681 * seg/amp list lock since we already hold hash lock
682 682 * and seg/amp list lock is above hash lock in lock
683 683 * order. If mutex_tryenter fails drop hash lock and
 684 684 * retake both locks in correct order and re-search
685 685 * this hash chain.
686 686 */
687 687 ASSERT(keep == 0);
688 688 if (amp == NULL) {
689 689 pheadp = &seg->s_phead;
690 690 pmtx = &seg->s_pmtx;
691 691 } else {
 692 692 			pheadp = &amp->a_phead;
 693 693 			pmtx = &amp->a_pmtx;
694 694 }
695 695 if (!mutex_tryenter(pmtx)) {
696 696 mutex_exit(&hp->p_hmutex);
697 697 mutex_enter(pmtx);
698 698 mutex_enter(&hp->p_hmutex);
699 699 /*
700 700 * If we don't find bigger shadow list on
701 701 * second search (it may happen since we
702 702 * dropped bucket lock) keep the entry that
703 703 * matches our own shadow list.
704 704 */
705 705 keep = 1;
706 706 goto again;
707 707 }
708 708 }
709 709 }
710 710 mutex_exit(&hp->p_hmutex);
711 711 if (pmtx != NULL) {
712 712 mutex_exit(pmtx);
713 713 }
714 714 out:
715 715 (*callback)(htag0, addr, len, pp, rw, 0);
716 716 if (npages) {
717 717 mutex_enter(&seg_pmem_mtx);
718 718 ASSERT(seg_plocked >= npages);
719 719 seg_plocked -= npages;
720 720 if (!IS_PFLAGS_WIRED(flags)) {
721 721 ASSERT(seg_plocked_window >= npages);
722 722 seg_plocked_window -= npages;
723 723 }
724 724 mutex_exit(&seg_pmem_mtx);
725 725 }
726 726
727 727 }
728 728
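/*
 * Illustrative sketch, not part of this patch: roughly how a segment
 * driver's L_PAGEUNLOCK path hands a shadow list back to pcache.  The
 * reclaim callback body is a stub here; a real driver (e.g. segvn)
 * unlocks the pages named by the shadow list and frees the list itself.
 * Only the seg_pinactive() call and the callback signature come from
 * this file.
 */
/*ARGSUSED*/
static int
ex_reclaim_cb(void *htag0, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, int async)
{
	/* unlock the pages in pp[0 .. btop(len) - 1], then free pp */
	return (0);
}

static void
ex_pageunlock(struct seg *seg, struct anon_map *amp, caddr_t addr,
    size_t len, struct page **pplist, enum seg_rw rw)
{
	/*
	 * Drop the active count taken at lookup/insert time; pcache runs
	 * ex_reclaim_cb() itself if it decides not to keep the entry.
	 */
	seg_pinactive(seg, amp, addr, len, pplist, rw, 0, ex_reclaim_cb);
}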
729 729 #ifdef DEBUG
730 730 static uint32_t p_insert_chk_mtbf = 0;
731 731 #endif
732 732
733 733 /*
734 734 * The seg_pinsert_check() is used by segment drivers to predict whether
735 735 * a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
736 736 */
737 737 /*ARGSUSED*/
738 738 int
739 739 seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
740 740 size_t len, uint_t flags)
741 741 {
742 742 ASSERT(seg != NULL);
743 743
744 744 #ifdef DEBUG
745 745 if (p_insert_chk_mtbf && !(gethrtime() % p_insert_chk_mtbf)) {
746 746 return (SEGP_FAIL);
747 747 }
748 748 #endif
749 749
750 750 if (seg_pdisabled) {
751 751 return (SEGP_FAIL);
752 752 }
753 753 ASSERT(seg_phashsize_win != 0);
754 754
755 755 if (IS_PFLAGS_WIRED(flags)) {
756 756 return (SEGP_SUCCESS);
757 757 }
758 758
759 759 if (seg_plocked_window + btop(len) > seg_pmaxwindow) {
760 760 return (SEGP_FAIL);
761 761 }
762 762
763 763 if (freemem < desfree) {
764 764 return (SEGP_FAIL);
765 765 }
766 766
767 767 return (SEGP_SUCCESS);
768 768 }
769 769
770 770 #ifdef DEBUG
771 771 static uint32_t p_insert_mtbf = 0;
772 772 #endif
773 773
774 774 /*
775 775 * Insert address range with shadow list into pagelock cache if there's no
776 776 * shadow list already cached for this address range. If the cache is off or
777 777 * caching is temporarily disabled or the allowed 'window' is exceeded return
778 778 * SEGP_FAIL. Otherwise return SEGP_SUCCESS.
779 779 *
780 780 * For non wired shadow lists (segvn case) include address in the hashing
781 781 * function to avoid linking all the entries from the same segment or amp on
782 782 * the same bucket. amp is used instead of seg if amp is not NULL. Non wired
783 783 * pcache entries are also linked on a per segment/amp list so that all
784 784 * entries can be found quickly during seg/amp purge without walking the
785 785 * entire pcache hash table. For wired shadow lists (segspt case) we
786 786 * don't use address hashing and per segment linking because the caller
787 787 * currently inserts only one entry per segment that covers the entire
788 788 * segment. If we used per segment linking even for segspt it would complicate
789 789 * seg_ppurge_wiredpp() locking.
790 790 *
791 791 * Both hash bucket and per seg/amp locks need to be held before adding a non
792 792 * wired entry to hash and per seg/amp lists. per seg/amp lock should be taken
793 793 * first.
794 794 *
795 795 * This function will also remove from pcache old inactive shadow lists that
796 796 * overlap with this request but cover smaller range for the same start
797 797 * address.
798 798 */
799 799 int
800 800 seg_pinsert(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
801 801 size_t wlen, struct page **pp, enum seg_rw rw, uint_t flags,
802 802 seg_preclaim_cbfunc_t callback)
803 803 {
804 804 struct seg_pcache *pcp;
805 805 struct seg_phash *hp;
806 806 pgcnt_t npages;
807 807 pcache_link_t *pheadp;
808 808 kmutex_t *pmtx;
809 809 struct seg_pcache *delcallb_list = NULL;
810 810
811 811 ASSERT(seg != NULL);
812 812 ASSERT(rw == S_READ || rw == S_WRITE);
813 813 ASSERT(rw == S_READ || wlen == len);
814 814 ASSERT(rw == S_WRITE || wlen <= len);
815 815 ASSERT(amp == NULL || wlen == len);
816 816
817 817 #ifdef DEBUG
818 818 if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
819 819 return (SEGP_FAIL);
820 820 }
821 821 #endif
822 822
823 823 if (seg_pdisabled) {
824 824 return (SEGP_FAIL);
825 825 }
826 826 ASSERT(seg_phashsize_win != 0);
827 827
828 828 ASSERT((len & PAGEOFFSET) == 0);
829 829 npages = btop(len);
830 830 mutex_enter(&seg_pmem_mtx);
831 831 if (!IS_PFLAGS_WIRED(flags)) {
832 832 if (seg_plocked_window + npages > seg_pmaxwindow) {
833 833 mutex_exit(&seg_pmem_mtx);
834 834 return (SEGP_FAIL);
835 835 }
836 836 seg_plocked_window += npages;
837 837 }
838 838 seg_plocked += npages;
839 839 mutex_exit(&seg_pmem_mtx);
840 840
841 841 pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
842 842 /*
843 843 * If amp is not NULL set htag0 to amp otherwise set it to seg.
844 844 */
845 845 if (amp == NULL) {
846 846 pcp->p_htag0 = (void *)seg;
847 847 pcp->p_flags = flags & 0xffff;
848 848 } else {
849 849 pcp->p_htag0 = (void *)amp;
850 850 pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
851 851 }
852 852 pcp->p_addr = addr;
853 853 pcp->p_len = len;
854 854 pcp->p_wlen = wlen;
855 855 pcp->p_pp = pp;
856 856 pcp->p_write = (rw == S_WRITE);
857 857 pcp->p_callback = callback;
858 858 pcp->p_active = 1;
859 859
860 860 hp = P_HASHBP(seg, pcp->p_htag0, addr, flags);
861 861 if (!IS_PFLAGS_WIRED(flags)) {
862 862 int found;
863 863 void *htag0;
864 864 if (amp == NULL) {
865 865 pheadp = &seg->s_phead;
866 866 pmtx = &seg->s_pmtx;
867 867 htag0 = (void *)seg;
868 868 } else {
 869 869 			pheadp = &amp->a_phead;
 870 870 			pmtx = &amp->a_pmtx;
871 871 htag0 = (void *)amp;
872 872 }
873 873 mutex_enter(pmtx);
874 874 mutex_enter(&hp->p_hmutex);
875 875 delcallb_list = seg_plookup_checkdup(hp, htag0, addr,
876 876 len, &found);
877 877 if (found) {
878 878 mutex_exit(&hp->p_hmutex);
879 879 mutex_exit(pmtx);
880 880 mutex_enter(&seg_pmem_mtx);
881 881 seg_plocked -= npages;
882 882 seg_plocked_window -= npages;
883 883 mutex_exit(&seg_pmem_mtx);
884 884 kmem_cache_free(seg_pkmcache, pcp);
885 885 goto out;
886 886 }
887 887 pcp->p_plink.p_lnext = pheadp->p_lnext;
888 888 pcp->p_plink.p_lprev = pheadp;
889 889 pheadp->p_lnext->p_lprev = &pcp->p_plink;
890 890 pheadp->p_lnext = &pcp->p_plink;
891 891 } else {
892 892 mutex_enter(&hp->p_hmutex);
893 893 }
894 894 pcp->p_hashp = hp;
895 895 pcp->p_hnext = hp->p_hnext;
896 896 pcp->p_hprev = (struct seg_pcache *)hp;
897 897 hp->p_hnext->p_hprev = pcp;
898 898 hp->p_hnext = pcp;
899 899 if (!IS_PFLAGS_WIRED(flags) &&
900 900 hp->p_hprev == pcp) {
901 901 seg_padd_abuck(hp);
902 902 }
903 903 mutex_exit(&hp->p_hmutex);
904 904 if (!IS_PFLAGS_WIRED(flags)) {
905 905 mutex_exit(pmtx);
906 906 }
907 907
908 908 out:
909 909 npages = 0;
910 910 while (delcallb_list != NULL) {
911 911 pcp = delcallb_list;
912 912 delcallb_list = pcp->p_hprev;
913 913 ASSERT(!IS_PCP_WIRED(pcp) && !pcp->p_active);
914 914 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
915 915 pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
916 916 npages += btop(pcp->p_len);
917 917 kmem_cache_free(seg_pkmcache, pcp);
918 918 }
919 919 if (npages) {
920 920 ASSERT(!IS_PFLAGS_WIRED(flags));
921 921 mutex_enter(&seg_pmem_mtx);
922 922 ASSERT(seg_plocked >= npages);
923 923 ASSERT(seg_plocked_window >= npages);
924 924 seg_plocked -= npages;
925 925 seg_plocked_window -= npages;
926 926 mutex_exit(&seg_pmem_mtx);
927 927 }
928 928
929 929 return (SEGP_SUCCESS);
930 930 }
931 931
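/*
 * Illustrative sketch, not part of this patch: the L_PAGELOCK fast path a
 * segment driver typically builds on top of seg_plookup(),
 * seg_pinsert_check() and seg_pinsert() above.  ex_build_shadow_list() is
 * a hypothetical driver helper (it would lock the pages and allocate the
 * pp array); ex_reclaim_cb is the stub callback sketched earlier.
 */
static struct page **ex_build_shadow_list(struct seg *, caddr_t, size_t,
    enum seg_rw);

static struct page **
ex_pagelock(struct seg *seg, struct anon_map *amp, caddr_t addr,
    size_t len, enum seg_rw rw)
{
	struct page **pplist;

	/* Reuse a cached shadow list if one already covers the range. */
	pplist = seg_plookup(seg, amp, addr, len, rw, 0);
	if (pplist != NULL)
		return (pplist);

	/* Cheap pre-check so we don't lock pages we can't cache anyway. */
	if (seg_pinsert_check(seg, amp, addr, len, 0) == SEGP_FAIL)
		return (NULL);

	/* Lock the pages and build the shadow list (driver specific). */
	if ((pplist = ex_build_shadow_list(seg, addr, len, rw)) == NULL)
		return (NULL);

	/*
	 * Cache it.  On SEGP_FAIL the list simply isn't cached and the
	 * driver keeps managing (and later freeing) it on its own;
	 * seg_pinactive() handles both cases at unlock time.
	 */
	(void) seg_pinsert(seg, amp, addr, len, len, pplist, rw, 0,
	    ex_reclaim_cb);
	return (pplist);
}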
932 932 /*
933 933 * purge entries from the pagelock cache if not active
934 934 * and not recently used.
935 935 */
936 936 static void
937 937 seg_ppurge_async(int force)
938 938 {
939 939 struct seg_pcache *delcallb_list = NULL;
940 940 struct seg_pcache *pcp;
941 941 struct seg_phash *hp;
942 942 pgcnt_t npages = 0;
943 943 pgcnt_t npages_window = 0;
944 944 pgcnt_t npgs_to_purge;
945 945 pgcnt_t npgs_purged = 0;
946 946 int hlinks = 0;
947 947 int hlix;
948 948 pcache_link_t *hlinkp;
949 949 pcache_link_t *hlnextp = NULL;
950 950 int lowmem;
951 951 int trim;
952 952
953 953 ASSERT(seg_phashsize_win != 0);
954 954
955 955 /*
956 956 * if the cache is off or empty, return
957 957 */
958 958 if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
959 959 return;
960 960 }
961 961
962 962 if (!force) {
963 963 lowmem = 0;
964 964 trim = 0;
965 965 if (freemem < lotsfree + needfree) {
966 966 spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
967 967 if (fmem <= 5 * (desfree >> 2)) {
968 968 lowmem = 1;
969 969 } else if (fmem <= 7 * (lotsfree >> 3)) {
970 970 if (seg_plocked_window >=
971 971 (availrmem_initial >> 1)) {
972 972 lowmem = 1;
973 973 }
974 974 } else if (fmem < lotsfree) {
975 975 if (seg_plocked_window >=
976 976 3 * (availrmem_initial >> 2)) {
977 977 lowmem = 1;
978 978 }
979 979 }
980 980 }
981 981 if (seg_plocked_window >= 7 * (seg_pmaxwindow >> 3)) {
982 982 trim = 1;
983 983 }
984 984 if (!lowmem && !trim) {
985 985 return;
986 986 }
987 987 npgs_to_purge = seg_plocked_window >>
988 988 seg_pshrink_shift;
989 989 if (lowmem) {
990 990 npgs_to_purge = MIN(npgs_to_purge,
991 991 MAX(seg_pmaxapurge_npages, desfree));
992 992 } else {
993 993 npgs_to_purge = MIN(npgs_to_purge,
994 994 seg_pmaxapurge_npages);
995 995 }
996 996 if (npgs_to_purge == 0) {
997 997 return;
998 998 }
999 999 } else {
1000 1000 struct seg_phash_wired *hpw;
1001 1001
1002 1002 ASSERT(seg_phashsize_wired != 0);
1003 1003
1004 1004 for (hpw = seg_phashtab_wired;
1005 1005 hpw < &seg_phashtab_wired[seg_phashsize_wired]; hpw++) {
1006 1006
1007 1007 if (hpw->p_hnext == (struct seg_pcache *)hpw) {
1008 1008 continue;
1009 1009 }
1010 1010
1011 1011 mutex_enter(&hpw->p_hmutex);
1012 1012
1013 1013 for (pcp = hpw->p_hnext;
1014 1014 pcp != (struct seg_pcache *)hpw;
1015 1015 pcp = pcp->p_hnext) {
1016 1016
1017 1017 ASSERT(IS_PCP_WIRED(pcp));
1018 1018 ASSERT(pcp->p_hashp ==
1019 1019 (struct seg_phash *)hpw);
1020 1020
1021 1021 if (pcp->p_active) {
1022 1022 continue;
1023 1023 }
1024 1024 pcp->p_hprev->p_hnext = pcp->p_hnext;
1025 1025 pcp->p_hnext->p_hprev = pcp->p_hprev;
1026 1026 pcp->p_hprev = delcallb_list;
1027 1027 delcallb_list = pcp;
1028 1028 }
1029 1029 mutex_exit(&hpw->p_hmutex);
1030 1030 }
1031 1031 }
1032 1032
1033 1033 mutex_enter(&seg_pmem_mtx);
1034 1034 if (seg_pathr_on) {
1035 1035 mutex_exit(&seg_pmem_mtx);
1036 1036 goto runcb;
1037 1037 }
1038 1038 seg_pathr_on = 1;
1039 1039 mutex_exit(&seg_pmem_mtx);
1040 1040 ASSERT(seg_pahcur <= 1);
1041 1041 hlix = !seg_pahcur;
1042 1042
1043 1043 again:
1044 1044 for (hlinkp = seg_pahhead[hlix].p_lnext; hlinkp != &seg_pahhead[hlix];
1045 1045 hlinkp = hlnextp) {
1046 1046
1047 1047 hlnextp = hlinkp->p_lnext;
1048 1048 ASSERT(hlnextp != NULL);
1049 1049
1050 1050 hp = hlink2phash(hlinkp, hlix);
1051 1051 if (hp->p_hnext == (struct seg_pcache *)hp) {
1052 1052 seg_pathr_empty_ahb++;
1053 1053 continue;
1054 1054 }
1055 1055 seg_pathr_full_ahb++;
1056 1056 mutex_enter(&hp->p_hmutex);
1057 1057
1058 1058 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
1059 1059 pcp = pcp->p_hnext) {
1060 1060 pcache_link_t *pheadp;
1061 1061 pcache_link_t *plinkp;
1062 1062 void *htag0;
1063 1063 kmutex_t *pmtx;
1064 1064
1065 1065 ASSERT(!IS_PCP_WIRED(pcp));
1066 1066 ASSERT(pcp->p_hashp == hp);
1067 1067
1068 1068 if (pcp->p_active) {
1069 1069 continue;
1070 1070 }
1071 1071 if (!force && pcp->p_ref &&
1072 1072 PCP_AGE(pcp) < seg_pmax_pcpage) {
1073 1073 pcp->p_ref = 0;
1074 1074 continue;
1075 1075 }
1076 1076 plinkp = &pcp->p_plink;
1077 1077 htag0 = pcp->p_htag0;
1078 1078 if (pcp->p_flags & SEGP_AMP) {
1079 1079 pheadp = &((amp_t *)htag0)->a_phead;
1080 1080 pmtx = &((amp_t *)htag0)->a_pmtx;
1081 1081 } else {
1082 1082 pheadp = &((seg_t *)htag0)->s_phead;
1083 1083 pmtx = &((seg_t *)htag0)->s_pmtx;
1084 1084 }
1085 1085 if (!mutex_tryenter(pmtx)) {
1086 1086 continue;
1087 1087 }
1088 1088 ASSERT(pheadp->p_lnext != pheadp);
1089 1089 ASSERT(pheadp->p_lprev != pheadp);
1090 1090 plinkp->p_lprev->p_lnext =
1091 1091 plinkp->p_lnext;
1092 1092 plinkp->p_lnext->p_lprev =
1093 1093 plinkp->p_lprev;
1094 1094 pcp->p_hprev->p_hnext = pcp->p_hnext;
1095 1095 pcp->p_hnext->p_hprev = pcp->p_hprev;
1096 1096 mutex_exit(pmtx);
1097 1097 pcp->p_hprev = delcallb_list;
1098 1098 delcallb_list = pcp;
1099 1099 npgs_purged += btop(pcp->p_len);
1100 1100 }
1101 1101 if (hp->p_hnext == (struct seg_pcache *)hp) {
1102 1102 seg_premove_abuck(hp, 1);
1103 1103 }
1104 1104 mutex_exit(&hp->p_hmutex);
1105 1105 if (npgs_purged >= seg_plocked_window) {
1106 1106 break;
1107 1107 }
1108 1108 if (!force) {
1109 1109 if (npgs_purged >= npgs_to_purge) {
1110 1110 break;
1111 1111 }
1112 1112 if (!trim && !(seg_pathr_full_ahb & 15)) {
1113 1113 ASSERT(lowmem);
1114 1114 if (freemem >= lotsfree + needfree) {
1115 1115 break;
1116 1116 }
1117 1117 }
1118 1118 }
1119 1119 }
1120 1120
1121 1121 if (hlinkp == &seg_pahhead[hlix]) {
1122 1122 /*
1123 1123 * We processed the entire hlix active bucket list
1124 1124 * but didn't find enough pages to reclaim.
1125 1125 * Switch the lists and walk the other list
1126 1126 * if we haven't done it yet.
1127 1127 */
1128 1128 mutex_enter(&seg_pmem_mtx);
1129 1129 ASSERT(seg_pathr_on);
1130 1130 ASSERT(seg_pahcur == !hlix);
1131 1131 seg_pahcur = hlix;
1132 1132 mutex_exit(&seg_pmem_mtx);
1133 1133 if (++hlinks < 2) {
1134 1134 hlix = !hlix;
1135 1135 goto again;
1136 1136 }
1137 1137 } else if ((hlinkp = hlnextp) != &seg_pahhead[hlix] &&
1138 1138 seg_pahhead[hlix].p_lnext != hlinkp) {
1139 1139 ASSERT(hlinkp != NULL);
1140 1140 ASSERT(hlinkp->p_lprev != &seg_pahhead[hlix]);
1141 1141 ASSERT(seg_pahhead[hlix].p_lnext != &seg_pahhead[hlix]);
1142 1142 ASSERT(seg_pahhead[hlix].p_lprev != &seg_pahhead[hlix]);
1143 1143
1144 1144 /*
1145 1145 * Reinsert the header to point to hlinkp
1146 1146 * so that we start from hlinkp bucket next time around.
1147 1147 */
1148 1148 seg_pahhead[hlix].p_lnext->p_lprev = seg_pahhead[hlix].p_lprev;
1149 1149 seg_pahhead[hlix].p_lprev->p_lnext = seg_pahhead[hlix].p_lnext;
1150 1150 seg_pahhead[hlix].p_lnext = hlinkp;
1151 1151 seg_pahhead[hlix].p_lprev = hlinkp->p_lprev;
1152 1152 hlinkp->p_lprev->p_lnext = &seg_pahhead[hlix];
1153 1153 hlinkp->p_lprev = &seg_pahhead[hlix];
1154 1154 }
1155 1155
1156 1156 mutex_enter(&seg_pmem_mtx);
1157 1157 ASSERT(seg_pathr_on);
1158 1158 seg_pathr_on = 0;
1159 1159 mutex_exit(&seg_pmem_mtx);
1160 1160
1161 1161 runcb:
1162 1162 /*
1163 1163 * Run the delayed callback list. segments/amps can't go away until
1164 1164 * callback is executed since they must have non 0 softlockcnt. That's
1165 1165 * why we don't need to hold as/seg/amp locks to execute the callback.
1166 1166 */
1167 1167 while (delcallb_list != NULL) {
1168 1168 pcp = delcallb_list;
1169 1169 delcallb_list = pcp->p_hprev;
1170 1170 ASSERT(!pcp->p_active);
1171 1171 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1172 1172 pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 1);
1173 1173 npages += btop(pcp->p_len);
1174 1174 if (!IS_PCP_WIRED(pcp)) {
1175 1175 npages_window += btop(pcp->p_len);
1176 1176 }
1177 1177 kmem_cache_free(seg_pkmcache, pcp);
1178 1178 }
1179 1179 if (npages) {
1180 1180 mutex_enter(&seg_pmem_mtx);
1181 1181 ASSERT(seg_plocked >= npages);
1182 1182 ASSERT(seg_plocked_window >= npages_window);
1183 1183 seg_plocked -= npages;
1184 1184 seg_plocked_window -= npages_window;
1185 1185 mutex_exit(&seg_pmem_mtx);
1186 1186 }
1187 1187 }
1188 1188
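/*
 * Illustrative sketch, not part of this patch: the !force throttling
 * logic above, restated as a stand-alone predicate.  Page counts are
 * passed in rather than read from the kernel globals (freemem, lotsfree,
 * etc.) so the thresholds are easy to check with made-up numbers.
 * Returns nonzero when an asynchronous purge pass is wanted.
 */
static int
ex_purge_wanted(long freemem, long needfree, long lotsfree, long desfree,
    unsigned long locked_window, unsigned long availrmem_initial,
    unsigned long maxwindow)
{
	long fmem;
	int lowmem = 0;

	if (freemem < lotsfree + needfree) {
		fmem = freemem - needfree;
		if (fmem < 0)
			fmem = 0;
		if (fmem <= 5 * (desfree >> 2))		/* <= 1.25 desfree */
			lowmem = 1;
		else if (fmem <= 7 * (lotsfree >> 3))	/* <= 7/8 lotsfree */
			lowmem = (locked_window >= (availrmem_initial >> 1));
		else if (fmem < lotsfree)
			lowmem = (locked_window >=
			    3 * (availrmem_initial >> 2));
	}
	/* also trim when the window is more than 7/8 full */
	return (lowmem || locked_window >= 7 * (maxwindow >> 3));
}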
1189 1189 /*
1190 1190 * Remove cached pages for segment(s) entries from hashtable. The segments
1191 1191 * are identified by pp array. This is useful for multiple seg's cached on
1192 1192 * behalf of dummy segment (ISM/DISM) with common pp array.
1193 1193 */
1194 1194 void
1195 1195 seg_ppurge_wiredpp(struct page **pp)
1196 1196 {
1197 1197 struct seg_pcache *pcp;
1198 1198 struct seg_phash_wired *hp;
1199 1199 pgcnt_t npages = 0;
1200 1200 struct seg_pcache *delcallb_list = NULL;
1201 1201
1202 1202 /*
1203 1203 * if the cache is empty, return
1204 1204 */
1205 1205 if (seg_plocked == 0) {
1206 1206 return;
1207 1207 }
1208 1208 ASSERT(seg_phashsize_wired != 0);
1209 1209
1210 1210 for (hp = seg_phashtab_wired;
1211 1211 hp < &seg_phashtab_wired[seg_phashsize_wired]; hp++) {
1212 1212 if (hp->p_hnext == (struct seg_pcache *)hp) {
1213 1213 continue;
1214 1214 }
1215 1215 mutex_enter(&hp->p_hmutex);
1216 1216 pcp = hp->p_hnext;
1217 1217 while (pcp != (struct seg_pcache *)hp) {
1218 1218 ASSERT(pcp->p_hashp == (struct seg_phash *)hp);
1219 1219 ASSERT(IS_PCP_WIRED(pcp));
1220 1220 /*
1221 1221 * purge entries which are not active
1222 1222 */
1223 1223 if (!pcp->p_active && pcp->p_pp == pp) {
1224 1224 ASSERT(pcp->p_htag0 != NULL);
1225 1225 pcp->p_hprev->p_hnext = pcp->p_hnext;
1226 1226 pcp->p_hnext->p_hprev = pcp->p_hprev;
1227 1227 pcp->p_hprev = delcallb_list;
1228 1228 delcallb_list = pcp;
1229 1229 }
1230 1230 pcp = pcp->p_hnext;
1231 1231 }
1232 1232 mutex_exit(&hp->p_hmutex);
1233 1233 /*
1234 1234 * segments can't go away until callback is executed since
1235 1235 * they must have non 0 softlockcnt. That's why we don't
1236 1236 * need to hold as/seg locks to execute the callback.
1237 1237 */
1238 1238 while (delcallb_list != NULL) {
1239 1239 int done;
1240 1240 pcp = delcallb_list;
1241 1241 delcallb_list = pcp->p_hprev;
1242 1242 ASSERT(!pcp->p_active);
1243 1243 done = (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1244 1244 pcp->p_len, pcp->p_pp,
1245 1245 pcp->p_write ? S_WRITE : S_READ, 1);
1246 1246 npages += btop(pcp->p_len);
1247 1247 ASSERT(IS_PCP_WIRED(pcp));
1248 1248 kmem_cache_free(seg_pkmcache, pcp);
1249 1249 if (done) {
1250 1250 ASSERT(delcallb_list == NULL);
1251 1251 goto out;
1252 1252 }
1253 1253 }
1254 1254 }
1255 1255
1256 1256 out:
1257 1257 mutex_enter(&seg_pmem_mtx);
1258 1258 ASSERT(seg_plocked >= npages);
1259 1259 seg_plocked -= npages;
1260 1260 mutex_exit(&seg_pmem_mtx);
1261 1261 }
1262 1262
1263 1263 /*
1264 1264 * purge all entries for a given segment. Since we
1265 1265 * callback into the segment driver directly for page
1266 1266 * reclaim the caller needs to hold the right locks.
1267 1267 */
1268 1268 void
1269 1269 seg_ppurge(struct seg *seg, struct anon_map *amp, uint_t flags)
1270 1270 {
1271 1271 struct seg_pcache *delcallb_list = NULL;
1272 1272 struct seg_pcache *pcp;
1273 1273 struct seg_phash *hp;
1274 1274 pgcnt_t npages = 0;
1275 1275 void *htag0;
1276 1276
1277 1277 if (seg_plocked == 0) {
1278 1278 return;
1279 1279 }
1280 1280 ASSERT(seg_phashsize_win != 0);
1281 1281
1282 1282 /*
1283 1283 * If amp is not NULL use amp as a lookup tag otherwise use seg
1284 1284 * as a lookup tag.
1285 1285 */
1286 1286 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
1287 1287 ASSERT(htag0 != NULL);
1288 1288 if (IS_PFLAGS_WIRED(flags)) {
1289 1289 hp = P_HASHBP(seg, htag0, 0, flags);
1290 1290 mutex_enter(&hp->p_hmutex);
1291 1291 pcp = hp->p_hnext;
1292 1292 while (pcp != (struct seg_pcache *)hp) {
1293 1293 ASSERT(pcp->p_hashp == hp);
1294 1294 ASSERT(IS_PCP_WIRED(pcp));
1295 1295 if (pcp->p_htag0 == htag0) {
1296 1296 if (pcp->p_active) {
1297 1297 break;
1298 1298 }
1299 1299 pcp->p_hprev->p_hnext = pcp->p_hnext;
1300 1300 pcp->p_hnext->p_hprev = pcp->p_hprev;
1301 1301 pcp->p_hprev = delcallb_list;
1302 1302 delcallb_list = pcp;
1303 1303 }
1304 1304 pcp = pcp->p_hnext;
1305 1305 }
1306 1306 mutex_exit(&hp->p_hmutex);
1307 1307 } else {
1308 1308 pcache_link_t *plinkp;
1309 1309 pcache_link_t *pheadp;
1310 1310 kmutex_t *pmtx;
1311 1311
1312 1312 if (amp == NULL) {
1313 1313 ASSERT(seg != NULL);
1314 1314 pheadp = &seg->s_phead;
1315 1315 pmtx = &seg->s_pmtx;
1316 1316 } else {
1317 1317 			pheadp = &amp->a_phead;
1318 1318 			pmtx = &amp->a_pmtx;
1319 1319 }
1320 1320 mutex_enter(pmtx);
1321 1321 while ((plinkp = pheadp->p_lnext) != pheadp) {
1322 1322 pcp = plink2pcache(plinkp);
1323 1323 ASSERT(!IS_PCP_WIRED(pcp));
1324 1324 ASSERT(pcp->p_htag0 == htag0);
1325 1325 hp = pcp->p_hashp;
1326 1326 mutex_enter(&hp->p_hmutex);
1327 1327 if (pcp->p_active) {
1328 1328 mutex_exit(&hp->p_hmutex);
1329 1329 break;
1330 1330 }
1331 1331 ASSERT(plinkp->p_lprev == pheadp);
1332 1332 pheadp->p_lnext = plinkp->p_lnext;
1333 1333 plinkp->p_lnext->p_lprev = pheadp;
1334 1334 pcp->p_hprev->p_hnext = pcp->p_hnext;
1335 1335 pcp->p_hnext->p_hprev = pcp->p_hprev;
1336 1336 pcp->p_hprev = delcallb_list;
1337 1337 delcallb_list = pcp;
1338 1338 if (hp->p_hnext == (struct seg_pcache *)hp) {
1339 1339 seg_premove_abuck(hp, 0);
1340 1340 }
1341 1341 mutex_exit(&hp->p_hmutex);
1342 1342 }
1343 1343 mutex_exit(pmtx);
1344 1344 }
1345 1345 while (delcallb_list != NULL) {
1346 1346 pcp = delcallb_list;
1347 1347 delcallb_list = pcp->p_hprev;
1348 1348 ASSERT(!pcp->p_active);
1349 1349 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr, pcp->p_len,
1350 1350 pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
1351 1351 npages += btop(pcp->p_len);
1352 1352 kmem_cache_free(seg_pkmcache, pcp);
1353 1353 }
1354 1354 mutex_enter(&seg_pmem_mtx);
1355 1355 ASSERT(seg_plocked >= npages);
1356 1356 seg_plocked -= npages;
1357 1357 if (!IS_PFLAGS_WIRED(flags)) {
1358 1358 ASSERT(seg_plocked_window >= npages);
1359 1359 seg_plocked_window -= npages;
1360 1360 }
1361 1361 mutex_exit(&seg_pmem_mtx);
1362 1362 }
1363 1363
1364 1364 static void seg_pinit_mem_config(void);
1365 1365
1366 1366 /*
1367 1367 * setup the pagelock cache
1368 1368 */
1369 1369 static void
1370 1370 seg_pinit(void)
1371 1371 {
1372 1372 struct seg_phash *hp;
1373 1373 ulong_t i;
1374 1374 pgcnt_t physmegs;
1375 1375
1376 1376 seg_plocked = 0;
1377 1377 seg_plocked_window = 0;
1378 1378
1379 1379 if (segpcache_enabled == 0) {
1380 1380 seg_phashsize_win = 0;
1381 1381 seg_phashsize_wired = 0;
1382 1382 seg_pdisabled = 1;
1383 1383 return;
1384 1384 }
1385 1385
1386 1386 seg_pdisabled = 0;
1387 1387 seg_pkmcache = kmem_cache_create("seg_pcache",
1388 1388 sizeof (struct seg_pcache), 0, NULL, NULL, NULL, NULL, NULL, 0);
1389 1389 if (segpcache_pcp_maxage_ticks <= 0) {
1390 1390 segpcache_pcp_maxage_ticks = segpcache_pcp_maxage_sec * hz;
1391 1391 }
1392 1392 seg_pmax_pcpage = segpcache_pcp_maxage_ticks;
1393 1393 seg_pathr_empty_ahb = 0;
1394 1394 seg_pathr_full_ahb = 0;
1395 1395 seg_pshrink_shift = segpcache_shrink_shift;
1396 1396 seg_pmaxapurge_npages = btop(segpcache_maxapurge_bytes);
1397 1397
1398 1398 mutex_init(&seg_pcache_mtx, NULL, MUTEX_DEFAULT, NULL);
1399 1399 mutex_init(&seg_pmem_mtx, NULL, MUTEX_DEFAULT, NULL);
1400 1400 mutex_init(&seg_pasync_mtx, NULL, MUTEX_DEFAULT, NULL);
1401 1401 cv_init(&seg_pasync_cv, NULL, CV_DEFAULT, NULL);
1402 1402
1403 1403 physmegs = physmem >> (20 - PAGESHIFT);
1404 1404
1405 1405 /*
1406 1406 * If segpcache_hashsize_win was not set in /etc/system or it has
1407 1407 * an absurd value, set it to a default.
1408 1408 */
1409 1409 if (segpcache_hashsize_win == 0 || segpcache_hashsize_win > physmem) {
1410 1410 /*
1411 1411 * Create one bucket per 32K (or at least per 8 pages) of
1412 1412 * available memory.
1413 1413 */
1414 1414 pgcnt_t pages_per_bucket = MAX(btop(32 * 1024), 8);
1415 1415 segpcache_hashsize_win = MAX(1024, physmem / pages_per_bucket);
1416 1416 }
1417 1417 if (!ISP2(segpcache_hashsize_win)) {
1418 1418 ulong_t rndfac = ~(1UL <<
1419 1419 (highbit(segpcache_hashsize_win) - 1));
1420 1420 rndfac &= segpcache_hashsize_win;
1421 1421 segpcache_hashsize_win += rndfac;
1422 1422 segpcache_hashsize_win = 1 <<
1423 1423 (highbit(segpcache_hashsize_win) - 1);
1424 1424 }
1425 1425 seg_phashsize_win = segpcache_hashsize_win;
1426 1426 seg_phashtab_win = kmem_zalloc(
1427 1427 seg_phashsize_win * sizeof (struct seg_phash),
1428 1428 KM_SLEEP);
1429 1429 for (i = 0; i < seg_phashsize_win; i++) {
1430 1430 hp = &seg_phashtab_win[i];
1431 1431 hp->p_hnext = (struct seg_pcache *)hp;
1432 1432 hp->p_hprev = (struct seg_pcache *)hp;
1433 1433 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1434 1434 }
1435 1435
1436 1436 seg_pahcur = 0;
1437 1437 seg_pathr_on = 0;
1438 1438 seg_pahhead[0].p_lnext = &seg_pahhead[0];
1439 1439 seg_pahhead[0].p_lprev = &seg_pahhead[0];
1440 1440 seg_pahhead[1].p_lnext = &seg_pahhead[1];
1441 1441 seg_pahhead[1].p_lprev = &seg_pahhead[1];
1442 1442
1443 1443 /*
1444 1444 * If segpcache_hashsize_wired was not set in /etc/system or it has
1445 1445 * an absurd value, set it to a default.
1446 1446 */
1447 1447 if (segpcache_hashsize_wired == 0 ||
1448 1448 segpcache_hashsize_wired > physmem / 4) {
1449 1449 /*
1450 1450 * Choose segpcache_hashsize_wired based on physmem.
1451 1451 * Create a bucket per 128K bytes, up to 256K buckets.
1452 1452 */
1453 1453 if (physmegs < 20 * 1024) {
1454 1454 segpcache_hashsize_wired = MAX(1024, physmegs << 3);
1455 1455 } else {
1456 1456 segpcache_hashsize_wired = 256 * 1024;
1457 1457 }
1458 1458 }
1459 1459 if (!ISP2(segpcache_hashsize_wired)) {
1460 1460 segpcache_hashsize_wired = 1 <<
1461 1461 highbit(segpcache_hashsize_wired);
1462 1462 }
1463 1463 seg_phashsize_wired = segpcache_hashsize_wired;
1464 1464 seg_phashtab_wired = kmem_zalloc(
1465 1465 seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
1466 1466 for (i = 0; i < seg_phashsize_wired; i++) {
1467 1467 hp = (struct seg_phash *)&seg_phashtab_wired[i];
1468 1468 hp->p_hnext = (struct seg_pcache *)hp;
1469 1469 hp->p_hprev = (struct seg_pcache *)hp;
1470 1470 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1471 1471 }
1472 1472
1473 1473 if (segpcache_maxwindow == 0) {
1474 1474 if (physmegs < 64) {
1475 1475 /* 3% of memory */
1476 1476 segpcache_maxwindow = availrmem >> 5;
1477 1477 } else if (physmegs < 512) {
1478 1478 /* 12% of memory */
1479 1479 segpcache_maxwindow = availrmem >> 3;
1480 1480 } else if (physmegs < 1024) {
1481 1481 /* 25% of memory */
1482 1482 segpcache_maxwindow = availrmem >> 2;
1483 1483 } else if (physmegs < 2048) {
1484 1484 /* 50% of memory */
1485 1485 segpcache_maxwindow = availrmem >> 1;
1486 1486 } else {
1487 1487 /* no limit */
1488 1488 segpcache_maxwindow = (pgcnt_t)-1;
1489 1489 }
1490 1490 }
1491 1491 seg_pmaxwindow = segpcache_maxwindow;
1492 1492 seg_pinit_mem_config();
1493 1493 }
1494 1494
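/*
 * Illustrative worked example, not part of this patch: seg_pinit()'s
 * sizing defaults for a hypothetical 8 GB machine with 4K pages.
 *
 *	physmem   = 2M pages, physmegs = physmem >> 8 = 8192
 *	win hash:   MAX(1024, physmem / 8) = 256K buckets
 *	            (one bucket per 8 pages, i.e. per 32K of memory)
 *	wired hash: physmegs < 20K, so MAX(1024, physmegs << 3) = 64K buckets
 *	max window: physmegs >= 2048, so segpcache_maxwindow = (pgcnt_t)-1
 *	            (unlimited; smaller machines get ~3% to 50% of availrmem)
 */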
1495 1495 /*
1496 1496 * called by pageout if memory is low
1497 1497 */
1498 1498 void
1499 1499 seg_preap(void)
1500 1500 {
1501 1501 /*
1502 1502 * if the cache is off or empty, return
1503 1503 */
1504 1504 if (seg_plocked_window == 0) {
1505 1505 return;
1506 1506 }
1507 1507 ASSERT(seg_phashsize_win != 0);
1508 1508
1509 1509 /*
1510 1510 * If somebody is already purging pcache
1511 1511 * just return.
1512 1512 */
1513 1513 if (seg_pdisabled) {
1514 1514 return;
1515 1515 }
1516 1516
1517 1517 cv_signal(&seg_pasync_cv);
1518 1518 }
1519 1519
1520 1520 /*
1521 1521 * run as a background thread and reclaim pagelock
1522 1522 * pages which have not been used recently
1523 1523 */
1524 1524 void
1525 1525 seg_pasync_thread(void)
1526 1526 {
1527 1527 callb_cpr_t cpr_info;
1528 1528
1529 1529 if (seg_phashsize_win == 0) {
1530 1530 thread_exit();
1531 1531 /*NOTREACHED*/
1532 1532 }
1533 1533
1534 1534 seg_pasync_thr = curthread;
1535 1535
1536 1536 CALLB_CPR_INIT(&cpr_info, &seg_pasync_mtx,
1537 1537 callb_generic_cpr, "seg_pasync");
1538 1538
1539 1539 if (segpcache_reap_ticks <= 0) {
1540 1540 segpcache_reap_ticks = segpcache_reap_sec * hz;
1541 1541 }
1542 1542
1543 1543 mutex_enter(&seg_pasync_mtx);
1544 1544 for (;;) {
1545 1545 CALLB_CPR_SAFE_BEGIN(&cpr_info);
1546 1546 (void) cv_reltimedwait(&seg_pasync_cv, &seg_pasync_mtx,
1547 1547 segpcache_reap_ticks, TR_CLOCK_TICK);
1548 1548 CALLB_CPR_SAFE_END(&cpr_info, &seg_pasync_mtx);
1549 1549 if (seg_pdisabled == 0) {
1550 1550 seg_ppurge_async(0);
1551 1551 }
1552 1552 }
1553 1553 }
1554 1554
1555 1555 static struct kmem_cache *seg_cache;
1556 1556
1557 1557 /*
1558 1558 * Initialize segment management data structures.
1559 1559 */
1560 1560 void
1561 1561 seg_init(void)
1562 1562 {
1563 1563 kstat_t *ksp;
1564 1564
1565 1565 seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
1566 1566 0, NULL, NULL, NULL, NULL, NULL, 0);
1567 1567
1568 1568 ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
1569 1569 segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
1570 1570 if (ksp) {
1571 1571 ksp->ks_data = (void *)segadvstat_ptr;
1572 1572 kstat_install(ksp);
1573 1573 }
1574 1574
1575 1575 seg_pinit();
1576 1576 }
1577 1577
1578 1578 /*
1579 1579 * Allocate a segment to cover [base, base+size]
1580 1580 * and attach it to the specified address space.
1581 1581 */
1582 1582 struct seg *
1583 1583 seg_alloc(struct as *as, caddr_t base, size_t size)
1584 1584 {
1585 1585 struct seg *new;
1586 1586 caddr_t segbase;
1587 1587 size_t segsize;
1588 1588
1589 1589 segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
1590 1590 segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
1591 1591 (uintptr_t)segbase;
1592 1592
1593 1593 if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
1594 1594 return ((struct seg *)NULL); /* bad virtual addr range */
1595 1595
1596 1596 if (as != &kas &&
1597 1597 valid_usr_range(segbase, segsize, 0, as,
1598 1598 as->a_userlimit) != RANGE_OKAY)
1599 1599 return ((struct seg *)NULL); /* bad virtual addr range */
1600 1600
1601 1601 new = kmem_cache_alloc(seg_cache, KM_SLEEP);
1602 1602 new->s_ops = NULL;
1603 1603 new->s_data = NULL;
1604 1604 new->s_szc = 0;
1605 1605 new->s_flags = 0;
1606 1606 mutex_init(&new->s_pmtx, NULL, MUTEX_DEFAULT, NULL);
1607 1607 new->s_phead.p_lnext = &new->s_phead;
1608 1608 new->s_phead.p_lprev = &new->s_phead;
1609 1609 if (seg_attach(as, segbase, segsize, new) < 0) {
1610 1610 kmem_cache_free(seg_cache, new);
1611 1611 return ((struct seg *)NULL);
1612 1612 }
1613 1613 /* caller must fill in ops, data */
1614 1614 return (new);
1615 1615 }
1616 1616
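/*
 * Illustrative worked example, not part of this patch: the rounding in
 * seg_alloc() above simply page-aligns the requested range outward.
 * With hypothetical 4K pages, base = 0x10123 and size = 0x2000 give
 *
 *	segbase = 0x10123 & PAGEMASK            = 0x10000
 *	segsize = ((0x12123 + PAGEOFFSET) & PAGEMASK) - segbase
 *	        = 0x13000 - 0x10000             = 0x3000  (3 pages)
 */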
1617 1617 /*
1618 1618 * Attach a segment to the address space. Used by seg_alloc()
1619 1619 * and for kernel startup to attach to static segments.
1620 1620 */
1621 1621 int
1622 1622 seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
1623 1623 {
1624 1624 seg->s_as = as;
1625 1625 seg->s_base = base;
1626 1626 seg->s_size = size;
1627 1627
1628 1628 /*
(1628 lines elided)
1629 1629 * as_addseg() will add the segment at the appropriate point
1630 1630 * in the list. It will return -1 if there is overlap with
1631 1631 * an already existing segment.
1632 1632 */
1633 1633 return (as_addseg(as, seg));
1634 1634 }
1635 1635
1636 1636 /*
1637 1637 * Unmap a segment and free it from its associated address space.
1638 1638 * This should be called by anybody who's finished with a whole segment's
1639 -	 * mapping. Just calls SEGOP_UNMAP() on the whole mapping. It is the
1639 +	 * mapping. Just calls segop_unmap() on the whole mapping. It is the
1640 1640	 * responsibility of the segment driver to unlink the segment
1641 1641 * from the address space, and to free public and private data structures
1642 1642 * associated with the segment. (This is typically done by a call to
1643 1643 * seg_free()).
1644 1644 */
1645 1645 void
1646 1646 seg_unmap(struct seg *seg)
1647 1647 {
1648 1648 #ifdef DEBUG
1649 1649 int ret;
1650 1650 #endif /* DEBUG */
1651 1651
1652 1652 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1653 1653
1654 1654 /* Shouldn't have called seg_unmap if mapping isn't yet established */
1655 1655 ASSERT(seg->s_data != NULL);
1656 1656
1657 1657 /* Unmap the whole mapping */
1658 1658 #ifdef DEBUG
1659 - ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1659 + ret = segop_unmap(seg, seg->s_base, seg->s_size);
1660 1660 ASSERT(ret == 0);
1661 1661 #else
1662 - SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1662 + segop_unmap(seg, seg->s_base, seg->s_size);
1663 1663 #endif /* DEBUG */
1664 1664 }
1665 1665
1666 1666 /*
1667 1667 * Free the segment from its associated as. This should only be called
1668 1668 * if a mapping to the segment has not yet been established (e.g., if
1669 1669 * an error occurs in the middle of doing an as_map when the segment
1670 1670 * has already been partially set up) or if it has already been deleted
1671 1671 * (e.g., from a segment driver unmap routine if the unmap applies to the
1672 1672 * entire segment). If the mapping is currently set up then seg_unmap() should
1673 1673 * be called instead.
1674 1674 */
1675 1675 void
1676 1676 seg_free(struct seg *seg)
1677 1677 {
1678 1678 register struct as *as = seg->s_as;
1679 1679 struct seg *tseg = as_removeseg(as, seg);
1680 1680
1681 1681 ASSERT(tseg == seg);
1682 1682
1683 1683 /*
1684 1684 * If the segment private data field is NULL,
1685 1685 * then segment driver is not attached yet.
1686 1686 */
1687 1687 if (seg->s_data != NULL)
1688 - SEGOP_FREE(seg);
1688 + segop_free(seg);
1689 1689
1690 1690 mutex_destroy(&seg->s_pmtx);
1691 1691 ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
1692 1692 ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
1693 1693 kmem_cache_free(seg_cache, seg);
1694 1694 }
1695 1695
1696 1696 /*ARGSUSED*/
1697 1697 static void
1698 1698 seg_p_mem_config_post_add(
1699 1699 void *arg,
1700 1700 pgcnt_t delta_pages)
1701 1701 {
1702 1702 /* Nothing to do. */
1703 1703 }
1704 1704
1705 1705 void
1706 1706 seg_p_enable(void)
1707 1707 {
1708 1708 mutex_enter(&seg_pcache_mtx);
1709 1709 ASSERT(seg_pdisabled != 0);
1710 1710 seg_pdisabled--;
1711 1711 mutex_exit(&seg_pcache_mtx);
1712 1712 }
1713 1713
1714 1714 /*
1715 1715 * seg_p_disable - disables seg_pcache, and then attempts to empty the
1716 1716 * cache.
1717 1717 * Returns SEGP_SUCCESS if the cache was successfully emptied, or
1718 1718 * SEGP_FAIL if the cache could not be emptied.
1719 1719 */
1720 1720 int
1721 1721 seg_p_disable(void)
1722 1722 {
1723 1723 pgcnt_t old_plocked;
1724 1724 int stall_count = 0;
1725 1725
1726 1726 mutex_enter(&seg_pcache_mtx);
1727 1727 seg_pdisabled++;
1728 1728 ASSERT(seg_pdisabled != 0);
1729 1729 mutex_exit(&seg_pcache_mtx);
1730 1730
1731 1731 /*
1732 1732 * Attempt to empty the cache. Terminate if seg_plocked does not
1733 1733 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
1734 1734 */
1735 1735 while (seg_plocked != 0) {
1736 1736 ASSERT(seg_phashsize_win != 0);
1737 1737 old_plocked = seg_plocked;
1738 1738 seg_ppurge_async(1);
1739 1739 if (seg_plocked == old_plocked) {
1740 1740 if (stall_count++ > SEGP_STALL_THRESHOLD) {
1741 1741 return (SEGP_FAIL);
1742 1742 }
1743 1743 } else
1744 1744 stall_count = 0;
1745 1745 if (seg_plocked != 0)
1746 1746 delay(hz/SEGP_PREDEL_DELAY_FACTOR);
1747 1747 }
1748 1748 return (SEGP_SUCCESS);
1749 1749 }
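Illustratively (the real callers live elsewhere; this is only a sketch of the
contract), a seg_p_disable() call must eventually be matched by a
seg_p_enable(), whether or not the purge succeeded, since the disable count is
bumped unconditionally:

	if (seg_p_disable() != SEGP_SUCCESS) {
		/* cache could not be emptied; carry on regardless */
		cmn_err(CE_NOTE, "!pagelock cache not fully purged");
	}
	/* ... work that must not race with new pagelock caching ... */
	seg_p_enable();

The memory-delete pre/post callbacks below follow exactly this pattern.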
1750 1750
1751 1751 /*
1752 1752 * Attempt to purge seg_pcache. May need to return before this has
1753 1753 * completed to allow other pre_del callbacks to unlock pages. This is
1754 1754 * ok because:
1755 1755 * 1) The seg_pdisabled flag has been set so at least we won't
1756 1756	 *    cache any more locks and the locks we couldn't purge
1757 1757 * will not be held if they do get released by a subsequent
1758 1758 * pre-delete callback.
1759 1759 *
1760 1760 * 2) The rest of the memory delete thread processing does not
1761 1761 * depend on the changes made in this pre-delete callback. No
1762 1762 * panics will result, the worst that will happen is that the
1763 1763 * DR code will timeout and cancel the delete.
1764 1764 */
1765 1765 /*ARGSUSED*/
1766 1766 static int
1767 1767 seg_p_mem_config_pre_del(
1768 1768 void *arg,
1769 1769 pgcnt_t delta_pages)
1770 1770 {
1771 1771 if (seg_phashsize_win == 0) {
1772 1772 return (0);
1773 1773 }
1774 1774 if (seg_p_disable() != SEGP_SUCCESS)
1775 1775 cmn_err(CE_NOTE,
1776 1776		    "!Pre-delete couldn't purge pagelock cache - continuing");
1777 1777 return (0);
1778 1778 }
1779 1779
1780 1780 /*ARGSUSED*/
1781 1781 static void
1782 1782 seg_p_mem_config_post_del(
1783 1783 void *arg,
1784 1784 pgcnt_t delta_pages,
1785 1785 int cancelled)
1786 1786 {
1787 1787 if (seg_phashsize_win == 0) {
1788 1788 return;
1789 1789 }
1790 1790 seg_p_enable();
1791 1791 }
1792 1792
1793 1793 static kphysm_setup_vector_t seg_p_mem_config_vec = {
1794 1794 KPHYSM_SETUP_VECTOR_VERSION,
1795 1795 seg_p_mem_config_post_add,
1796 1796 seg_p_mem_config_pre_del,
1797 1797 seg_p_mem_config_post_del,
1798 1798 };
1799 1799
1800 1800 static void
1801 1801 seg_pinit_mem_config(void)
1802 1802 {
1803 1803 int ret;
1804 1804
1805 1805 ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
1806 1806 /*
1807 1807 * Want to catch this in the debug kernel. At run time, if the
1808 1808 * callbacks don't get run all will be OK as the disable just makes
1809 1809 * it more likely that the pages can be collected.
1810 1810 */
1811 1811 ASSERT(ret == 0);
1812 1812 }
1813 1813
1814 1814 /*
1815 1815 * Verify that segment is not a shared anonymous segment which reserves
1816 1816	 * swap. zone.max-swap accounting (zone->zone_max_swap) cannot be transferred
1817 1817 * from one zone to another if any segments are shared. This is because the
1818 1818 * last process to exit will credit the swap reservation. This could lead
1819 1819 * to the swap being reserved by one zone, and credited to another.
1820 1820 */
1821 1821 boolean_t
1822 1822 seg_can_change_zones(struct seg *seg)
1823 1823 {
1824 1824 struct segvn_data *svd;
1825 1825
1826 1826 if (seg->s_ops == &segspt_shmops)
1827 1827 return (B_FALSE);
1828 1828
1829 1829 if (seg->s_ops == &segvn_ops) {
1830 1830 svd = (struct segvn_data *)seg->s_data;
1831 1831 if (svd->type == MAP_SHARED &&
1832 1832 svd->amp != NULL &&
1833 1833 svd->amp->swresv > 0)
1834 1834 return (B_FALSE);
1835 1835 }
1836 1836 return (B_TRUE);
1837 1837 }
1838 1838
1839 1839 /*
1840 1840 * Return swap reserved by a segment backing a private mapping.
1841 1841 */
1842 1842 size_t
1843 1843 seg_swresv(struct seg *seg)
1844 1844 {
1845 1845 struct segvn_data *svd;
1846 1846 size_t swap = 0;
1847 1847
1848 1848 if (seg->s_ops == &segvn_ops) {
1849 1849 svd = (struct segvn_data *)seg->s_data;
1850 1850 if (svd->type == MAP_PRIVATE && svd->swresv > 0)
1851 1851 swap = svd->swresv;
1852 1852 }
1853 1853 return (swap);
1854 1854 }
1855 1855
1856 1856 /*
1857 -	 * Generic not-supported function for SEGOP_INHERIT
1857 +	 * Generic not-supported function for segop_inherit
1858 1858 */
1859 1859 /* ARGSUSED */
1860 1860 int
1861 1861 seg_inherit_notsup(struct seg *seg, caddr_t addr, size_t len, uint_t op)
1862 1862 {
1863 1863 return (ENOTSUP);
1864 1864 }
1865 1865
1866 1866 /*
1867 1867 * segop wrappers
1868 1868 */
1869 1869 int
1870 1870 segop_dup(struct seg *seg, struct seg *new)
1871 1871 {
1872 1872 VERIFY3P(seg->s_ops->dup, !=, NULL);
1873 1873
1874 1874 return (seg->s_ops->dup(seg, new));
1875 1875 }
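The wrappers in this block replace the old upper-case SEGOP_*() macros, which
expanded to direct s_ops calls at the callsites. A representative before/after
for this one (illustrative only, not quoted from any particular caller) would be:

	/* before: macro dispatch straight through s_ops */
	error = SEGOP_DUP(seg, newseg);

	/* after: function call that also VERIFYs the op is implemented */
	error = segop_dup(seg, newseg);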
1876 1876
1877 1877 int
1878 1878 segop_unmap(struct seg *seg, caddr_t addr, size_t len)
1879 1879 {
1880 1880 VERIFY3P(seg->s_ops->unmap, !=, NULL);
1881 1881
1882 1882 return (seg->s_ops->unmap(seg, addr, len));
1883 1883 }
1884 1884
1885 1885 void
1886 1886 segop_free(struct seg *seg)
1887 1887 {
1888 1888 VERIFY3P(seg->s_ops->free, !=, NULL);
1889 1889
1890 1890 seg->s_ops->free(seg);
1891 1891 }
1892 1892
1893 1893 faultcode_t
1894 1894 segop_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
1895 1895 enum fault_type type, enum seg_rw rw)
1896 1896 {
1897 1897 VERIFY3P(seg->s_ops->fault, !=, NULL);
1898 1898
1899 1899 return (seg->s_ops->fault(hat, seg, addr, len, type, rw));
1900 1900 }
1901 1901
1902 1902 faultcode_t
1903 1903 segop_faulta(struct seg *seg, caddr_t addr)
1904 1904 {
1905 1905 VERIFY3P(seg->s_ops->faulta, !=, NULL);
1906 1906
1907 1907 return (seg->s_ops->faulta(seg, addr));
1908 1908 }
1909 1909
1910 1910 int
1911 1911 segop_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1912 1912 {
1913 1913 VERIFY3P(seg->s_ops->setprot, !=, NULL);
1914 1914
1915 1915 return (seg->s_ops->setprot(seg, addr, len, prot));
1916 1916 }
1917 1917
1918 1918 int
1919 1919 segop_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1920 1920 {
1921 1921 VERIFY3P(seg->s_ops->checkprot, !=, NULL);
1922 1922
1923 1923 return (seg->s_ops->checkprot(seg, addr, len, prot));
1924 1924 }
1925 1925
1926 1926 int
1927 1927 segop_kluster(struct seg *seg, caddr_t addr, ssize_t d)
1928 1928 {
1929 1929 VERIFY3P(seg->s_ops->kluster, !=, NULL);
1930 1930
1931 1931 return (seg->s_ops->kluster(seg, addr, d));
1932 1932 }
1933 1933
1934 1934 int
1935 1935 segop_sync(struct seg *seg, caddr_t addr, size_t len, int atr, uint_t f)
1936 1936 {
1937 1937 VERIFY3P(seg->s_ops->sync, !=, NULL);
1938 1938
1939 1939 return (seg->s_ops->sync(seg, addr, len, atr, f));
1940 1940 }
1941 1941
1942 1942 size_t
1943 1943 segop_incore(struct seg *seg, caddr_t addr, size_t len, char *v)
1944 1944 {
1945 1945 VERIFY3P(seg->s_ops->incore, !=, NULL);
1946 1946
1947 1947 return (seg->s_ops->incore(seg, addr, len, v));
1948 1948 }
1949 1949
1950 1950 int
1951 1951 segop_lockop(struct seg *seg, caddr_t addr, size_t len, int atr, int op,
1952 1952 ulong_t *b, size_t p)
1953 1953 {
1954 1954 VERIFY3P(seg->s_ops->lockop, !=, NULL);
1955 1955
1956 1956 return (seg->s_ops->lockop(seg, addr, len, atr, op, b, p));
1957 1957 }
1958 1958
1959 1959 int
1960 1960 segop_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *p)
1961 1961 {
1962 1962 VERIFY3P(seg->s_ops->getprot, !=, NULL);
1963 1963
1964 1964 return (seg->s_ops->getprot(seg, addr, len, p));
1965 1965 }
1966 1966
1967 1967 u_offset_t
1968 1968 segop_getoffset(struct seg *seg, caddr_t addr)
1969 1969 {
1970 1970 VERIFY3P(seg->s_ops->getoffset, !=, NULL);
1971 1971
1972 1972 return (seg->s_ops->getoffset(seg, addr));
1973 1973 }
1974 1974
1975 1975 int
1976 1976 segop_gettype(struct seg *seg, caddr_t addr)
1977 1977 {
1978 1978 VERIFY3P(seg->s_ops->gettype, !=, NULL);
1979 1979
1980 1980 return (seg->s_ops->gettype(seg, addr));
1981 1981 }
1982 1982
1983 1983 int
1984 1984 segop_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
1985 1985 {
1986 1986 VERIFY3P(seg->s_ops->getvp, !=, NULL);
1987 1987
1988 1988 return (seg->s_ops->getvp(seg, addr, vpp));
1989 1989 }
1990 1990
1991 1991 int
1992 1992 segop_advise(struct seg *seg, caddr_t addr, size_t len, uint_t b)
1993 1993 {
1994 1994 VERIFY3P(seg->s_ops->advise, !=, NULL);
1995 1995
1996 1996 return (seg->s_ops->advise(seg, addr, len, b));
1997 1997 }
1998 1998
1999 1999 void
2000 2000 segop_dump(struct seg *seg)
2001 2001 {
2002 2002 VERIFY3P(seg->s_ops->dump, !=, NULL);
2003 2003
2004 2004 seg->s_ops->dump(seg);
2005 2005 }
2006 2006
2007 2007 int
2008 2008 segop_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***page,
2009 2009 enum lock_type type, enum seg_rw rw)
2010 2010 {
2011 2011 VERIFY3P(seg->s_ops->pagelock, !=, NULL);
2012 2012
2013 2013 return (seg->s_ops->pagelock(seg, addr, len, page, type, rw));
2014 2014 }
2015 2015
2016 2016 int
2017 2017 segop_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
2018 2018 {
2019 2019 VERIFY3P(seg->s_ops->setpagesize, !=, NULL);
2020 2020
2021 2021 return (seg->s_ops->setpagesize(seg, addr, len, szc));
2022 2022 }
2023 2023
2024 2024 int
2025 2025 segop_getmemid(struct seg *seg, caddr_t addr, memid_t *mp)
2026 2026 {
2027 2027 VERIFY3P(seg->s_ops->getmemid, !=, NULL);
2028 2028
2029 2029 return (seg->s_ops->getmemid(seg, addr, mp));
2030 2030 }
2031 2031
2032 2032 struct lgrp_mem_policy_info *
2033 2033 segop_getpolicy(struct seg *seg, caddr_t addr)
2034 2034 {
2035 2035 if (seg->s_ops->getpolicy == NULL)
2036 2036 return (NULL);
2037 2037
2038 2038 return (seg->s_ops->getpolicy(seg, addr));
2039 2039 }
2040 2040
2041 2041 int
2042 2042 segop_capable(struct seg *seg, segcapability_t cap)
2043 2043 {
2044 2044 VERIFY3P(seg->s_ops->capable, !=, NULL);
2045 2045
2046 2046 return (seg->s_ops->capable(seg, cap));
2047 2047 }
2048 2048
2049 2049 int
2050 2050 segop_inherit(struct seg *seg, caddr_t addr, size_t len, uint_t op)
2051 2051 {
2052 2052 if (seg->s_ops->inherit == NULL)
2053 2053 return (ENOTSUP);
2054 2054
2055 2055 return (seg->s_ops->inherit(seg, addr, len, op));
2056 2056 }
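Most of the wrappers above VERIFY that the entry point exists; only getpolicy
and inherit tolerate a NULL op (returning NULL and ENOTSUP respectively). A
hypothetical driver ops table (the "segfoo" names are invented; the member
names come from the wrappers above) could therefore leave those two unset:

	static struct seg_ops segfoo_ops = {
		.dup		= segfoo_dup,
		.unmap		= segfoo_unmap,
		.free		= segfoo_free,
		.fault		= segfoo_fault,
		/* ... remaining mandatory entry points ... */
		.getpolicy	= NULL,		/* segop_getpolicy() returns NULL */
		.inherit	= NULL,		/* segop_inherit() returns ENOTSUP */
	};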