const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL getmemid segop as a shorthand for ENODEV
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENODEV, handle a NULL getmemid segop function pointer as
shorthand for "return ENODEV".
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle a NULL capable segop function pointer as shorthand for "no
capabilities supported".
segop_getpolicy already checks for a NULL op
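
For reference, that existing check has the same shape the two changes above
adopt; roughly (a simplified sketch, not the verbatim vm/seg.c code):

    static lgrp_mem_policy_info_t *
    segop_getpolicy(struct seg *seg, caddr_t addr)
    {
            /* A NULL op already means "no policy"; just return NULL. */
            if (seg->s_ops->getpolicy == NULL)
                    return (NULL);

            return (seg->s_ops->getpolicy(seg, addr));
    }
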
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL will accomplish the same thing in most
cases. In other cases, keeping the function pointer NULL will result in
the proper error code being returned.
use C99 initializers in segment ops structures
remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory. The code is there and in theory it runs when we get *extremely* low
on memory. In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
--- old/usr/src/uts/common/vm/seg_kp.c
+++ new/usr/src/uts/common/vm/seg_kp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 26 /* All Rights Reserved */
27 27
28 28 /*
29 29 * Portions of this source code were derived from Berkeley 4.3 BSD
30 30 * under license from the Regents of the University of California.
31 31 */
32 32
33 33 /*
34 34 * segkp is a segment driver that administers the allocation and deallocation
35 35 * of pageable variable size chunks of kernel virtual address space. Each
36 36 * allocated resource is page-aligned.
37 37 *
38 38 * The user may specify whether the resource should be initialized to 0,
39 39 * include a redzone, or locked in memory.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/thread.h>
45 45 #include <sys/param.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/buf.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vnode.h>
52 52 #include <sys/cmn_err.h>
53 53 #include <sys/swap.h>
54 54 #include <sys/tuneable.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/vmem.h>
57 57 #include <sys/cred.h>
58 58 #include <sys/dumphdr.h>
59 59 #include <sys/debug.h>
60 60 #include <sys/vtrace.h>
61 61 #include <sys/stack.h>
62 62 #include <sys/atomic.h>
63 63 #include <sys/archsystm.h>
64 64 #include <sys/lgrp.h>
65 65
66 66 #include <vm/as.h>
67 67 #include <vm/seg.h>
68 68 #include <vm/seg_kp.h>
69 69 #include <vm/seg_kmem.h>
70 70 #include <vm/anon.h>
71 71 #include <vm/page.h>
72 72 #include <vm/hat.h>
73 73 #include <sys/bitmap.h>
74 74
75 75 /*
76 76 * Private seg op routines
77 77 */
78 -static void segkp_badop(void);
79 78 static void segkp_dump(struct seg *seg);
80 79 static int segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
81 80 uint_t prot);
82 81 static int segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
83 82 static int segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
84 83 struct page ***page, enum lock_type type,
85 84 enum seg_rw rw);
86 85 static void segkp_insert(struct seg *seg, struct segkp_data *kpd);
87 86 static void segkp_delete(struct seg *seg, struct segkp_data *kpd);
88 87 static caddr_t segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
89 88 struct segkp_data **tkpd, struct anon_map *amp);
90 89 static void segkp_release_internal(struct seg *seg,
91 90 struct segkp_data *kpd, size_t len);
92 91 static int segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
93 92 size_t len, struct segkp_data *kpd, uint_t flags);
94 93 static int segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
95 94 size_t len, struct segkp_data *kpd, uint_t flags);
96 95 static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
97 -static int segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
98 -static lgrp_mem_policy_info_t *segkp_getpolicy(struct seg *seg,
99 - caddr_t addr);
100 -static int segkp_capable(struct seg *seg, segcapability_t capability);
101 96
102 97 /*
103 98 * Lock used to protect the hash table(s) and caches.
104 99 */
105 100 static kmutex_t segkp_lock;
106 101
107 102 /*
108 103 * The segkp caches
109 104 */
110 105 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
111 106
112 -#define SEGKP_BADOP(t) (t(*)())segkp_badop
113 -
114 107 /*
115 108 * When there are fewer than red_minavail bytes left on the stack,
116 109 * segkp_map_red() will map in the redzone (if called). 5000 seems
117 110 * to work reasonably well...
118 111 */
119 112 long red_minavail = 5000;
120 113
121 114 /*
122 115 * will be set to 1 for 32 bit x86 systems only, in startup.c
123 116 */
124 117 int segkp_fromheap = 0;
125 118 ulong_t *segkp_bitmap;
126 119
127 120 /*
128 121 * If segkp_map_red() is called with the redzone already mapped and
129 122 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
130 123 * then the stack situation has become quite serious; if much more stack
131 124 * is consumed, we have the potential of scrogging the next thread/LWP
132 125 * structure. To help debug the "can't happen" panics which may
133 126 * result from this condition, we record hrestime and the calling thread
134 127 * in red_deep_hires and red_deep_thread respectively.
135 128 */
136 129 #define RED_DEEP_THRESHOLD 2000
137 130
138 131 hrtime_t red_deep_hires;
139 132 kthread_t *red_deep_thread;
140 133
141 134 uint32_t red_nmapped;
142 135 uint32_t red_closest = UINT_MAX;
143 136 uint32_t red_ndoubles;
144 137
145 138 pgcnt_t anon_segkp_pages_locked; /* See vm/anon.h */
146 139 pgcnt_t anon_segkp_pages_resv; /* anon reserved by seg_kp */
147 140
148 -static struct seg_ops segkp_ops = {
149 - SEGKP_BADOP(int), /* dup */
150 - SEGKP_BADOP(int), /* unmap */
151 - SEGKP_BADOP(void), /* free */
152 - segkp_fault,
153 - SEGKP_BADOP(faultcode_t), /* faulta */
154 - SEGKP_BADOP(int), /* setprot */
155 - segkp_checkprot,
156 - segkp_kluster,
157 - SEGKP_BADOP(size_t), /* swapout */
158 - SEGKP_BADOP(int), /* sync */
159 - SEGKP_BADOP(size_t), /* incore */
160 - SEGKP_BADOP(int), /* lockop */
161 - SEGKP_BADOP(int), /* getprot */
162 - SEGKP_BADOP(u_offset_t), /* getoffset */
163 - SEGKP_BADOP(int), /* gettype */
164 - SEGKP_BADOP(int), /* getvp */
165 - SEGKP_BADOP(int), /* advise */
166 - segkp_dump, /* dump */
167 - segkp_pagelock, /* pagelock */
168 - SEGKP_BADOP(int), /* setpgsz */
169 - segkp_getmemid, /* getmemid */
170 - segkp_getpolicy, /* getpolicy */
171 - segkp_capable, /* capable */
172 - seg_inherit_notsup /* inherit */
141 +static const struct seg_ops segkp_ops = {
142 + .fault = segkp_fault,
143 + .checkprot = segkp_checkprot,
144 + .kluster = segkp_kluster,
145 + .dump = segkp_dump,
146 + .pagelock = segkp_pagelock,
173 147 };
174 148
175 149
176 -static void
177 -segkp_badop(void)
178 -{
179 - panic("segkp_badop");
180 - /*NOTREACHED*/
181 -}
182 -
183 150 static void segkpinit_mem_config(struct seg *);
184 151
185 152 static uint32_t segkp_indel;
186 153
187 154 /*
188 155 * Allocate the segment specific private data struct and fill it in
189 156 * with the per kp segment mutex, anon ptr. array and hash table.
190 157 */
191 158 int
192 159 segkp_create(struct seg *seg)
193 160 {
194 161 struct segkp_segdata *kpsd;
195 162 size_t np;
196 163
197 164 ASSERT(seg != NULL && seg->s_as == &kas);
198 165 ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
199 166
200 167 if (seg->s_size & PAGEOFFSET) {
201 168 panic("Bad segkp size");
202 169 /*NOTREACHED*/
203 170 }
204 171
205 172 kpsd = kmem_zalloc(sizeof (struct segkp_segdata), KM_SLEEP);
206 173
207 174 /*
208 175 * Allocate the virtual memory for segkp and initialize it
209 176 */
210 177 if (segkp_fromheap) {
211 178 np = btop(kvseg.s_size);
212 179 segkp_bitmap = kmem_zalloc(BT_SIZEOFMAP(np), KM_SLEEP);
213 180 kpsd->kpsd_arena = vmem_create("segkp", NULL, 0, PAGESIZE,
214 181 vmem_alloc, vmem_free, heap_arena, 5 * PAGESIZE, VM_SLEEP);
215 182 } else {
216 183 segkp_bitmap = NULL;
217 184 np = btop(seg->s_size);
218 185 kpsd->kpsd_arena = vmem_create("segkp", seg->s_base,
219 186 seg->s_size, PAGESIZE, NULL, NULL, NULL, 5 * PAGESIZE,
220 187 VM_SLEEP);
221 188 }
222 189
223 190 kpsd->kpsd_anon = anon_create(np, ANON_SLEEP | ANON_ALLOC_FORCE);
224 191
225 192 kpsd->kpsd_hash = kmem_zalloc(SEGKP_HASHSZ * sizeof (struct segkp *),
226 193 KM_SLEEP);
227 194 seg->s_data = (void *)kpsd;
228 195 seg->s_ops = &segkp_ops;
229 196 segkpinit_mem_config(seg);
230 197 return (0);
231 198 }
232 199
233 200
234 201 /*
235 202 * Find a free 'freelist' and initialize it with the appropriate attributes
236 203 */
237 204 void *
238 205 segkp_cache_init(struct seg *seg, int maxsize, size_t len, uint_t flags)
239 206 {
240 207 int i;
241 208
242 209 if ((flags & KPD_NO_ANON) && !(flags & KPD_LOCKED))
243 210 return ((void *)-1);
244 211
245 212 mutex_enter(&segkp_lock);
246 213 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
247 214 if (segkp_cache[i].kpf_inuse)
248 215 continue;
249 216 segkp_cache[i].kpf_inuse = 1;
250 217 segkp_cache[i].kpf_max = maxsize;
251 218 segkp_cache[i].kpf_flags = flags;
252 219 segkp_cache[i].kpf_seg = seg;
253 220 segkp_cache[i].kpf_len = len;
254 221 mutex_exit(&segkp_lock);
255 222 return ((void *)(uintptr_t)i);
256 223 }
257 224 mutex_exit(&segkp_lock);
258 225 return ((void *)-1);
259 226 }
260 227
261 228 /*
262 229 * Free all the cache resources.
263 230 */
264 231 void
265 232 segkp_cache_free(void)
266 233 {
267 234 struct segkp_data *kpd;
268 235 struct seg *seg;
269 236 int i;
270 237
271 238 mutex_enter(&segkp_lock);
272 239 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
273 240 if (!segkp_cache[i].kpf_inuse)
274 241 continue;
275 242 /*
276 243 * Disconnect the freelist and process each element
277 244 */
278 245 kpd = segkp_cache[i].kpf_list;
279 246 seg = segkp_cache[i].kpf_seg;
280 247 segkp_cache[i].kpf_list = NULL;
281 248 segkp_cache[i].kpf_count = 0;
282 249 mutex_exit(&segkp_lock);
283 250
284 251 while (kpd != NULL) {
285 252 struct segkp_data *next;
286 253
287 254 next = kpd->kp_next;
288 255 segkp_release_internal(seg, kpd, kpd->kp_len);
289 256 kpd = next;
290 257 }
291 258 mutex_enter(&segkp_lock);
292 259 }
293 260 mutex_exit(&segkp_lock);
294 261 }
295 262
296 263 /*
297 264 * There are 2 entries into segkp_get_internal. The first includes a cookie
298 265 * used to access a pool of cached segkp resources. The second does not
299 266 * use the cache.
300 267 */
301 268 caddr_t
302 269 segkp_get(struct seg *seg, size_t len, uint_t flags)
303 270 {
304 271 struct segkp_data *kpd = NULL;
305 272
306 273 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
307 274 kpd->kp_cookie = -1;
308 275 return (stom(kpd->kp_base, flags));
309 276 }
310 277 return (NULL);
311 278 }
312 279
313 280 /*
314 281 * Return a 'cached' segkp address
315 282 */
316 283 caddr_t
317 284 segkp_cache_get(void *cookie)
318 285 {
319 286 struct segkp_cache *freelist = NULL;
320 287 struct segkp_data *kpd = NULL;
321 288 int index = (int)(uintptr_t)cookie;
322 289 struct seg *seg;
323 290 size_t len;
324 291 uint_t flags;
325 292
326 293 if (index < 0 || index >= SEGKP_MAX_CACHE)
327 294 return (NULL);
328 295 freelist = &segkp_cache[index];
329 296
330 297 mutex_enter(&segkp_lock);
331 298 seg = freelist->kpf_seg;
332 299 flags = freelist->kpf_flags;
333 300 if (freelist->kpf_list != NULL) {
334 301 kpd = freelist->kpf_list;
335 302 freelist->kpf_list = kpd->kp_next;
336 303 freelist->kpf_count--;
337 304 mutex_exit(&segkp_lock);
338 305 kpd->kp_next = NULL;
339 306 segkp_insert(seg, kpd);
340 307 return (stom(kpd->kp_base, flags));
341 308 }
342 309 len = freelist->kpf_len;
343 310 mutex_exit(&segkp_lock);
344 311 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
345 312 kpd->kp_cookie = index;
346 313 return (stom(kpd->kp_base, flags));
347 314 }
348 315 return (NULL);
349 316 }
350 317
351 318 caddr_t
352 319 segkp_get_withanonmap(
353 320 struct seg *seg,
354 321 size_t len,
355 322 uint_t flags,
356 323 struct anon_map *amp)
357 324 {
358 325 struct segkp_data *kpd = NULL;
359 326
360 327 ASSERT(amp != NULL);
361 328 flags |= KPD_HASAMP;
362 329 if (segkp_get_internal(seg, len, flags, &kpd, amp) != NULL) {
363 330 kpd->kp_cookie = -1;
364 331 return (stom(kpd->kp_base, flags));
365 332 }
366 333 return (NULL);
367 334 }
368 335
369 336 /*
370 337 * This does the real work of segkp allocation.
371 338 * Return to client base addr. len must be page-aligned. A null value is
372 339 * returned if there are no more vm resources (e.g. pages, swap). The len
373 340 * and base recorded in the private data structure include the redzone
374 341 * and the redzone length (if applicable). If the user requests a redzone
375 342 * either the first or last page is left unmapped depending whether stacks
376 343 * grow to low or high memory.
377 344 *
378 345 * The client may also specify a no-wait flag. If that is set then the
379 346 * request will choose a non-blocking path when requesting resources.
380 347 * The default is to make the client wait.
381 348 */
382 349 static caddr_t
383 350 segkp_get_internal(
384 351 struct seg *seg,
385 352 size_t len,
386 353 uint_t flags,
387 354 struct segkp_data **tkpd,
388 355 struct anon_map *amp)
389 356 {
390 357 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
391 358 struct segkp_data *kpd;
392 359 caddr_t vbase = NULL; /* always first virtual, may not be mapped */
393 360 pgcnt_t np = 0; /* number of pages in the resource */
394 361 pgcnt_t segkpindex;
395 362 long i;
396 363 caddr_t va;
397 364 pgcnt_t pages = 0;
398 365 ulong_t anon_idx = 0;
399 366 int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
400 367 caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;
401 368
402 369 if (len & PAGEOFFSET) {
403 370 panic("segkp_get: len is not page-aligned");
404 371 /*NOTREACHED*/
405 372 }
406 373
407 374 ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));
408 375
409 376 /* Only allow KPD_NO_ANON if we are going to lock it down */
410 377 if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
411 378 return (NULL);
412 379
413 380 if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
414 381 return (NULL);
415 382 /*
416 383 * Fix up the len to reflect the REDZONE if applicable
417 384 */
418 385 if (flags & KPD_HASREDZONE)
419 386 len += PAGESIZE;
420 387 np = btop(len);
421 388
422 389 vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
423 390 if (vbase == NULL) {
424 391 kmem_free(kpd, sizeof (struct segkp_data));
425 392 return (NULL);
426 393 }
427 394
428 395 /* If locking, reserve physical memory */
429 396 if (flags & KPD_LOCKED) {
430 397 pages = btop(SEGKP_MAPLEN(len, flags));
431 398 if (page_resv(pages, kmflag) == 0) {
432 399 vmem_free(SEGKP_VMEM(seg), vbase, len);
433 400 kmem_free(kpd, sizeof (struct segkp_data));
434 401 return (NULL);
435 402 }
436 403 if ((flags & KPD_NO_ANON) == 0)
437 404 atomic_add_long(&anon_segkp_pages_locked, pages);
438 405 }
439 406
440 407 /*
441 408 * Reserve sufficient swap space for this vm resource. We'll
442 409 * actually allocate it in the loop below, but reserving it
443 410 * here allows us to back out more gracefully than if we
444 411 * had an allocation failure in the body of the loop.
445 412 *
446 413 * Note that we don't need swap space for the red zone page.
447 414 */
448 415 if (amp != NULL) {
449 416 /*
450 417 * The swap reservation has been done, if required, and the
451 418 * anon_hdr is separate.
452 419 */
453 420 anon_idx = 0;
454 421 kpd->kp_anon_idx = anon_idx;
455 422 kpd->kp_anon = amp->ahp;
456 423
457 424 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
458 425 kpd, vbase, len, flags, 1);
459 426
460 427 } else if ((flags & KPD_NO_ANON) == 0) {
461 428 if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
462 429 if (flags & KPD_LOCKED) {
463 430 atomic_add_long(&anon_segkp_pages_locked,
464 431 -pages);
465 432 page_unresv(pages);
466 433 }
467 434 vmem_free(SEGKP_VMEM(seg), vbase, len);
468 435 kmem_free(kpd, sizeof (struct segkp_data));
469 436 return (NULL);
470 437 }
471 438 atomic_add_long(&anon_segkp_pages_resv,
472 439 btop(SEGKP_MAPLEN(len, flags)));
473 440 anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
474 441 kpd->kp_anon_idx = anon_idx;
475 442 kpd->kp_anon = kpsd->kpsd_anon;
476 443
477 444 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
478 445 kpd, vbase, len, flags, 1);
479 446 } else {
480 447 kpd->kp_anon = NULL;
481 448 kpd->kp_anon_idx = 0;
482 449 }
483 450
484 451 /*
485 452 * Allocate page and anon resources for the virtual address range
486 453 * except the redzone
487 454 */
488 455 if (segkp_fromheap)
489 456 segkpindex = btop((uintptr_t)(vbase - kvseg.s_base));
490 457 for (i = 0, va = vbase; i < np; i++, va += PAGESIZE) {
491 458 page_t *pl[2];
492 459 struct vnode *vp;
493 460 anoff_t off;
494 461 int err;
495 462 page_t *pp = NULL;
496 463
497 464 /*
498 465 * Mark this page to be a segkp page in the bitmap.
499 466 */
500 467 if (segkp_fromheap) {
501 468 BT_ATOMIC_SET(segkp_bitmap, segkpindex);
502 469 segkpindex++;
503 470 }
504 471
505 472 /*
506 473 * If this page is the red zone page, we don't need swap
507 474 * space for it. Note that we skip over the code that
508 475 * establishes MMU mappings, so that the page remains
509 476 * invalid.
510 477 */
511 478 if ((flags & KPD_HASREDZONE) && KPD_REDZONE(kpd) == i)
512 479 continue;
513 480
514 481 if (kpd->kp_anon != NULL) {
515 482 struct anon *ap;
516 483
517 484 ASSERT(anon_get_ptr(kpd->kp_anon, anon_idx + i)
518 485 == NULL);
519 486 /*
520 487 * Determine the "vp" and "off" of the anon slot.
521 488 */
522 489 ap = anon_alloc(NULL, 0);
523 490 if (amp != NULL)
524 491 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
525 492 (void) anon_set_ptr(kpd->kp_anon, anon_idx + i,
526 493 ap, ANON_SLEEP);
527 494 if (amp != NULL)
528 495 ANON_LOCK_EXIT(&->a_rwlock);
529 496 swap_xlate(ap, &vp, &off);
530 497
531 498 /*
532 499 * Create a page with the specified identity. The
533 500 * page is returned with the "shared" lock held.
534 501 */
535 502 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE,
536 503 NULL, pl, PAGESIZE, seg, va, S_CREATE,
537 504 kcred, NULL);
538 505 if (err) {
539 506 /*
540 507 * XXX - This should not fail.
541 508 */
542 509 panic("segkp_get: no pages");
543 510 /*NOTREACHED*/
544 511 }
545 512 pp = pl[0];
546 513 } else {
547 514 ASSERT(page_exists(&kvp,
548 515 (u_offset_t)(uintptr_t)va) == NULL);
549 516
550 517 if ((pp = page_create_va(&kvp,
551 518 (u_offset_t)(uintptr_t)va, PAGESIZE,
552 519 (flags & KPD_NOWAIT ? 0 : PG_WAIT) | PG_EXCL |
553 520 PG_NORELOC, seg, va)) == NULL) {
554 521 /*
555 522 * Legitimize resource; then destroy it.
556 523 * Easier than trying to unwind here.
557 524 */
558 525 kpd->kp_flags = flags;
559 526 kpd->kp_base = vbase;
560 527 kpd->kp_len = len;
561 528 segkp_release_internal(seg, kpd, va - vbase);
562 529 return (NULL);
563 530 }
564 531 page_io_unlock(pp);
565 532 }
566 533
567 534 if (flags & KPD_ZERO)
568 535 pagezero(pp, 0, PAGESIZE);
569 536
570 537 /*
571 538 * Load and lock an MMU translation for the page.
572 539 */
573 540 hat_memload(seg->s_as->a_hat, va, pp, (PROT_READ|PROT_WRITE),
574 541 ((flags & KPD_LOCKED) ? HAT_LOAD_LOCK : HAT_LOAD));
575 542
576 543 /*
577 544 * Now, release lock on the page.
578 545 */
579 546 if (flags & KPD_LOCKED) {
580 547 /*
581 548 * Indicate to page_retire framework that this
582 549 * page can only be retired when it is freed.
583 550 */
584 551 PP_SETRAF(pp);
585 552 page_downgrade(pp);
586 553 } else
587 554 page_unlock(pp);
588 555 }
589 556
590 557 kpd->kp_flags = flags;
591 558 kpd->kp_base = vbase;
592 559 kpd->kp_len = len;
593 560 segkp_insert(seg, kpd);
594 561 *tkpd = kpd;
595 562 return (stom(kpd->kp_base, flags));
596 563 }
597 564
598 565 /*
599 566 * Release the resource to cache if the pool (designated by the cookie)
600 567 * has less than the maximum allowable. If inserted in cache,
601 568 * segkp_delete ensures the element is taken off of the active list.
602 569 */
603 570 void
604 571 segkp_release(struct seg *seg, caddr_t vaddr)
605 572 {
606 573 struct segkp_cache *freelist;
607 574 struct segkp_data *kpd = NULL;
608 575
609 576 if ((kpd = segkp_find(seg, vaddr)) == NULL) {
610 577 panic("segkp_release: null kpd");
611 578 /*NOTREACHED*/
612 579 }
613 580
614 581 if (kpd->kp_cookie != -1) {
615 582 freelist = &segkp_cache[kpd->kp_cookie];
616 583 mutex_enter(&segkp_lock);
617 584 if (!segkp_indel && freelist->kpf_count < freelist->kpf_max) {
618 585 segkp_delete(seg, kpd);
619 586 kpd->kp_next = freelist->kpf_list;
620 587 freelist->kpf_list = kpd;
621 588 freelist->kpf_count++;
622 589 mutex_exit(&segkp_lock);
623 590 return;
624 591 } else {
625 592 mutex_exit(&segkp_lock);
626 593 kpd->kp_cookie = -1;
627 594 }
628 595 }
629 596 segkp_release_internal(seg, kpd, kpd->kp_len);
630 597 }
631 598
632 599 /*
633 600 * Free the entire resource. segkp_unlock gets called with the start of the
634 601 * mapped portion of the resource. The length is the size of the mapped
635 602 * portion
636 603 */
637 604 static void
638 605 segkp_release_internal(struct seg *seg, struct segkp_data *kpd, size_t len)
639 606 {
640 607 caddr_t va;
641 608 long i;
642 609 long redzone;
643 610 size_t np;
644 611 page_t *pp;
645 612 struct vnode *vp;
646 613 anoff_t off;
647 614 struct anon *ap;
648 615 pgcnt_t segkpindex;
649 616
650 617 ASSERT(kpd != NULL);
651 618 ASSERT((kpd->kp_flags & KPD_HASAMP) == 0 || kpd->kp_cookie == -1);
652 619 np = btop(len);
653 620
654 621 /* Remove from active hash list */
655 622 if (kpd->kp_cookie == -1) {
656 623 mutex_enter(&segkp_lock);
657 624 segkp_delete(seg, kpd);
658 625 mutex_exit(&segkp_lock);
659 626 }
660 627
661 628 /*
662 629 * Precompute redzone page index.
663 630 */
664 631 redzone = -1;
665 632 if (kpd->kp_flags & KPD_HASREDZONE)
666 633 redzone = KPD_REDZONE(kpd);
667 634
668 635
669 636 va = kpd->kp_base;
670 637
671 638 hat_unload(seg->s_as->a_hat, va, (np << PAGESHIFT),
672 639 ((kpd->kp_flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
673 640 /*
674 641 * Free up those anon resources that are quiescent.
675 642 */
676 643 if (segkp_fromheap)
677 644 segkpindex = btop((uintptr_t)(va - kvseg.s_base));
678 645 for (i = 0; i < np; i++, va += PAGESIZE) {
679 646
680 647 /*
681 648 * Clear the bit for this page from the bitmap.
682 649 */
683 650 if (segkp_fromheap) {
684 651 BT_ATOMIC_CLEAR(segkp_bitmap, segkpindex);
685 652 segkpindex++;
686 653 }
687 654
688 655 if (i == redzone)
689 656 continue;
690 657 if (kpd->kp_anon) {
691 658 /*
692 659 * Free up anon resources and destroy the
693 660 * associated pages.
694 661 *
695 662 * Release the lock if there is one. Have to get the
696 663 * page to do this, unfortunately.
697 664 */
698 665 if (kpd->kp_flags & KPD_LOCKED) {
699 666 ap = anon_get_ptr(kpd->kp_anon,
700 667 kpd->kp_anon_idx + i);
701 668 swap_xlate(ap, &vp, &off);
702 669 /* Find the shared-locked page. */
703 670 pp = page_find(vp, (u_offset_t)off);
704 671 if (pp == NULL) {
705 672 panic("segkp_release: "
706 673 "kp_anon: no page to unlock ");
707 674 /*NOTREACHED*/
708 675 }
709 676 if (PP_ISRAF(pp))
710 677 PP_CLRRAF(pp);
711 678
712 679 page_unlock(pp);
713 680 }
714 681 if ((kpd->kp_flags & KPD_HASAMP) == 0) {
715 682 anon_free(kpd->kp_anon, kpd->kp_anon_idx + i,
716 683 PAGESIZE);
717 684 anon_unresv_zone(PAGESIZE, NULL);
718 685 atomic_dec_ulong(&anon_segkp_pages_resv);
719 686 }
720 687 TRACE_5(TR_FAC_VM,
721 688 TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
722 689 kpd, va, PAGESIZE, 0, 0);
723 690 } else {
724 691 if (kpd->kp_flags & KPD_LOCKED) {
725 692 pp = page_find(&kvp, (u_offset_t)(uintptr_t)va);
726 693 if (pp == NULL) {
727 694 panic("segkp_release: "
728 695 "no page to unlock");
729 696 /*NOTREACHED*/
730 697 }
731 698 if (PP_ISRAF(pp))
732 699 PP_CLRRAF(pp);
733 700 /*
734 701 * We should just upgrade the lock here
735 702 * but there is no upgrade that waits.
736 703 */
737 704 page_unlock(pp);
738 705 }
739 706 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)va,
740 707 SE_EXCL);
741 708 if (pp != NULL)
742 709 page_destroy(pp, 0);
743 710 }
744 711 }
745 712
746 713 /* If locked, release physical memory reservation */
747 714 if (kpd->kp_flags & KPD_LOCKED) {
748 715 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
749 716 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
750 717 atomic_add_long(&anon_segkp_pages_locked, -pages);
751 718 page_unresv(pages);
752 719 }
753 720
754 721 vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
755 722 kmem_free(kpd, sizeof (struct segkp_data));
756 723 }
757 724
758 725 /*
759 726 * segkp_map_red() will check the current frame pointer against the
760 727 * stack base. If the amount of stack remaining is questionable
761 728 * (less than red_minavail), then segkp_map_red() will map in the redzone
762 729 * and return 1. Otherwise, it will return 0. segkp_map_red() can
763 - * _only_ be called when:
764 - *
765 - * - it is safe to sleep on page_create_va().
766 - * - the caller is non-swappable.
730 + * _only_ be called when it is safe to sleep on page_create_va().
767 731 *
768 732 * It is up to the caller to remember whether segkp_map_red() successfully
769 733 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
770 - * time. Note that the caller must _remain_ non-swappable until after
771 - * calling segkp_unmap_red().
734 + * time.
772 735 *
773 736 * Currently, this routine is only called from pagefault() (which necessarily
774 737 * satisfies the above conditions).
775 738 */
776 739 #if defined(STACK_GROWTH_DOWN)
777 740 int
778 741 segkp_map_red(void)
779 742 {
780 743 uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
781 744 #ifndef _LP64
782 745 caddr_t stkbase;
783 746 #endif
784 747
785 - ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
786 -
787 748 /*
788 749 * Optimize for the common case where we simply return.
789 750 */
790 751 if ((curthread->t_red_pp == NULL) &&
791 752 (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
792 753 return (0);
793 754
794 755 #if defined(_LP64)
795 756 /*
796 757 * XXX We probably need something better than this.
797 758 */
798 759 panic("kernel stack overflow");
799 760 /*NOTREACHED*/
800 761 #else /* _LP64 */
801 762 if (curthread->t_red_pp == NULL) {
802 763 page_t *red_pp;
803 764 struct seg kseg;
804 765
805 766 caddr_t red_va = (caddr_t)
806 767 (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
807 768 PAGESIZE);
808 769
809 770 ASSERT(page_exists(&kvp, (u_offset_t)(uintptr_t)red_va) ==
810 771 NULL);
811 772
812 773 /*
813 774 * Allocate the physical for the red page.
814 775 */
815 776 /*
816 777 * No PG_NORELOC here to avoid waits. Unlikely to get
817 778 * a relocate happening in the short time the page exists
818 779 * and it will be OK anyway.
819 780 */
820 781
821 782 kseg.s_as = &kas;
822 783 red_pp = page_create_va(&kvp, (u_offset_t)(uintptr_t)red_va,
823 784 PAGESIZE, PG_WAIT | PG_EXCL, &kseg, red_va);
824 785 ASSERT(red_pp != NULL);
825 786
826 787 /*
827 788 * So we now have a page to jam into the redzone...
828 789 */
829 790 page_io_unlock(red_pp);
830 791
831 792 hat_memload(kas.a_hat, red_va, red_pp,
832 793 (PROT_READ|PROT_WRITE), HAT_LOAD_LOCK);
833 794 page_downgrade(red_pp);
834 795
835 796 /*
836 797 * The page is left SE_SHARED locked so we can hold on to
837 798 * the page_t pointer.
838 799 */
839 800 curthread->t_red_pp = red_pp;
840 801
841 802 atomic_inc_32(&red_nmapped);
842 803 while (fp - (uintptr_t)curthread->t_stkbase < red_closest) {
843 804 (void) atomic_cas_32(&red_closest, red_closest,
844 805 (uint32_t)(fp - (uintptr_t)curthread->t_stkbase));
845 806 }
846 807 return (1);
847 808 }
848 809
849 810 stkbase = (caddr_t)(((uintptr_t)curthread->t_stkbase &
850 811 (uintptr_t)PAGEMASK) - PAGESIZE);
851 812
852 813 atomic_inc_32(&red_ndoubles);
853 814
854 815 if (fp - (uintptr_t)stkbase < RED_DEEP_THRESHOLD) {
855 816 /*
856 817 * Oh boy. We're already deep within the mapped-in
857 818 * redzone page, and the caller is trying to prepare
858 819 * for a deep stack run. We're running without a
859 820 * redzone right now: if the caller plows off the
860 821 * end of the stack, it'll plow another thread or
861 822 * LWP structure. That situation could result in
862 823 * a very hard-to-debug panic, so, in the spirit of
863 824 * recording the name of one's killer in one's own
864 825 * blood, we're going to record hrestime and the calling
865 826 * thread.
866 827 */
867 828 red_deep_hires = hrestime.tv_nsec;
868 829 red_deep_thread = curthread;
869 830 }
870 831
871 832 /*
872 833 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
873 834 */
874 835 ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
875 836 return (0);
876 837 #endif /* _LP64 */
877 838 }
878 839
879 840 void
880 841 segkp_unmap_red(void)
881 842 {
882 843 page_t *pp;
883 844 caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
884 845 (uintptr_t)PAGEMASK) - PAGESIZE);
885 846
886 847 ASSERT(curthread->t_red_pp != NULL);
887 - ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
888 848
889 849 /*
890 850 * Because we locked the mapping down, we can't simply rely
891 851 * on page_destroy() to clean everything up; we need to call
892 852 * hat_unload() to explicitly unlock the mapping resources.
893 853 */
894 854 hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
895 855
896 856 pp = curthread->t_red_pp;
897 857
898 858 ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
899 859
900 860 /*
901 861 * Need to upgrade the SE_SHARED lock to SE_EXCL.
902 862 */
903 863 if (!page_tryupgrade(pp)) {
904 864 /*
905 865 * As there is no wait for upgrade, release the
906 866 * SE_SHARED lock and wait for SE_EXCL.
907 867 */
908 868 page_unlock(pp);
909 869 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)red_va, SE_EXCL);
910 870 /* pp may be NULL here, hence the test below */
911 871 }
912 872
913 873 /*
914 874 * Destroy the page, with dontfree set to zero (i.e. free it).
915 875 */
916 876 if (pp != NULL)
917 877 page_destroy(pp, 0);
918 878 curthread->t_red_pp = NULL;
919 879 }
920 880 #else
921 881 #error Red stacks only supported with downwards stack growth.
922 882 #endif
923 883
924 884 /*
925 885 * Handle a fault on an address corresponding to one of the
926 886 * resources in the segkp segment.
927 887 */
928 888 faultcode_t
929 889 segkp_fault(
930 890 struct hat *hat,
931 891 struct seg *seg,
932 892 caddr_t vaddr,
933 893 size_t len,
934 894 enum fault_type type,
935 895 enum seg_rw rw)
936 896 {
937 897 struct segkp_data *kpd = NULL;
938 898 int err;
939 899
940 900 ASSERT(seg->s_as == &kas && RW_READ_HELD(&seg->s_as->a_lock));
941 901
942 902 /*
943 903 * Sanity checks.
944 904 */
945 905 if (type == F_PROT) {
946 906 panic("segkp_fault: unexpected F_PROT fault");
947 907 /*NOTREACHED*/
948 908 }
949 909
950 910 if ((kpd = segkp_find(seg, vaddr)) == NULL)
951 911 return (FC_NOMAP);
952 912
953 913 mutex_enter(&kpd->kp_lock);
954 914
955 915 if (type == F_SOFTLOCK) {
956 916 ASSERT(!(kpd->kp_flags & KPD_LOCKED));
957 917 /*
958 918 * The F_SOFTLOCK case has more stringent
959 919 * range requirements: the given range must exactly coincide
960 920 * with the resource's mapped portion. Note reference to
961 921 * redzone is handled since vaddr would not equal base
962 922 */
963 923 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
964 924 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
965 925 mutex_exit(&kpd->kp_lock);
966 926 return (FC_MAKE_ERR(EFAULT));
967 927 }
968 928
969 929 if ((err = segkp_load(hat, seg, vaddr, len, kpd, KPD_LOCKED))) {
970 930 mutex_exit(&kpd->kp_lock);
971 931 return (FC_MAKE_ERR(err));
972 932 }
973 933 kpd->kp_flags |= KPD_LOCKED;
974 934 mutex_exit(&kpd->kp_lock);
975 935 return (0);
976 936 }
977 937
978 938 if (type == F_INVAL) {
979 939 ASSERT(!(kpd->kp_flags & KPD_NO_ANON));
980 940
981 941 /*
982 942 * Check if we touched the redzone. Somewhat optimistic
983 943 * here if we are touching the redzone of our own stack
984 944 * since we wouldn't have a stack to get this far...
985 945 */
986 946 if ((kpd->kp_flags & KPD_HASREDZONE) &&
987 947 btop((uintptr_t)(vaddr - kpd->kp_base)) == KPD_REDZONE(kpd))
988 948 panic("segkp_fault: accessing redzone");
989 949
990 950 /*
991 951 * This fault may occur while the page is being F_SOFTLOCK'ed.
992 952 * Return since a 2nd segkp_load is unnecessary and also would
993 953 * result in the page being locked twice and eventually
994 954 * hang the thread_reaper thread.
995 955 */
996 956 if (kpd->kp_flags & KPD_LOCKED) {
997 957 mutex_exit(&kpd->kp_lock);
998 958 return (0);
999 959 }
1000 960
1001 961 err = segkp_load(hat, seg, vaddr, len, kpd, kpd->kp_flags);
1002 962 mutex_exit(&kpd->kp_lock);
1003 963 return (err ? FC_MAKE_ERR(err) : 0);
1004 964 }
1005 965
1006 966 if (type == F_SOFTUNLOCK) {
1007 967 uint_t flags;
1008 968
1009 969 /*
1010 970 * Make sure the addr is LOCKED and it has anon backing
1011 971 * before unlocking
1012 972 */
1013 973 if ((kpd->kp_flags & (KPD_LOCKED|KPD_NO_ANON)) != KPD_LOCKED) {
1014 974 panic("segkp_fault: bad unlock");
1015 975 /*NOTREACHED*/
1016 976 }
1017 977
1018 978 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
1019 979 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
1020 980 panic("segkp_fault: bad range");
1021 981 /*NOTREACHED*/
1022 982 }
1023 983
1024 984 if (rw == S_WRITE)
1025 985 flags = kpd->kp_flags | KPD_WRITEDIRTY;
1026 986 else
1027 987 flags = kpd->kp_flags;
1028 988 err = segkp_unlock(hat, seg, vaddr, len, kpd, flags);
1029 989 kpd->kp_flags &= ~KPD_LOCKED;
1030 990 mutex_exit(&kpd->kp_lock);
1031 991 return (err ? FC_MAKE_ERR(err) : 0);
1032 992 }
1033 993 mutex_exit(&kpd->kp_lock);
1034 994 panic("segkp_fault: bogus fault type: %d\n", type);
1035 995 /*NOTREACHED*/
1036 996 }
1037 997
1038 998 /*
1039 999 * Check that the given protections suffice over the range specified by
1040 1000 * vaddr and len. For this segment type, the only issue is whether or
1041 1001 * not the range lies completely within the mapped part of an allocated
1042 1002 * resource.
1043 1003 */
1044 1004 /* ARGSUSED */
1045 1005 static int
1046 1006 segkp_checkprot(struct seg *seg, caddr_t vaddr, size_t len, uint_t prot)
1047 1007 {
1048 1008 struct segkp_data *kpd = NULL;
1049 1009 caddr_t mbase;
1050 1010 size_t mlen;
1051 1011
1052 1012 if ((kpd = segkp_find(seg, vaddr)) == NULL)
1053 1013 return (EACCES);
1054 1014
1055 1015 mutex_enter(&kpd->kp_lock);
1056 1016 mbase = stom(kpd->kp_base, kpd->kp_flags);
1057 1017 mlen = SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags);
1058 1018 if (len > mlen || vaddr < mbase ||
1059 1019 ((vaddr + len) > (mbase + mlen))) {
1060 1020 mutex_exit(&kpd->kp_lock);
1061 1021 return (EACCES);
1062 1022 }
1063 1023 mutex_exit(&kpd->kp_lock);
1064 1024 return (0);
1065 1025 }
1066 1026
1067 1027
1068 1028 /*
1069 1029 * Check to see if it makes sense to do kluster/read ahead to
1070 1030 * addr + delta relative to the mapping at addr. We assume here
1071 1031 * that delta is a signed PAGESIZE'd multiple (which can be negative).
1072 1032 *
1073 1033 * For seg_u we always "approve" of this action from our standpoint.
1074 1034 */
1075 1035 /*ARGSUSED*/
1076 1036 static int
1077 1037 segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
1078 1038 {
1079 1039 return (0);
1080 1040 }
1081 1041
1082 1042 /*
1083 1043 * Load and possibly lock intra-slot resources in the range given by
1084 1044 * vaddr and len.
1085 1045 */
1086 1046 static int
1087 1047 segkp_load(
1088 1048 struct hat *hat,
1089 1049 struct seg *seg,
1090 1050 caddr_t vaddr,
1091 1051 size_t len,
1092 1052 struct segkp_data *kpd,
1093 1053 uint_t flags)
1094 1054 {
1095 1055 caddr_t va;
1096 1056 caddr_t vlim;
1097 1057 ulong_t i;
1098 1058 uint_t lock;
1099 1059
1100 1060 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1101 1061
1102 1062 len = P2ROUNDUP(len, PAGESIZE);
1103 1063
1104 1064 /* If locking, reserve physical memory */
1105 1065 if (flags & KPD_LOCKED) {
1106 1066 pgcnt_t pages = btop(len);
1107 1067 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1108 1068 atomic_add_long(&anon_segkp_pages_locked, pages);
1109 1069 (void) page_resv(pages, KM_SLEEP);
1110 1070 }
1111 1071
1112 1072 /*
1113 1073 * Loop through the pages in the given range.
1114 1074 */
1115 1075 va = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
1116 1076 vaddr = va;
1117 1077 vlim = va + len;
1118 1078 lock = flags & KPD_LOCKED;
1119 1079 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1120 1080 for (; va < vlim; va += PAGESIZE, i++) {
1121 1081 page_t *pl[2]; /* second element NULL terminator */
1122 1082 struct vnode *vp;
1123 1083 anoff_t off;
1124 1084 int err;
1125 1085 struct anon *ap;
1126 1086
1127 1087 /*
1128 1088 * Summon the page. If it's not resident, arrange
1129 1089 * for synchronous i/o to pull it in.
1130 1090 */
1131 1091 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1132 1092 swap_xlate(ap, &vp, &off);
1133 1093
1134 1094 /*
1135 1095 * The returned page list will have exactly one entry,
1136 1096 * which is returned to us already kept.
1137 1097 */
1138 1098 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE, NULL,
1139 1099 pl, PAGESIZE, seg, va, S_READ, kcred, NULL);
1140 1100
1141 1101 if (err) {
1142 1102 /*
1143 1103 * Back out of what we've done so far.
1144 1104 */
1145 1105 (void) segkp_unlock(hat, seg, vaddr,
1146 1106 (va - vaddr), kpd, flags);
1147 1107 return (err);
1148 1108 }
1149 1109
1150 1110 /*
1151 1111 * Load an MMU translation for the page.
1152 1112 */
1153 1113 hat_memload(hat, va, pl[0], (PROT_READ|PROT_WRITE),
1154 1114 lock ? HAT_LOAD_LOCK : HAT_LOAD);
1155 1115
1156 1116 if (!lock) {
1157 1117 /*
1158 1118 * Now, release "shared" lock on the page.
1159 1119 */
1160 1120 page_unlock(pl[0]);
1161 1121 }
1162 1122 }
1163 1123 return (0);
1164 1124 }
1165 1125
1166 1126 /*
1167 1127 * At the very least unload the mmu-translations and unlock the range if locked
1168 1128 * Can be called with the following flag value KPD_WRITEDIRTY which specifies
1169 1129 * any dirty pages should be written to disk.
1170 1130 */
1171 1131 static int
1172 1132 segkp_unlock(
1173 1133 struct hat *hat,
1174 1134 struct seg *seg,
1175 1135 caddr_t vaddr,
1176 1136 size_t len,
1177 1137 struct segkp_data *kpd,
1178 1138 uint_t flags)
1179 1139 {
1180 1140 caddr_t va;
1181 1141 caddr_t vlim;
1182 1142 ulong_t i;
1183 1143 struct page *pp;
1184 1144 struct vnode *vp;
1185 1145 anoff_t off;
1186 1146 struct anon *ap;
1187 1147
1188 1148 #ifdef lint
1189 1149 seg = seg;
1190 1150 #endif /* lint */
1191 1151
1192 1152 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1193 1153
1194 1154 /*
1195 1155 * Loop through the pages in the given range. It is assumed
1196 1156 * segkp_unlock is called with page aligned base
1197 1157 */
1198 1158 va = vaddr;
1199 1159 vlim = va + len;
1200 1160 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1201 1161 hat_unload(hat, va, len,
1202 1162 ((flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
1203 1163 for (; va < vlim; va += PAGESIZE, i++) {
1204 1164 /*
1205 1165 * Find the page associated with this part of the
1206 1166 * slot, tracking it down through its associated swap
1207 1167 * space.
1208 1168 */
1209 1169 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1210 1170 swap_xlate(ap, &vp, &off);
1211 1171
1212 1172 if (flags & KPD_LOCKED) {
1213 1173 if ((pp = page_find(vp, off)) == NULL) {
1214 1174 if (flags & KPD_LOCKED) {
1215 1175 panic("segkp_softunlock: missing page");
1216 1176 /*NOTREACHED*/
1217 1177 }
1218 1178 }
1219 1179 } else {
1220 1180 /*
1221 1181 * Nothing to do if the slot is not locked and the
1222 1182 * page doesn't exist.
1223 1183 */
1224 1184 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL)
1225 1185 continue;
1226 1186 }
1227 1187
1228 1188 /*
1229 1189 * If the page doesn't have any translations, is
1230 1190 * dirty and not being shared, then push it out
1231 1191 * asynchronously and avoid waiting for the
1232 1192 * pageout daemon to do it for us.
1233 1193 *
1234 1194 * XXX - Do we really need to get the "exclusive"
1235 1195 * lock via an upgrade?
1236 1196 */
1237 1197 if ((flags & KPD_WRITEDIRTY) && !hat_page_is_mapped(pp) &&
1238 1198 hat_ismod(pp) && page_tryupgrade(pp)) {
1239 1199 /*
1240 1200 * Hold the vnode before releasing the page lock to
1241 1201 * prevent it from being freed and re-used by some
1242 1202 * other thread.
1243 1203 */
1244 1204 VN_HOLD(vp);
1245 1205 page_unlock(pp);
1246 1206
1247 1207 /*
1248 1208 * Want most powerful credentials we can get so
1249 1209 * use kcred.
1250 1210 */
1251 1211 (void) VOP_PUTPAGE(vp, (offset_t)off, PAGESIZE,
1252 1212 B_ASYNC | B_FREE, kcred, NULL);
1253 1213 VN_RELE(vp);
1254 1214 } else {
1255 1215 page_unlock(pp);
1256 1216 }
1257 1217 }
1258 1218
1259 1219 /* If unlocking, release physical memory */
1260 1220 if (flags & KPD_LOCKED) {
1261 1221 pgcnt_t pages = btopr(len);
1262 1222 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1263 1223 atomic_add_long(&anon_segkp_pages_locked, -pages);
1264 1224 page_unresv(pages);
1265 1225 }
1266 1226 return (0);
1267 1227 }
1268 1228
1269 1229 /*
1270 1230 * Insert the kpd in the hash table.
1271 1231 */
1272 1232 static void
1273 1233 segkp_insert(struct seg *seg, struct segkp_data *kpd)
1274 1234 {
1275 1235 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1276 1236 int index;
1277 1237
1278 1238 /*
1279 1239 * Insert the kpd based on the address that will be returned
1280 1240 * via segkp_release.
1281 1241 */
1282 1242 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1283 1243 mutex_enter(&segkp_lock);
1284 1244 kpd->kp_next = kpsd->kpsd_hash[index];
1285 1245 kpsd->kpsd_hash[index] = kpd;
1286 1246 mutex_exit(&segkp_lock);
1287 1247 }
1288 1248
1289 1249 /*
1290 1250 * Remove kpd from the hash table.
1291 1251 */
1292 1252 static void
1293 1253 segkp_delete(struct seg *seg, struct segkp_data *kpd)
1294 1254 {
1295 1255 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1296 1256 struct segkp_data **kpp;
1297 1257 int index;
1298 1258
1299 1259 ASSERT(MUTEX_HELD(&segkp_lock));
1300 1260
1301 1261 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1302 1262 for (kpp = &kpsd->kpsd_hash[index];
1303 1263 *kpp != NULL; kpp = &((*kpp)->kp_next)) {
1304 1264 if (*kpp == kpd) {
1305 1265 *kpp = kpd->kp_next;
1306 1266 return;
1307 1267 }
1308 1268 }
1309 1269 panic("segkp_delete: unable to find element to delete");
1310 1270 /*NOTREACHED*/
1311 1271 }
1312 1272
1313 1273 /*
1314 1274 * Find the kpd associated with a vaddr.
1315 1275 *
1316 1276 * Most of the callers of segkp_find will pass the vaddr that
1317 1277 * hashes to the desired index, but there are cases where
1318 1278 * this is not true in which case we have to (potentially) scan
1319 1279 * the whole table looking for it. This should be very rare
1320 1280 * (e.g. a segkp_fault(F_INVAL) on an address somewhere in the
1321 1281 * middle of the segkp_data region).
1322 1282 */
1323 1283 static struct segkp_data *
1324 1284 segkp_find(struct seg *seg, caddr_t vaddr)
1325 1285 {
1326 1286 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1327 1287 struct segkp_data *kpd;
1328 1288 int i;
1329 1289 int stop;
1330 1290
1331 1291 i = stop = SEGKP_HASH(vaddr);
1332 1292 mutex_enter(&segkp_lock);
1333 1293 do {
1334 1294 for (kpd = kpsd->kpsd_hash[i]; kpd != NULL;
1335 1295 kpd = kpd->kp_next) {
1336 1296 if (vaddr >= kpd->kp_base &&
1337 1297 vaddr < kpd->kp_base + kpd->kp_len) {
1338 1298 mutex_exit(&segkp_lock);
1339 1299 return (kpd);
1340 1300 }
1341 1301 }
1342 1302 if (--i < 0)
1343 1303 i = SEGKP_HASHSZ - 1; /* Wrap */
1344 1304 } while (i != stop);
1345 1305 mutex_exit(&segkp_lock);
1346 1306 return (NULL); /* Not found */
1347 1307 }
1348 1308
1349 1309 /*
1350 1310 * returns size of swappable area.
1351 1311 */
1352 1312 size_t
1353 1313 swapsize(caddr_t v)
1354 1314 {
1355 1315 struct segkp_data *kpd;
1356 1316
1357 1317 if ((kpd = segkp_find(segkp, v)) != NULL)
1358 1318 return (SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
1359 1319 else
1360 1320 return (NULL);
1361 1321 }
1362 1322
1363 1323 /*
1364 1324 * Dump out all the active segkp pages
1365 1325 */
1366 1326 static void
1367 1327 segkp_dump(struct seg *seg)
1368 1328 {
1369 1329 int i;
1370 1330 struct segkp_data *kpd;
1371 1331 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1372 1332
1373 1333 for (i = 0; i < SEGKP_HASHSZ; i++) {
1374 1334 for (kpd = kpsd->kpsd_hash[i];
1375 1335 kpd != NULL; kpd = kpd->kp_next) {
1376 1336 pfn_t pfn;
1377 1337 caddr_t addr;
1378 1338 caddr_t eaddr;
1379 1339
1380 1340 addr = kpd->kp_base;
1381 1341 eaddr = addr + kpd->kp_len;
1382 1342 while (addr < eaddr) {
1383 1343 ASSERT(seg->s_as == &kas);
1384 1344 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1385 1345 if (pfn != PFN_INVALID)
1386 1346 dump_addpage(seg->s_as, addr, pfn);
1387 1347 addr += PAGESIZE;
1388 1348 dump_timeleft = dump_timeout;
1389 1349 }
1390 1350 }
1391 1351 }
1392 1352 }
1393 1353
1394 1354 /*ARGSUSED*/
1395 1355 static int
1396 1356 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1397 1357 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1398 1358 {
1399 1359 return (ENOTSUP);
1400 -}
1401 -
1402 -/*ARGSUSED*/
1403 -static int
1404 -segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
1405 -{
1406 - return (ENODEV);
1407 -}
1408 -
1409 -/*ARGSUSED*/
1410 -static lgrp_mem_policy_info_t *
1411 -segkp_getpolicy(struct seg *seg, caddr_t addr)
1412 -{
1413 - return (NULL);
1414 -}
1415 -
1416 -/*ARGSUSED*/
1417 -static int
1418 -segkp_capable(struct seg *seg, segcapability_t capability)
1419 -{
1420 - return (0);
1421 1360 }
1422 1361
1423 1362 #include <sys/mem_config.h>
1424 1363
1425 1364 /*ARGSUSED*/
1426 1365 static void
1427 1366 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1428 1367 {}
1429 1368
1430 1369 /*
1431 1370 * During memory delete, turn off caches so that pages are not held.
1432 1371 * A better solution may be to unlock the pages while they are
1433 1372 * in the cache so that they may be collected naturally.
1434 1373 */
1435 1374
1436 1375 /*ARGSUSED*/
1437 1376 static int
1438 1377 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1439 1378 {
1440 1379 atomic_inc_32(&segkp_indel);
1441 1380 segkp_cache_free();
1442 1381 return (0);
1443 1382 }
1444 1383
1445 1384 /*ARGSUSED*/
1446 1385 static void
1447 1386 segkp_mem_config_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
1448 1387 {
1449 1388 atomic_dec_32(&segkp_indel);
1450 1389 }
1451 1390
1452 1391 static kphysm_setup_vector_t segkp_mem_config_vec = {
1453 1392 KPHYSM_SETUP_VECTOR_VERSION,
1454 1393 segkp_mem_config_post_add,
1455 1394 segkp_mem_config_pre_del,
1456 1395 segkp_mem_config_post_del,
1457 1396 };
1458 1397
1459 1398 static void
1460 1399 segkpinit_mem_config(struct seg *seg)
1461 1400 {
1462 1401 int ret;
1463 1402
1464 1403 ret = kphysm_setup_func_register(&segkp_mem_config_vec, (void *)seg);
1465 1404 ASSERT(ret == 0);
1466 1405 }