no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL accomplishes the same thing in most
cases; in the remaining cases, a NULL function pointer results in the
proper error code being returned.
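
As a rough sketch of the idea (not code from this webrev; the wrapper name
segop_sync and the EINVAL choice are invented for illustration), leaving an
unimplemented member of struct seg_ops NULL means a blind call through it
panics just as a bad-op stub did, while a dispatch site that prefers an
error over a panic can test the pointer first:

#include <sys/errno.h>
#include <vm/seg.h>

/*
 * Hypothetical guarded dispatch for one op.  A NULL .sync member panics
 * naturally if called blindly (the same effect as a stub like
 * segkp_badop()); checking it here returns a proper error code instead.
 */
static int
segop_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	if (seg->s_ops->sync == NULL)
		return (EINVAL);
	return ((*seg->s_ops->sync)(seg, addr, len, attr, flags));
}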
--- old/usr/src/uts/common/vm/seg_kp.c
+++ new/usr/src/uts/common/vm/seg_kp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 26 /* All Rights Reserved */
27 27
28 28 /*
29 29 * Portions of this source code were derived from Berkeley 4.3 BSD
30 30 * under license from the Regents of the University of California.
31 31 */
32 32
33 33 /*
34 34 * segkp is a segment driver that administers the allocation and deallocation
35 35 * of pageable variable size chunks of kernel virtual address space. Each
36 36 * allocated resource is page-aligned.
37 37 *
38 38 * The user may specify whether the resource should be initialized to 0,
39 39 * include a redzone, or locked in memory.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/thread.h>
45 45 #include <sys/param.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/buf.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vnode.h>
52 52 #include <sys/cmn_err.h>
53 53 #include <sys/swap.h>
54 54 #include <sys/tuneable.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/vmem.h>
57 57 #include <sys/cred.h>
58 58 #include <sys/dumphdr.h>
59 59 #include <sys/debug.h>
60 60 #include <sys/vtrace.h>
61 61 #include <sys/stack.h>
62 62 #include <sys/atomic.h>
63 63 #include <sys/archsystm.h>
64 64 #include <sys/lgrp.h>
65 65
66 66 #include <vm/as.h>
67 67 #include <vm/seg.h>
68 68 #include <vm/seg_kp.h>
69 69 #include <vm/seg_kmem.h>
70 70 #include <vm/anon.h>
71 71 #include <vm/page.h>
72 72 #include <vm/hat.h>
73 73 #include <sys/bitmap.h>
74 74
75 75 /*
76 76 * Private seg op routines
77 77 */
78 -static void segkp_badop(void);
79 78 static void segkp_dump(struct seg *seg);
80 79 static int segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
81 80 uint_t prot);
82 81 static int segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
83 82 static int segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
84 83 struct page ***page, enum lock_type type,
85 84 enum seg_rw rw);
86 85 static void segkp_insert(struct seg *seg, struct segkp_data *kpd);
87 86 static void segkp_delete(struct seg *seg, struct segkp_data *kpd);
88 87 static caddr_t segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
89 88 struct segkp_data **tkpd, struct anon_map *amp);
90 89 static void segkp_release_internal(struct seg *seg,
91 90 struct segkp_data *kpd, size_t len);
92 91 static int segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
93 92 size_t len, struct segkp_data *kpd, uint_t flags);
94 93 static int segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
95 94 size_t len, struct segkp_data *kpd, uint_t flags);
96 95 static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
97 96 static int segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
98 97 static lgrp_mem_policy_info_t *segkp_getpolicy(struct seg *seg,
99 98 caddr_t addr);
100 99 static int segkp_capable(struct seg *seg, segcapability_t capability);
101 100
102 101 /*
103 102 * Lock used to protect the hash table(s) and caches.
104 103 */
105 104 static kmutex_t segkp_lock;
106 105
107 106 /*
108 107 * The segkp caches
109 108 */
110 109 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
111 110
112 -#define SEGKP_BADOP(t) (t(*)())segkp_badop
113 -
114 111 /*
115 112 * When there are fewer than red_minavail bytes left on the stack,
116 113 * segkp_map_red() will map in the redzone (if called). 5000 seems
117 114 * to work reasonably well...
118 115 */
119 116 long red_minavail = 5000;
120 117
121 118 /*
122 119 * will be set to 1 for 32 bit x86 systems only, in startup.c
123 120 */
124 121 int segkp_fromheap = 0;
125 122 ulong_t *segkp_bitmap;
126 123
127 124 /*
128 125 * If segkp_map_red() is called with the redzone already mapped and
129 126 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
130 127 * then the stack situation has become quite serious; if much more stack
131 128 * is consumed, we have the potential of scrogging the next thread/LWP
132 129 * structure. To help debug the "can't happen" panics which may
133 130 * result from this condition, we record hrestime and the calling thread
134 131 * in red_deep_hires and red_deep_thread respectively.
135 132 */
136 133 #define RED_DEEP_THRESHOLD 2000
137 134
138 135 hrtime_t red_deep_hires;
139 136 kthread_t *red_deep_thread;
140 137
141 138 uint32_t red_nmapped;
142 139 uint32_t red_closest = UINT_MAX;
143 140 uint32_t red_ndoubles;
144 141
145 142 pgcnt_t anon_segkp_pages_locked; /* See vm/anon.h */
146 143 pgcnt_t anon_segkp_pages_resv; /* anon reserved by seg_kp */
147 144
148 145 static struct seg_ops segkp_ops = {
149 - .dup = SEGKP_BADOP(int),
150 - .unmap = SEGKP_BADOP(int),
151 - .free = SEGKP_BADOP(void),
152 146 .fault = segkp_fault,
153 - .faulta = SEGKP_BADOP(faultcode_t),
154 - .setprot = SEGKP_BADOP(int),
155 147 .checkprot = segkp_checkprot,
156 148 .kluster = segkp_kluster,
157 - .sync = SEGKP_BADOP(int),
158 - .incore = SEGKP_BADOP(size_t),
159 - .lockop = SEGKP_BADOP(int),
160 - .getprot = SEGKP_BADOP(int),
161 - .getoffset = SEGKP_BADOP(u_offset_t),
162 - .gettype = SEGKP_BADOP(int),
163 - .getvp = SEGKP_BADOP(int),
164 - .advise = SEGKP_BADOP(int),
165 149 .dump = segkp_dump,
166 150 .pagelock = segkp_pagelock,
167 - .setpagesize = SEGKP_BADOP(int),
168 151 .getmemid = segkp_getmemid,
169 152 .getpolicy = segkp_getpolicy,
170 153 .capable = segkp_capable,
171 154 .inherit = seg_inherit_notsup,
172 155 };
173 156
174 -
175 -static void
176 -segkp_badop(void)
177 -{
178 - panic("segkp_badop");
179 - /*NOTREACHED*/
180 -}
181 157
182 158 static void segkpinit_mem_config(struct seg *);
183 159
184 160 static uint32_t segkp_indel;
185 161
186 162 /*
187 163 * Allocate the segment specific private data struct and fill it in
188 164 * with the per kp segment mutex, anon ptr. array and hash table.
189 165 */
190 166 int
191 167 segkp_create(struct seg *seg)
192 168 {
193 169 struct segkp_segdata *kpsd;
194 170 size_t np;
195 171
196 172 ASSERT(seg != NULL && seg->s_as == &kas);
197 173 ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
198 174
199 175 if (seg->s_size & PAGEOFFSET) {
200 176 panic("Bad segkp size");
201 177 /*NOTREACHED*/
202 178 }
203 179
204 180 kpsd = kmem_zalloc(sizeof (struct segkp_segdata), KM_SLEEP);
205 181
206 182 /*
207 183 * Allocate the virtual memory for segkp and initialize it
208 184 */
209 185 if (segkp_fromheap) {
210 186 np = btop(kvseg.s_size);
211 187 segkp_bitmap = kmem_zalloc(BT_SIZEOFMAP(np), KM_SLEEP);
212 188 kpsd->kpsd_arena = vmem_create("segkp", NULL, 0, PAGESIZE,
213 189 vmem_alloc, vmem_free, heap_arena, 5 * PAGESIZE, VM_SLEEP);
214 190 } else {
215 191 segkp_bitmap = NULL;
216 192 np = btop(seg->s_size);
217 193 kpsd->kpsd_arena = vmem_create("segkp", seg->s_base,
218 194 seg->s_size, PAGESIZE, NULL, NULL, NULL, 5 * PAGESIZE,
219 195 VM_SLEEP);
220 196 }
221 197
222 198 kpsd->kpsd_anon = anon_create(np, ANON_SLEEP | ANON_ALLOC_FORCE);
223 199
224 200 kpsd->kpsd_hash = kmem_zalloc(SEGKP_HASHSZ * sizeof (struct segkp *),
225 201 KM_SLEEP);
226 202 seg->s_data = (void *)kpsd;
227 203 seg->s_ops = &segkp_ops;
228 204 segkpinit_mem_config(seg);
229 205 return (0);
230 206 }
231 207
232 208
233 209 /*
234 210 * Find a free 'freelist' and initialize it with the appropriate attributes
235 211 */
236 212 void *
237 213 segkp_cache_init(struct seg *seg, int maxsize, size_t len, uint_t flags)
238 214 {
239 215 int i;
240 216
241 217 if ((flags & KPD_NO_ANON) && !(flags & KPD_LOCKED))
242 218 return ((void *)-1);
243 219
244 220 mutex_enter(&segkp_lock);
245 221 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
246 222 if (segkp_cache[i].kpf_inuse)
247 223 continue;
248 224 segkp_cache[i].kpf_inuse = 1;
249 225 segkp_cache[i].kpf_max = maxsize;
250 226 segkp_cache[i].kpf_flags = flags;
251 227 segkp_cache[i].kpf_seg = seg;
252 228 segkp_cache[i].kpf_len = len;
253 229 mutex_exit(&segkp_lock);
254 230 return ((void *)(uintptr_t)i);
255 231 }
256 232 mutex_exit(&segkp_lock);
257 233 return ((void *)-1);
258 234 }
259 235
260 236 /*
261 237 * Free all the cache resources.
262 238 */
263 239 void
264 240 segkp_cache_free(void)
265 241 {
266 242 struct segkp_data *kpd;
267 243 struct seg *seg;
268 244 int i;
269 245
270 246 mutex_enter(&segkp_lock);
271 247 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
272 248 if (!segkp_cache[i].kpf_inuse)
273 249 continue;
274 250 /*
275 251 * Disconnect the freelist and process each element
276 252 */
277 253 kpd = segkp_cache[i].kpf_list;
278 254 seg = segkp_cache[i].kpf_seg;
279 255 segkp_cache[i].kpf_list = NULL;
280 256 segkp_cache[i].kpf_count = 0;
281 257 mutex_exit(&segkp_lock);
282 258
283 259 while (kpd != NULL) {
284 260 struct segkp_data *next;
285 261
286 262 next = kpd->kp_next;
287 263 segkp_release_internal(seg, kpd, kpd->kp_len);
288 264 kpd = next;
289 265 }
290 266 mutex_enter(&segkp_lock);
291 267 }
292 268 mutex_exit(&segkp_lock);
293 269 }
294 270
295 271 /*
296 272 * There are two entry points into segkp_get_internal. The first includes
297 273 * a cookie used to access a pool of cached segkp resources. The second
298 274 * does not use the cache.
299 275 */
300 276 caddr_t
301 277 segkp_get(struct seg *seg, size_t len, uint_t flags)
302 278 {
303 279 struct segkp_data *kpd = NULL;
304 280
305 281 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
306 282 kpd->kp_cookie = -1;
307 283 return (stom(kpd->kp_base, flags));
308 284 }
309 285 return (NULL);
310 286 }
311 287
312 288 /*
313 289 * Return a 'cached' segkp address
314 290 */
315 291 caddr_t
316 292 segkp_cache_get(void *cookie)
317 293 {
318 294 struct segkp_cache *freelist = NULL;
319 295 struct segkp_data *kpd = NULL;
320 296 int index = (int)(uintptr_t)cookie;
321 297 struct seg *seg;
322 298 size_t len;
323 299 uint_t flags;
324 300
325 301 if (index < 0 || index >= SEGKP_MAX_CACHE)
326 302 return (NULL);
327 303 freelist = &segkp_cache[index];
328 304
329 305 mutex_enter(&segkp_lock);
330 306 seg = freelist->kpf_seg;
331 307 flags = freelist->kpf_flags;
332 308 if (freelist->kpf_list != NULL) {
333 309 kpd = freelist->kpf_list;
334 310 freelist->kpf_list = kpd->kp_next;
335 311 freelist->kpf_count--;
336 312 mutex_exit(&segkp_lock);
337 313 kpd->kp_next = NULL;
338 314 segkp_insert(seg, kpd);
339 315 return (stom(kpd->kp_base, flags));
340 316 }
341 317 len = freelist->kpf_len;
342 318 mutex_exit(&segkp_lock);
343 319 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
344 320 kpd->kp_cookie = index;
345 321 return (stom(kpd->kp_base, flags));
346 322 }
347 323 return (NULL);
348 324 }
349 325
350 326 caddr_t
351 327 segkp_get_withanonmap(
352 328 struct seg *seg,
353 329 size_t len,
354 330 uint_t flags,
355 331 struct anon_map *amp)
356 332 {
357 333 struct segkp_data *kpd = NULL;
358 334
359 335 ASSERT(amp != NULL);
360 336 flags |= KPD_HASAMP;
361 337 if (segkp_get_internal(seg, len, flags, &kpd, amp) != NULL) {
362 338 kpd->kp_cookie = -1;
363 339 return (stom(kpd->kp_base, flags));
364 340 }
365 341 return (NULL);
366 342 }
367 343
368 344 /*
369 345 * This does the real work of segkp allocation.
370 346 * Return to client base addr. len must be page-aligned. A null value is
371 347 * returned if there are no more vm resources (e.g. pages, swap). The len
372 348 * and base recorded in the private data structure include the redzone
373 349 * and the redzone length (if applicable). If the user requests a redzone
374 350 * either the first or last page is left unmapped, depending on whether
375 351 * stacks grow toward low or high memory.
376 352 *
377 353 * The client may also specify a no-wait flag. If that is set then the
378 354 * request will choose a non-blocking path when requesting resources.
379 355 * The default is to make the client wait.
380 356 */
381 357 static caddr_t
382 358 segkp_get_internal(
383 359 struct seg *seg,
384 360 size_t len,
385 361 uint_t flags,
386 362 struct segkp_data **tkpd,
387 363 struct anon_map *amp)
388 364 {
389 365 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
390 366 struct segkp_data *kpd;
391 367 caddr_t vbase = NULL; /* always first virtual, may not be mapped */
392 368 pgcnt_t np = 0; /* number of pages in the resource */
393 369 pgcnt_t segkpindex;
394 370 long i;
395 371 caddr_t va;
396 372 pgcnt_t pages = 0;
397 373 ulong_t anon_idx = 0;
398 374 int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
399 375 caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;
400 376
401 377 if (len & PAGEOFFSET) {
402 378 panic("segkp_get: len is not page-aligned");
403 379 /*NOTREACHED*/
404 380 }
405 381
406 382 ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));
407 383
408 384 /* Only allow KPD_NO_ANON if we are going to lock it down */
409 385 if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
410 386 return (NULL);
411 387
412 388 if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
413 389 return (NULL);
414 390 /*
415 391 * Fix up the len to reflect the REDZONE if applicable
416 392 */
417 393 if (flags & KPD_HASREDZONE)
418 394 len += PAGESIZE;
419 395 np = btop(len);
420 396
421 397 vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
422 398 if (vbase == NULL) {
423 399 kmem_free(kpd, sizeof (struct segkp_data));
424 400 return (NULL);
425 401 }
426 402
427 403 /* If locking, reserve physical memory */
428 404 if (flags & KPD_LOCKED) {
429 405 pages = btop(SEGKP_MAPLEN(len, flags));
430 406 if (page_resv(pages, kmflag) == 0) {
431 407 vmem_free(SEGKP_VMEM(seg), vbase, len);
432 408 kmem_free(kpd, sizeof (struct segkp_data));
433 409 return (NULL);
434 410 }
435 411 if ((flags & KPD_NO_ANON) == 0)
436 412 atomic_add_long(&anon_segkp_pages_locked, pages);
437 413 }
438 414
439 415 /*
440 416 * Reserve sufficient swap space for this vm resource. We'll
441 417 * actually allocate it in the loop below, but reserving it
442 418 * here allows us to back out more gracefully than if we
443 419 * had an allocation failure in the body of the loop.
444 420 *
445 421 * Note that we don't need swap space for the red zone page.
446 422 */
447 423 if (amp != NULL) {
448 424 /*
449 425 * The swap reservation has been done, if required, and the
450 426 * anon_hdr is separate.
451 427 */
452 428 anon_idx = 0;
453 429 kpd->kp_anon_idx = anon_idx;
454 430 kpd->kp_anon = amp->ahp;
455 431
456 432 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
457 433 kpd, vbase, len, flags, 1);
458 434
459 435 } else if ((flags & KPD_NO_ANON) == 0) {
460 436 if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
461 437 if (flags & KPD_LOCKED) {
462 438 atomic_add_long(&anon_segkp_pages_locked,
463 439 -pages);
464 440 page_unresv(pages);
465 441 }
466 442 vmem_free(SEGKP_VMEM(seg), vbase, len);
467 443 kmem_free(kpd, sizeof (struct segkp_data));
468 444 return (NULL);
469 445 }
470 446 atomic_add_long(&anon_segkp_pages_resv,
471 447 btop(SEGKP_MAPLEN(len, flags)));
472 448 anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
473 449 kpd->kp_anon_idx = anon_idx;
474 450 kpd->kp_anon = kpsd->kpsd_anon;
475 451
476 452 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
477 453 kpd, vbase, len, flags, 1);
478 454 } else {
479 455 kpd->kp_anon = NULL;
480 456 kpd->kp_anon_idx = 0;
481 457 }
482 458
483 459 /*
484 460 * Allocate page and anon resources for the virtual address range
485 461 * except the redzone
486 462 */
487 463 if (segkp_fromheap)
488 464 segkpindex = btop((uintptr_t)(vbase - kvseg.s_base));
489 465 for (i = 0, va = vbase; i < np; i++, va += PAGESIZE) {
490 466 page_t *pl[2];
491 467 struct vnode *vp;
492 468 anoff_t off;
493 469 int err;
494 470 page_t *pp = NULL;
495 471
496 472 /*
497 473 * Mark this page to be a segkp page in the bitmap.
498 474 */
499 475 if (segkp_fromheap) {
500 476 BT_ATOMIC_SET(segkp_bitmap, segkpindex);
501 477 segkpindex++;
502 478 }
503 479
504 480 /*
505 481 * If this page is the red zone page, we don't need swap
506 482 * space for it. Note that we skip over the code that
507 483 * establishes MMU mappings, so that the page remains
508 484 * invalid.
509 485 */
510 486 if ((flags & KPD_HASREDZONE) && KPD_REDZONE(kpd) == i)
511 487 continue;
512 488
513 489 if (kpd->kp_anon != NULL) {
514 490 struct anon *ap;
515 491
516 492 ASSERT(anon_get_ptr(kpd->kp_anon, anon_idx + i)
517 493 == NULL);
518 494 /*
519 495 * Determine the "vp" and "off" of the anon slot.
520 496 */
521 497 ap = anon_alloc(NULL, 0);
522 498 if (amp != NULL)
523 499 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
524 500 (void) anon_set_ptr(kpd->kp_anon, anon_idx + i,
525 501 ap, ANON_SLEEP);
526 502 if (amp != NULL)
527 503 ANON_LOCK_EXIT(&amp->a_rwlock);
528 504 swap_xlate(ap, &vp, &off);
529 505
530 506 /*
531 507 * Create a page with the specified identity. The
532 508 * page is returned with the "shared" lock held.
533 509 */
534 510 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE,
535 511 NULL, pl, PAGESIZE, seg, va, S_CREATE,
536 512 kcred, NULL);
537 513 if (err) {
538 514 /*
539 515 * XXX - This should not fail.
540 516 */
541 517 panic("segkp_get: no pages");
542 518 /*NOTREACHED*/
543 519 }
544 520 pp = pl[0];
545 521 } else {
546 522 ASSERT(page_exists(&kvp,
547 523 (u_offset_t)(uintptr_t)va) == NULL);
548 524
549 525 if ((pp = page_create_va(&kvp,
550 526 (u_offset_t)(uintptr_t)va, PAGESIZE,
551 527 (flags & KPD_NOWAIT ? 0 : PG_WAIT) | PG_EXCL |
552 528 PG_NORELOC, seg, va)) == NULL) {
553 529 /*
554 530 * Legitimize resource; then destroy it.
555 531 * Easier than trying to unwind here.
556 532 */
557 533 kpd->kp_flags = flags;
558 534 kpd->kp_base = vbase;
559 535 kpd->kp_len = len;
560 536 segkp_release_internal(seg, kpd, va - vbase);
561 537 return (NULL);
562 538 }
563 539 page_io_unlock(pp);
564 540 }
565 541
566 542 if (flags & KPD_ZERO)
567 543 pagezero(pp, 0, PAGESIZE);
568 544
569 545 /*
570 546 * Load and lock an MMU translation for the page.
571 547 */
572 548 hat_memload(seg->s_as->a_hat, va, pp, (PROT_READ|PROT_WRITE),
573 549 ((flags & KPD_LOCKED) ? HAT_LOAD_LOCK : HAT_LOAD));
574 550
575 551 /*
576 552 * Now, release lock on the page.
577 553 */
578 554 if (flags & KPD_LOCKED) {
579 555 /*
580 556 * Indicate to page_retire framework that this
581 557 * page can only be retired when it is freed.
582 558 */
583 559 PP_SETRAF(pp);
584 560 page_downgrade(pp);
585 561 } else
586 562 page_unlock(pp);
587 563 }
588 564
589 565 kpd->kp_flags = flags;
590 566 kpd->kp_base = vbase;
591 567 kpd->kp_len = len;
592 568 segkp_insert(seg, kpd);
593 569 *tkpd = kpd;
594 570 return (stom(kpd->kp_base, flags));
595 571 }
596 572
597 573 /*
598 574 * Release the resource to the cache if the pool (designated by the cookie)
599 575 * has less than the maximum allowable. If inserted in cache,
600 576 * segkp_delete ensures the element is taken off of the active list.
601 577 */
602 578 void
603 579 segkp_release(struct seg *seg, caddr_t vaddr)
604 580 {
605 581 struct segkp_cache *freelist;
606 582 struct segkp_data *kpd = NULL;
607 583
608 584 if ((kpd = segkp_find(seg, vaddr)) == NULL) {
609 585 panic("segkp_release: null kpd");
610 586 /*NOTREACHED*/
611 587 }
612 588
613 589 if (kpd->kp_cookie != -1) {
614 590 freelist = &segkp_cache[kpd->kp_cookie];
615 591 mutex_enter(&segkp_lock);
616 592 if (!segkp_indel && freelist->kpf_count < freelist->kpf_max) {
617 593 segkp_delete(seg, kpd);
618 594 kpd->kp_next = freelist->kpf_list;
619 595 freelist->kpf_list = kpd;
620 596 freelist->kpf_count++;
621 597 mutex_exit(&segkp_lock);
622 598 return;
623 599 } else {
624 600 mutex_exit(&segkp_lock);
625 601 kpd->kp_cookie = -1;
626 602 }
627 603 }
628 604 segkp_release_internal(seg, kpd, kpd->kp_len);
629 605 }
630 606
631 607 /*
632 608 * Free the entire resource. segkp_unlock gets called with the start of the
633 609 * mapped portion of the resource. The length is the size of the mapped
634 610 * portion.
635 611 */
636 612 static void
637 613 segkp_release_internal(struct seg *seg, struct segkp_data *kpd, size_t len)
638 614 {
639 615 caddr_t va;
640 616 long i;
641 617 long redzone;
642 618 size_t np;
643 619 page_t *pp;
644 620 struct vnode *vp;
645 621 anoff_t off;
646 622 struct anon *ap;
647 623 pgcnt_t segkpindex;
648 624
649 625 ASSERT(kpd != NULL);
650 626 ASSERT((kpd->kp_flags & KPD_HASAMP) == 0 || kpd->kp_cookie == -1);
651 627 np = btop(len);
652 628
653 629 /* Remove from active hash list */
654 630 if (kpd->kp_cookie == -1) {
655 631 mutex_enter(&segkp_lock);
656 632 segkp_delete(seg, kpd);
657 633 mutex_exit(&segkp_lock);
658 634 }
659 635
660 636 /*
661 637 * Precompute redzone page index.
662 638 */
663 639 redzone = -1;
664 640 if (kpd->kp_flags & KPD_HASREDZONE)
665 641 redzone = KPD_REDZONE(kpd);
666 642
667 643
668 644 va = kpd->kp_base;
669 645
670 646 hat_unload(seg->s_as->a_hat, va, (np << PAGESHIFT),
671 647 ((kpd->kp_flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
672 648 /*
673 649 * Free up those anon resources that are quiescent.
674 650 */
675 651 if (segkp_fromheap)
676 652 segkpindex = btop((uintptr_t)(va - kvseg.s_base));
677 653 for (i = 0; i < np; i++, va += PAGESIZE) {
678 654
679 655 /*
680 656 * Clear the bit for this page from the bitmap.
681 657 */
682 658 if (segkp_fromheap) {
683 659 BT_ATOMIC_CLEAR(segkp_bitmap, segkpindex);
684 660 segkpindex++;
685 661 }
686 662
687 663 if (i == redzone)
688 664 continue;
689 665 if (kpd->kp_anon) {
690 666 /*
691 667 * Free up anon resources and destroy the
692 668 * associated pages.
693 669 *
694 670 * Release the lock if there is one. Have to get the
695 671 * page to do this, unfortunately.
696 672 */
697 673 if (kpd->kp_flags & KPD_LOCKED) {
698 674 ap = anon_get_ptr(kpd->kp_anon,
699 675 kpd->kp_anon_idx + i);
700 676 swap_xlate(ap, &vp, &off);
701 677 /* Find the shared-locked page. */
702 678 pp = page_find(vp, (u_offset_t)off);
703 679 if (pp == NULL) {
704 680 panic("segkp_release: "
705 681 "kp_anon: no page to unlock ");
706 682 /*NOTREACHED*/
707 683 }
708 684 if (PP_ISRAF(pp))
709 685 PP_CLRRAF(pp);
710 686
711 687 page_unlock(pp);
712 688 }
713 689 if ((kpd->kp_flags & KPD_HASAMP) == 0) {
714 690 anon_free(kpd->kp_anon, kpd->kp_anon_idx + i,
715 691 PAGESIZE);
716 692 anon_unresv_zone(PAGESIZE, NULL);
717 693 atomic_dec_ulong(&anon_segkp_pages_resv);
718 694 }
719 695 TRACE_5(TR_FAC_VM,
720 696 TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
721 697 kpd, va, PAGESIZE, 0, 0);
722 698 } else {
723 699 if (kpd->kp_flags & KPD_LOCKED) {
724 700 pp = page_find(&kvp, (u_offset_t)(uintptr_t)va);
725 701 if (pp == NULL) {
726 702 panic("segkp_release: "
727 703 "no page to unlock");
728 704 /*NOTREACHED*/
729 705 }
730 706 if (PP_ISRAF(pp))
731 707 PP_CLRRAF(pp);
732 708 /*
733 709 * We should just upgrade the lock here
734 710 * but there is no upgrade that waits.
735 711 */
736 712 page_unlock(pp);
737 713 }
738 714 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)va,
739 715 SE_EXCL);
740 716 if (pp != NULL)
741 717 page_destroy(pp, 0);
742 718 }
743 719 }
744 720
745 721 /* If locked, release physical memory reservation */
746 722 if (kpd->kp_flags & KPD_LOCKED) {
747 723 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
748 724 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
749 725 atomic_add_long(&anon_segkp_pages_locked, -pages);
750 726 page_unresv(pages);
751 727 }
752 728
753 729 vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
754 730 kmem_free(kpd, sizeof (struct segkp_data));
755 731 }
756 732
757 733 /*
758 734 * segkp_map_red() will check the current frame pointer against the
759 735 * stack base. If the amount of stack remaining is questionable
760 736 * (less than red_minavail), then segkp_map_red() will map in the redzone
761 737 * and return 1. Otherwise, it will return 0. segkp_map_red() can
762 738 * _only_ be called when it is safe to sleep on page_create_va().
763 739 *
764 740 * It is up to the caller to remember whether segkp_map_red() successfully
765 741 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
766 742 * time.
767 743 *
768 744 * Currently, this routine is only called from pagefault() (which necessarily
769 745 * satisfies the above conditions).
770 746 */
771 747 #if defined(STACK_GROWTH_DOWN)
772 748 int
773 749 segkp_map_red(void)
774 750 {
775 751 uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
776 752 #ifndef _LP64
777 753 caddr_t stkbase;
778 754 #endif
779 755
780 756 /*
781 757 * Optimize for the common case where we simply return.
782 758 */
783 759 if ((curthread->t_red_pp == NULL) &&
784 760 (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
785 761 return (0);
786 762
787 763 #if defined(_LP64)
788 764 /*
789 765 * XXX We probably need something better than this.
790 766 */
791 767 panic("kernel stack overflow");
792 768 /*NOTREACHED*/
793 769 #else /* _LP64 */
794 770 if (curthread->t_red_pp == NULL) {
795 771 page_t *red_pp;
796 772 struct seg kseg;
797 773
798 774 caddr_t red_va = (caddr_t)
799 775 (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
800 776 PAGESIZE);
801 777
802 778 ASSERT(page_exists(&kvp, (u_offset_t)(uintptr_t)red_va) ==
803 779 NULL);
804 780
805 781 /*
806 782 * Allocate the physical page for the red page.
807 783 */
808 784 /*
809 785 * No PG_NORELOC here to avoid waits. Unlikely to get
810 786 * a relocate happening in the short time the page exists
811 787 * and it will be OK anyway.
812 788 */
813 789
814 790 kseg.s_as = &kas;
815 791 red_pp = page_create_va(&kvp, (u_offset_t)(uintptr_t)red_va,
816 792 PAGESIZE, PG_WAIT | PG_EXCL, &kseg, red_va);
817 793 ASSERT(red_pp != NULL);
818 794
819 795 /*
820 796 * So we now have a page to jam into the redzone...
821 797 */
822 798 page_io_unlock(red_pp);
823 799
824 800 hat_memload(kas.a_hat, red_va, red_pp,
825 801 (PROT_READ|PROT_WRITE), HAT_LOAD_LOCK);
826 802 page_downgrade(red_pp);
827 803
828 804 /*
829 805 * The page is left SE_SHARED locked so we can hold on to
830 806 * the page_t pointer.
831 807 */
832 808 curthread->t_red_pp = red_pp;
833 809
834 810 atomic_inc_32(&red_nmapped);
835 811 while (fp - (uintptr_t)curthread->t_stkbase < red_closest) {
836 812 (void) atomic_cas_32(&red_closest, red_closest,
837 813 (uint32_t)(fp - (uintptr_t)curthread->t_stkbase));
838 814 }
839 815 return (1);
840 816 }
841 817
842 818 stkbase = (caddr_t)(((uintptr_t)curthread->t_stkbase &
843 819 (uintptr_t)PAGEMASK) - PAGESIZE);
844 820
845 821 atomic_inc_32(&red_ndoubles);
846 822
847 823 if (fp - (uintptr_t)stkbase < RED_DEEP_THRESHOLD) {
848 824 /*
849 825 * Oh boy. We're already deep within the mapped-in
850 826 * redzone page, and the caller is trying to prepare
851 827 * for a deep stack run. We're running without a
852 828 * redzone right now: if the caller plows off the
853 829 * end of the stack, it'll plow another thread or
854 830 * LWP structure. That situation could result in
855 831 * a very hard-to-debug panic, so, in the spirit of
856 832 * recording the name of one's killer in one's own
857 833 * blood, we're going to record hrestime and the calling
858 834 * thread.
859 835 */
860 836 red_deep_hires = hrestime.tv_nsec;
861 837 red_deep_thread = curthread;
862 838 }
863 839
864 840 /*
865 841 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
866 842 */
867 843 ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
868 844 return (0);
869 845 #endif /* _LP64 */
870 846 }
871 847
872 848 void
873 849 segkp_unmap_red(void)
874 850 {
875 851 page_t *pp;
876 852 caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
877 853 (uintptr_t)PAGEMASK) - PAGESIZE);
878 854
879 855 ASSERT(curthread->t_red_pp != NULL);
880 856
881 857 /*
882 858 * Because we locked the mapping down, we can't simply rely
883 859 * on page_destroy() to clean everything up; we need to call
884 860 * hat_unload() to explicitly unlock the mapping resources.
885 861 */
886 862 hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
887 863
888 864 pp = curthread->t_red_pp;
889 865
890 866 ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
891 867
892 868 /*
893 869 * Need to upgrade the SE_SHARED lock to SE_EXCL.
894 870 */
895 871 if (!page_tryupgrade(pp)) {
896 872 /*
897 873 * As there is no wait for upgrade, release the
898 874 * SE_SHARED lock and wait for SE_EXCL.
899 875 */
900 876 page_unlock(pp);
901 877 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)red_va, SE_EXCL);
902 878 /* pp may be NULL here, hence the test below */
903 879 }
904 880
905 881 /*
906 882 * Destroy the page, with dontfree set to zero (i.e. free it).
907 883 */
908 884 if (pp != NULL)
909 885 page_destroy(pp, 0);
910 886 curthread->t_red_pp = NULL;
911 887 }
912 888 #else
913 889 #error Red stacks only supported with downwards stack growth.
914 890 #endif
915 891
916 892 /*
917 893 * Handle a fault on an address corresponding to one of the
918 894 * resources in the segkp segment.
919 895 */
920 896 faultcode_t
921 897 segkp_fault(
922 898 struct hat *hat,
923 899 struct seg *seg,
924 900 caddr_t vaddr,
925 901 size_t len,
926 902 enum fault_type type,
927 903 enum seg_rw rw)
928 904 {
929 905 struct segkp_data *kpd = NULL;
930 906 int err;
931 907
932 908 ASSERT(seg->s_as == &kas && RW_READ_HELD(&seg->s_as->a_lock));
933 909
934 910 /*
935 911 * Sanity checks.
936 912 */
937 913 if (type == F_PROT) {
938 914 panic("segkp_fault: unexpected F_PROT fault");
939 915 /*NOTREACHED*/
940 916 }
941 917
942 918 if ((kpd = segkp_find(seg, vaddr)) == NULL)
943 919 return (FC_NOMAP);
944 920
945 921 mutex_enter(&kpd->kp_lock);
946 922
947 923 if (type == F_SOFTLOCK) {
948 924 ASSERT(!(kpd->kp_flags & KPD_LOCKED));
949 925 /*
950 926 * The F_SOFTLOCK case has more stringent
951 927 * range requirements: the given range must exactly coincide
952 928 * with the resource's mapped portion. Note that a reference to the
953 929 * redzone is handled, since vaddr would not equal base.
954 930 */
955 931 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
956 932 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
957 933 mutex_exit(&kpd->kp_lock);
958 934 return (FC_MAKE_ERR(EFAULT));
959 935 }
960 936
961 937 if ((err = segkp_load(hat, seg, vaddr, len, kpd, KPD_LOCKED))) {
962 938 mutex_exit(&kpd->kp_lock);
963 939 return (FC_MAKE_ERR(err));
964 940 }
965 941 kpd->kp_flags |= KPD_LOCKED;
966 942 mutex_exit(&kpd->kp_lock);
967 943 return (0);
968 944 }
969 945
970 946 if (type == F_INVAL) {
971 947 ASSERT(!(kpd->kp_flags & KPD_NO_ANON));
972 948
973 949 /*
974 950 * Check if we touched the redzone. Somewhat optimistic
975 951 * here if we are touching the redzone of our own stack
976 952 * since we wouldn't have a stack to get this far...
977 953 */
978 954 if ((kpd->kp_flags & KPD_HASREDZONE) &&
979 955 btop((uintptr_t)(vaddr - kpd->kp_base)) == KPD_REDZONE(kpd))
980 956 panic("segkp_fault: accessing redzone");
981 957
982 958 /*
983 959 * This fault may occur while the page is being F_SOFTLOCK'ed.
984 960 * Return since a 2nd segkp_load is unnecessary and also would
985 961 * result in the page being locked twice and eventually
986 962 * hang the thread_reaper thread.
987 963 */
988 964 if (kpd->kp_flags & KPD_LOCKED) {
989 965 mutex_exit(&kpd->kp_lock);
990 966 return (0);
991 967 }
992 968
993 969 err = segkp_load(hat, seg, vaddr, len, kpd, kpd->kp_flags);
994 970 mutex_exit(&kpd->kp_lock);
995 971 return (err ? FC_MAKE_ERR(err) : 0);
996 972 }
997 973
998 974 if (type == F_SOFTUNLOCK) {
999 975 uint_t flags;
1000 976
1001 977 /*
1002 978 * Make sure the addr is LOCKED and it has anon backing
1003 979 * before unlocking
1004 980 */
1005 981 if ((kpd->kp_flags & (KPD_LOCKED|KPD_NO_ANON)) != KPD_LOCKED) {
1006 982 panic("segkp_fault: bad unlock");
1007 983 /*NOTREACHED*/
1008 984 }
1009 985
1010 986 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
1011 987 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
1012 988 panic("segkp_fault: bad range");
1013 989 /*NOTREACHED*/
1014 990 }
1015 991
1016 992 if (rw == S_WRITE)
1017 993 flags = kpd->kp_flags | KPD_WRITEDIRTY;
1018 994 else
1019 995 flags = kpd->kp_flags;
1020 996 err = segkp_unlock(hat, seg, vaddr, len, kpd, flags);
1021 997 kpd->kp_flags &= ~KPD_LOCKED;
1022 998 mutex_exit(&kpd->kp_lock);
1023 999 return (err ? FC_MAKE_ERR(err) : 0);
1024 1000 }
1025 1001 mutex_exit(&kpd->kp_lock);
1026 1002 panic("segkp_fault: bogus fault type: %d\n", type);
1027 1003 /*NOTREACHED*/
1028 1004 }
1029 1005
1030 1006 /*
1031 1007 * Check that the given protections suffice over the range specified by
1032 1008 * vaddr and len. For this segment type, the only issue is whether or
1033 1009 * not the range lies completely within the mapped part of an allocated
1034 1010 * resource.
1035 1011 */
1036 1012 /* ARGSUSED */
1037 1013 static int
1038 1014 segkp_checkprot(struct seg *seg, caddr_t vaddr, size_t len, uint_t prot)
1039 1015 {
1040 1016 struct segkp_data *kpd = NULL;
1041 1017 caddr_t mbase;
1042 1018 size_t mlen;
1043 1019
1044 1020 if ((kpd = segkp_find(seg, vaddr)) == NULL)
1045 1021 return (EACCES);
1046 1022
1047 1023 mutex_enter(&kpd->kp_lock);
1048 1024 mbase = stom(kpd->kp_base, kpd->kp_flags);
1049 1025 mlen = SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags);
1050 1026 if (len > mlen || vaddr < mbase ||
1051 1027 ((vaddr + len) > (mbase + mlen))) {
1052 1028 mutex_exit(&kpd->kp_lock);
1053 1029 return (EACCES);
1054 1030 }
1055 1031 mutex_exit(&kpd->kp_lock);
1056 1032 return (0);
1057 1033 }
1058 1034
1059 1035
1060 1036 /*
1061 1037 * Check to see if it makes sense to do kluster/read ahead to
1062 1038 * addr + delta relative to the mapping at addr. We assume here
1063 1039 * that delta is a signed PAGESIZE'd multiple (which can be negative).
1064 1040 *
1065 1041 * For seg_u we always "approve" of this action from our standpoint.
1066 1042 */
1067 1043 /*ARGSUSED*/
1068 1044 static int
1069 1045 segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
1070 1046 {
1071 1047 return (0);
1072 1048 }
1073 1049
1074 1050 /*
1075 1051 * Load and possibly lock intra-slot resources in the range given by
1076 1052 * vaddr and len.
1077 1053 */
1078 1054 static int
1079 1055 segkp_load(
1080 1056 struct hat *hat,
1081 1057 struct seg *seg,
1082 1058 caddr_t vaddr,
1083 1059 size_t len,
1084 1060 struct segkp_data *kpd,
1085 1061 uint_t flags)
1086 1062 {
1087 1063 caddr_t va;
1088 1064 caddr_t vlim;
1089 1065 ulong_t i;
1090 1066 uint_t lock;
1091 1067
1092 1068 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1093 1069
1094 1070 len = P2ROUNDUP(len, PAGESIZE);
1095 1071
1096 1072 /* If locking, reserve physical memory */
1097 1073 if (flags & KPD_LOCKED) {
1098 1074 pgcnt_t pages = btop(len);
1099 1075 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1100 1076 atomic_add_long(&anon_segkp_pages_locked, pages);
1101 1077 (void) page_resv(pages, KM_SLEEP);
1102 1078 }
1103 1079
1104 1080 /*
1105 1081 * Loop through the pages in the given range.
1106 1082 */
1107 1083 va = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
1108 1084 vaddr = va;
1109 1085 vlim = va + len;
1110 1086 lock = flags & KPD_LOCKED;
1111 1087 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1112 1088 for (; va < vlim; va += PAGESIZE, i++) {
1113 1089 page_t *pl[2]; /* second element NULL terminator */
1114 1090 struct vnode *vp;
1115 1091 anoff_t off;
1116 1092 int err;
1117 1093 struct anon *ap;
1118 1094
1119 1095 /*
1120 1096 * Summon the page. If it's not resident, arrange
1121 1097 * for synchronous i/o to pull it in.
1122 1098 */
1123 1099 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1124 1100 swap_xlate(ap, &vp, &off);
1125 1101
1126 1102 /*
1127 1103 * The returned page list will have exactly one entry,
1128 1104 * which is returned to us already kept.
1129 1105 */
1130 1106 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE, NULL,
1131 1107 pl, PAGESIZE, seg, va, S_READ, kcred, NULL);
1132 1108
1133 1109 if (err) {
1134 1110 /*
1135 1111 * Back out of what we've done so far.
1136 1112 */
1137 1113 (void) segkp_unlock(hat, seg, vaddr,
1138 1114 (va - vaddr), kpd, flags);
1139 1115 return (err);
1140 1116 }
1141 1117
1142 1118 /*
1143 1119 * Load an MMU translation for the page.
1144 1120 */
1145 1121 hat_memload(hat, va, pl[0], (PROT_READ|PROT_WRITE),
1146 1122 lock ? HAT_LOAD_LOCK : HAT_LOAD);
1147 1123
1148 1124 if (!lock) {
1149 1125 /*
1150 1126 * Now, release "shared" lock on the page.
1151 1127 */
1152 1128 page_unlock(pl[0]);
1153 1129 }
1154 1130 }
1155 1131 return (0);
1156 1132 }
1157 1133
1158 1134 /*
1159 1135 * At the very least, unload the MMU translations and unlock the range
1160 1136 * if locked. Can be called with the flag value KPD_WRITEDIRTY, which
1161 1137 * specifies that any dirty pages should be written to disk.
1162 1138 */
1163 1139 static int
1164 1140 segkp_unlock(
1165 1141 struct hat *hat,
1166 1142 struct seg *seg,
1167 1143 caddr_t vaddr,
1168 1144 size_t len,
1169 1145 struct segkp_data *kpd,
1170 1146 uint_t flags)
1171 1147 {
1172 1148 caddr_t va;
1173 1149 caddr_t vlim;
1174 1150 ulong_t i;
1175 1151 struct page *pp;
1176 1152 struct vnode *vp;
1177 1153 anoff_t off;
1178 1154 struct anon *ap;
1179 1155
1180 1156 #ifdef lint
1181 1157 seg = seg;
1182 1158 #endif /* lint */
1183 1159
1184 1160 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1185 1161
1186 1162 /*
1187 1163 * Loop through the pages in the given range. It is assumed
1188 1164 * segkp_unlock is called with page aligned base
1189 1165 * segkp_unlock is called with a page-aligned base.
1190 1166 va = vaddr;
1191 1167 vlim = va + len;
1192 1168 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1193 1169 hat_unload(hat, va, len,
1194 1170 ((flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
1195 1171 for (; va < vlim; va += PAGESIZE, i++) {
1196 1172 /*
1197 1173 * Find the page associated with this part of the
1198 1174 * slot, tracking it down through its associated swap
1199 1175 * space.
1200 1176 */
1201 1177 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1202 1178 swap_xlate(ap, &vp, &off);
1203 1179
1204 1180 if (flags & KPD_LOCKED) {
1205 1181 if ((pp = page_find(vp, off)) == NULL) {
1206 1182 if (flags & KPD_LOCKED) {
1207 1183 panic("segkp_softunlock: missing page");
1208 1184 /*NOTREACHED*/
1209 1185 }
1210 1186 }
1211 1187 } else {
1212 1188 /*
1213 1189 * Nothing to do if the slot is not locked and the
1214 1190 * page doesn't exist.
1215 1191 */
1216 1192 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL)
1217 1193 continue;
1218 1194 }
1219 1195
1220 1196 /*
1221 1197 * If the page doesn't have any translations, is
1222 1198 * dirty and not being shared, then push it out
1223 1199 * asynchronously and avoid waiting for the
1224 1200 * pageout daemon to do it for us.
1225 1201 *
1226 1202 * XXX - Do we really need to get the "exclusive"
1227 1203 * lock via an upgrade?
1228 1204 */
1229 1205 if ((flags & KPD_WRITEDIRTY) && !hat_page_is_mapped(pp) &&
1230 1206 hat_ismod(pp) && page_tryupgrade(pp)) {
1231 1207 /*
1232 1208 * Hold the vnode before releasing the page lock to
1233 1209 * prevent it from being freed and re-used by some
1234 1210 * other thread.
1235 1211 */
1236 1212 VN_HOLD(vp);
1237 1213 page_unlock(pp);
1238 1214
1239 1215 /*
1240 1216 * Want most powerful credentials we can get so
1241 1217 * use kcred.
1242 1218 */
1243 1219 (void) VOP_PUTPAGE(vp, (offset_t)off, PAGESIZE,
1244 1220 B_ASYNC | B_FREE, kcred, NULL);
1245 1221 VN_RELE(vp);
1246 1222 } else {
1247 1223 page_unlock(pp);
1248 1224 }
1249 1225 }
1250 1226
1251 1227 /* If unlocking, release physical memory */
1252 1228 if (flags & KPD_LOCKED) {
1253 1229 pgcnt_t pages = btopr(len);
1254 1230 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1255 1231 atomic_add_long(&anon_segkp_pages_locked, -pages);
1256 1232 page_unresv(pages);
1257 1233 }
1258 1234 return (0);
1259 1235 }
1260 1236
1261 1237 /*
1262 1238 * Insert the kpd in the hash table.
1263 1239 */
1264 1240 static void
1265 1241 segkp_insert(struct seg *seg, struct segkp_data *kpd)
1266 1242 {
1267 1243 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1268 1244 int index;
1269 1245
1270 1246 /*
1271 1247 * Insert the kpd based on the address that will be returned
1272 1248 * via segkp_release.
1273 1249 */
1274 1250 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1275 1251 mutex_enter(&segkp_lock);
1276 1252 kpd->kp_next = kpsd->kpsd_hash[index];
1277 1253 kpsd->kpsd_hash[index] = kpd;
1278 1254 mutex_exit(&segkp_lock);
1279 1255 }
1280 1256
1281 1257 /*
1282 1258 * Remove kpd from the hash table.
1283 1259 */
1284 1260 static void
1285 1261 segkp_delete(struct seg *seg, struct segkp_data *kpd)
1286 1262 {
1287 1263 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1288 1264 struct segkp_data **kpp;
1289 1265 int index;
1290 1266
1291 1267 ASSERT(MUTEX_HELD(&segkp_lock));
1292 1268
1293 1269 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1294 1270 for (kpp = &kpsd->kpsd_hash[index];
1295 1271 *kpp != NULL; kpp = &((*kpp)->kp_next)) {
1296 1272 if (*kpp == kpd) {
1297 1273 *kpp = kpd->kp_next;
1298 1274 return;
1299 1275 }
1300 1276 }
1301 1277 panic("segkp_delete: unable to find element to delete");
1302 1278 /*NOTREACHED*/
1303 1279 }
1304 1280
1305 1281 /*
1306 1282 * Find the kpd associated with a vaddr.
1307 1283 *
1308 1284 * Most of the callers of segkp_find will pass the vaddr that
1309 1285 * hashes to the desired index, but there are cases where
1310 1286 * this is not true in which case we have to (potentially) scan
1311 1287 * the whole table looking for it. This should be very rare
1312 1288 * (e.g. a segkp_fault(F_INVAL) on an address somewhere in the
1313 1289 * middle of the segkp_data region).
1314 1290 */
1315 1291 static struct segkp_data *
1316 1292 segkp_find(struct seg *seg, caddr_t vaddr)
1317 1293 {
1318 1294 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1319 1295 struct segkp_data *kpd;
1320 1296 int i;
1321 1297 int stop;
1322 1298
1323 1299 i = stop = SEGKP_HASH(vaddr);
1324 1300 mutex_enter(&segkp_lock);
1325 1301 do {
1326 1302 for (kpd = kpsd->kpsd_hash[i]; kpd != NULL;
1327 1303 kpd = kpd->kp_next) {
1328 1304 if (vaddr >= kpd->kp_base &&
1329 1305 vaddr < kpd->kp_base + kpd->kp_len) {
1330 1306 mutex_exit(&segkp_lock);
1331 1307 return (kpd);
1332 1308 }
1333 1309 }
1334 1310 if (--i < 0)
1335 1311 i = SEGKP_HASHSZ - 1; /* Wrap */
1336 1312 } while (i != stop);
1337 1313 mutex_exit(&segkp_lock);
1338 1314 return (NULL); /* Not found */
1339 1315 }
1340 1316
1341 1317 /*
1342 1318 * returns size of swappable area.
1343 1319 */
1344 1320 size_t
1345 1321 swapsize(caddr_t v)
1346 1322 {
1347 1323 struct segkp_data *kpd;
1348 1324
1349 1325 if ((kpd = segkp_find(segkp, v)) != NULL)
1350 1326 return (SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
1351 1327 else
1352 1328 return (NULL);
1353 1329 }
1354 1330
1355 1331 /*
1356 1332 * Dump out all the active segkp pages
1357 1333 */
1358 1334 static void
1359 1335 segkp_dump(struct seg *seg)
1360 1336 {
1361 1337 int i;
1362 1338 struct segkp_data *kpd;
1363 1339 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1364 1340
1365 1341 for (i = 0; i < SEGKP_HASHSZ; i++) {
1366 1342 for (kpd = kpsd->kpsd_hash[i];
1367 1343 kpd != NULL; kpd = kpd->kp_next) {
1368 1344 pfn_t pfn;
1369 1345 caddr_t addr;
1370 1346 caddr_t eaddr;
1371 1347
1372 1348 addr = kpd->kp_base;
1373 1349 eaddr = addr + kpd->kp_len;
1374 1350 while (addr < eaddr) {
1375 1351 ASSERT(seg->s_as == &kas);
1376 1352 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1377 1353 if (pfn != PFN_INVALID)
1378 1354 dump_addpage(seg->s_as, addr, pfn);
1379 1355 addr += PAGESIZE;
1380 1356 dump_timeleft = dump_timeout;
1381 1357 }
1382 1358 }
1383 1359 }
1384 1360 }
1385 1361
1386 1362 /*ARGSUSED*/
1387 1363 static int
1388 1364 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1389 1365 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1390 1366 {
1391 1367 return (ENOTSUP);
1392 1368 }
1393 1369
1394 1370 /*ARGSUSED*/
1395 1371 static int
1396 1372 segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
1397 1373 {
1398 1374 return (ENODEV);
1399 1375 }
1400 1376
1401 1377 /*ARGSUSED*/
1402 1378 static lgrp_mem_policy_info_t *
1403 1379 segkp_getpolicy(struct seg *seg, caddr_t addr)
1404 1380 {
1405 1381 return (NULL);
1406 1382 }
1407 1383
1408 1384 /*ARGSUSED*/
1409 1385 static int
1410 1386 segkp_capable(struct seg *seg, segcapability_t capability)
1411 1387 {
1412 1388 return (0);
1413 1389 }
1414 1390
1415 1391 #include <sys/mem_config.h>
1416 1392
1417 1393 /*ARGSUSED*/
1418 1394 static void
1419 1395 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1420 1396 {}
1421 1397
1422 1398 /*
1423 1399 * During memory delete, turn off caches so that pages are not held.
1424 1400 * A better solution may be to unlock the pages while they are
1425 1401 * in the cache so that they may be collected naturally.
1426 1402 */
1427 1403
1428 1404 /*ARGSUSED*/
1429 1405 static int
1430 1406 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1431 1407 {
1432 1408 atomic_inc_32(&segkp_indel);
1433 1409 segkp_cache_free();
1434 1410 return (0);
1435 1411 }
1436 1412
1437 1413 /*ARGSUSED*/
1438 1414 static void
1439 1415 segkp_mem_config_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
1440 1416 {
1441 1417 atomic_dec_32(&segkp_indel);
1442 1418 }
1443 1419
1444 1420 static kphysm_setup_vector_t segkp_mem_config_vec = {
1445 1421 KPHYSM_SETUP_VECTOR_VERSION,
1446 1422 segkp_mem_config_post_add,
1447 1423 segkp_mem_config_pre_del,
1448 1424 segkp_mem_config_post_del,
1449 1425 };
1450 1426
1451 1427 static void
1452 1428 segkpinit_mem_config(struct seg *seg)
1453 1429 {
1454 1430 int ret;
1455 1431
1456 1432 ret = kphysm_setup_func_register(&segkp_mem_config_vec, (void *)seg);
1457 1433 ASSERT(ret == 0);
1458 1434 }
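
For context on the cache entry points in this file (segkp_cache_init(),
segkp_cache_get(), segkp_release()), a minimal caller sketch follows. It is
not taken from this webrev; the cache size of 24 and the use of DEFAULTSTKSZ
are illustrative, loosely modeled on kernel-stack allocation. Note that
KPD_NO_ANON is only accepted together with KPD_LOCKED, and that
segkp_cache_init() returns (void *)-1, not NULL, on failure.

	/* Hypothetical: set up a freelist of locked, redzoned stacks. */
	void *cookie = segkp_cache_init(segkp, 24, DEFAULTSTKSZ,
	    KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED);

	if (cookie != (void *)-1) {
		/* Reuses a cached resource, or allocates a fresh one. */
		caddr_t stk = segkp_cache_get(cookie);

		/* ... use the resource ... */

		/* Cached again if below kpf_max, otherwise freed. */
		segkp_release(segkp, stk);
	}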
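
Similarly, the segkp_map_red()/segkp_unmap_red() contract documented above
leaves it to the caller to remember whether the redzone was mapped and to
unmap it later. A hypothetical caller fragment, modeled on the pagefault()
path the comment mentions (it must be safe to sleep on page_create_va()):

	int mapped_red = segkp_map_red();

	/* ... handle the fault, possibly running deep into the stack ... */

	if (mapped_red)
		segkp_unmap_red();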