const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
segop_getpolicy already checks for a NULL op
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
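A minimal sketch of what the NULL checks above mean in practice (the wrapper signatures here are assumptions inferred from the seg_ops entries in the diff below, not verbatim illumos code):

/*
 * Sketch only: a missing getpolicy op means "no explicit policy" (NULL),
 * and a missing inherit op means "not supported" (ENOTSUP), so drivers
 * no longer need stub implementations for either.
 */
lgrp_mem_policy_info_t *
segop_getpolicy(struct seg *seg, caddr_t addr)
{
	if (seg->s_ops->getpolicy == NULL)
		return (NULL);

	return (seg->s_ops->getpolicy(seg, addr));
}

int
segop_inherit(struct seg *seg, caddr_t addr, size_t size, uint_t op)
{
	if (seg->s_ops->inherit == NULL)
		return (ENOTSUP);

	return (seg->s_ops->inherit(seg, addr, size, op));
}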
no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL will accomplish the same thing in most
cases. In other cases, keeping the function pointer NULL will result in
the proper error code being returned.
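For reference, the pattern this removes is visible in the seg_kmem hunk further down; condensed (the stub and macro are copied from that hunk, the ops table is abbreviated):

/* Old style, now removed: every unsupported op points at a panic stub. */
static void
segkmem_badop()
{
	panic("segkmem_badop");
}

#define	SEGKMEM_BADOP(t)	(t(*)())segkmem_badop

static struct seg_ops segkmem_ops = {
	SEGKMEM_BADOP(int),		/* dup */
	SEGKMEM_BADOP(int),		/* unmap */
	SEGKMEM_BADOP(void),		/* free */
	segkmem_fault,
	/* ... remaining ops ... */
};

/*
 * New style: leave the slot NULL.  A call through the NULL pointer traps
 * (same net effect as the panic stub), or the segop_*() wrapper returns
 * the proper error code.
 */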
patch lower-case-segops
use C99 initializers in segment ops structures
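Combined with the const-ification above, each driver's ops table becomes a const structure with C99 designated initializers in which unsupported ops are simply omitted (and therefore NULL). The new segkmem table from the hunk below is representative:

static const struct seg_ops segkmem_ops = {
	.fault		= segkmem_fault,
	.setprot	= segkmem_setprot,
	.checkprot	= segkmem_checkprot,
	.kluster	= segkmem_kluster,
	.dump		= segkmem_dump,
	.pagelock	= segkmem_pagelock,
	.getmemid	= segkmem_getmemid,
	.capable	= segkmem_capable,
};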
remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory. The code is there and in theory it runs when we get *extremely* low
on memory. In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
--- old/usr/src/uts/common/vm/seg_kmem.c
+++ new/usr/src/uts/common/vm/seg_kmem.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 #include <sys/types.h>
26 26 #include <sys/t_lock.h>
27 27 #include <sys/param.h>
28 28 #include <sys/sysmacros.h>
29 29 #include <sys/tuneable.h>
30 30 #include <sys/systm.h>
31 31 #include <sys/vm.h>
32 32 #include <sys/kmem.h>
33 33 #include <sys/vmem.h>
34 34 #include <sys/mman.h>
35 35 #include <sys/cmn_err.h>
36 36 #include <sys/debug.h>
37 37 #include <sys/dumphdr.h>
38 38 #include <sys/bootconf.h>
39 39 #include <sys/lgrp.h>
40 40 #include <vm/seg_kmem.h>
41 41 #include <vm/hat.h>
42 42 #include <vm/page.h>
43 43 #include <vm/vm_dep.h>
44 44 #include <vm/faultcode.h>
45 45 #include <sys/promif.h>
46 46 #include <vm/seg_kp.h>
47 47 #include <sys/bitmap.h>
48 48 #include <sys/mem_cage.h>
49 49
50 50 #ifdef __sparc
51 51 #include <sys/ivintr.h>
52 52 #include <sys/panic.h>
53 53 #endif
54 54
55 55 /*
56 56 * seg_kmem is the primary kernel memory segment driver. It
57 57 * maps the kernel heap [kernelheap, ekernelheap), module text,
58 58 * and all memory which was allocated before the VM was initialized
59 59 * into kas.
60 60 *
61 61 * Pages which belong to seg_kmem are hashed into &kvp vnode at
62 62 * an offset equal to (u_offset_t)virt_addr, and have p_lckcnt >= 1.
63 63 * They must never be paged out since segkmem_fault() is a no-op to
64 64 * prevent recursive faults.
65 65 *
66 66 * Currently, seg_kmem pages are sharelocked (p_sharelock == 1) on
67 67 * __x86 and are unlocked (p_sharelock == 0) on __sparc. Once __x86
68 68 * supports relocation the #ifdef kludges can be removed.
69 69 *
70 70 * seg_kmem pages may be subject to relocation by page_relocate(),
71 71 * provided that the HAT supports it; if this is so, segkmem_reloc
72 72 * will be set to a nonzero value. All boot time allocated memory as
73 73 * well as static memory is considered off limits to relocation.
74 74 * Pages are "relocatable" if p_state does not have P_NORELOC set, so
75 75 * we request P_NORELOC pages for memory that isn't safe to relocate.
76 76 *
77 77 * The kernel heap is logically divided up into four pieces:
78 78 *
79 79 * heap32_arena is for allocations that require 32-bit absolute
80 80 * virtual addresses (e.g. code that uses 32-bit pointers/offsets).
81 81 *
82 82 * heap_core is for allocations that require 2GB *relative*
83 83 * offsets; in other words all memory from heap_core is within
84 84 * 2GB of all other memory from the same arena. This is a requirement
85 85 * of the addressing modes of some processors in supervisor code.
86 86 *
87 87 * heap_arena is the general heap arena.
88 88 *
89 89 * static_arena is the static memory arena. Allocations from it
90 90 * are not subject to relocation so it is safe to use the memory
91 91 * physical address as well as the virtual address (e.g. the VA to
92 92 * PA translations are static). Caches may import from static_arena;
93 93 * all other static memory allocations should use static_alloc_arena.
94 94 *
95 95 * On some platforms which have limited virtual address space, seg_kmem
96 96 * may share [kernelheap, ekernelheap) with seg_kp; if this is so,
97 97 * segkp_bitmap is non-NULL, and each bit represents a page of virtual
98 98 * address space which is actually seg_kp mapped.
99 99 */
100 100
101 101 extern ulong_t *segkp_bitmap; /* Is set if segkp is from the kernel heap */
102 102
103 103 char *kernelheap; /* start of primary kernel heap */
104 104 char *ekernelheap; /* end of primary kernel heap */
105 105 struct seg kvseg; /* primary kernel heap segment */
106 106 struct seg kvseg_core; /* "core" kernel heap segment */
107 107 struct seg kzioseg; /* Segment for zio mappings */
108 108 vmem_t *heap_arena; /* primary kernel heap arena */
109 109 vmem_t *heap_core_arena; /* core kernel heap arena */
110 110 char *heap_core_base; /* start of core kernel heap arena */
111 111 char *heap_lp_base; /* start of kernel large page heap arena */
112 112 char *heap_lp_end; /* end of kernel large page heap arena */
113 113 vmem_t *hat_memload_arena; /* HAT translation data */
114 114 struct seg kvseg32; /* 32-bit kernel heap segment */
115 115 vmem_t *heap32_arena; /* 32-bit kernel heap arena */
116 116 vmem_t *heaptext_arena; /* heaptext arena */
117 117 struct as kas; /* kernel address space */
118 118 int segkmem_reloc; /* enable/disable relocatable segkmem pages */
119 119 vmem_t *static_arena; /* arena for caches to import static memory */
120 120 vmem_t *static_alloc_arena; /* arena for allocating static memory */
121 121 vmem_t *zio_arena = NULL; /* arena for allocating zio memory */
122 122 vmem_t *zio_alloc_arena = NULL; /* arena for allocating zio memory */
123 123
124 124 /*
125 125 * seg_kmem driver can map part of the kernel heap with large pages.
126 126 * Currently this functionality is implemented for sparc platforms only.
127 127 *
128 128 * The large page size "segkmem_lpsize" for kernel heap is selected in the
129 129 * platform specific code. It can also be modified via /etc/system file.
130 130 * Setting segkmem_lpsize to PAGESIZE in /etc/system disables usage of large
131 131 * pages for kernel heap. "segkmem_lpshift" is adjusted appropriately to
132 132 * match segkmem_lpsize.
133 133 *
134 134 * At boot time we carve from kernel heap arena a range of virtual addresses
135 135 * that will be used for large page mappings. This range [heap_lp_base,
136 136 * heap_lp_end) is set up as a separate vmem arena - "heap_lp_arena". We also
137 137 * create "kmem_lp_arena" that caches memory already backed up by large
138 138 * pages. kmem_lp_arena imports virtual segments from heap_lp_arena.
139 139 */
140 140
141 141 size_t segkmem_lpsize;
142 142 static uint_t segkmem_lpshift = PAGESHIFT;
143 143 int segkmem_lpszc = 0;
144 144
145 145 size_t segkmem_kmemlp_quantum = 0x400000; /* 4MB */
146 146 size_t segkmem_heaplp_quantum;
147 147 vmem_t *heap_lp_arena;
148 148 static vmem_t *kmem_lp_arena;
149 149 static vmem_t *segkmem_ppa_arena;
150 150 static segkmem_lpcb_t segkmem_lpcb;
151 151
152 152 /*
153 153 * We use "segkmem_kmemlp_max" to limit the total amount of physical memory
154 154 * consumed by the large page heap. By default this parameter is set to 1/8 of
155 155 * physmem but can be adjusted through /etc/system either directly or
156 156 * indirectly by setting "segkmem_kmemlp_pcnt" to the percent of physmem
157 157 * we allow for large page heap.
158 158 */
159 159 size_t segkmem_kmemlp_max;
160 160 static uint_t segkmem_kmemlp_pcnt;
161 161
162 162 /*
163 163 * Getting large pages for kernel heap could be problematic due to
164 164 * physical memory fragmentation. That's why we allow to preallocate
165 165 * "segkmem_kmemlp_min" bytes at boot time.
166 166 */
167 167 static size_t segkmem_kmemlp_min;
168 168
169 169 /*
170 170 * Throttling is used to avoid expensive tries to allocate large pages
171 171 * for kernel heap when a lot of succesive attempts to do so fail.
172 172 */
173 173 static ulong_t segkmem_lpthrottle_max = 0x400000;
174 174 static ulong_t segkmem_lpthrottle_start = 0x40;
175 175 static ulong_t segkmem_use_lpthrottle = 1;
176 176
177 177 /*
178 178 * Freed pages accumulate on a garbage list until segkmem is ready,
179 179 * at which point we call segkmem_gc() to free it all.
180 180 */
181 181 typedef struct segkmem_gc_list {
182 182 struct segkmem_gc_list *gc_next;
183 183 vmem_t *gc_arena;
184 184 size_t gc_size;
185 185 } segkmem_gc_list_t;
186 186
187 187 static segkmem_gc_list_t *segkmem_gc_list;
188 188
189 189 /*
190 190 * Allocations from the hat_memload arena add VM_MEMLOAD to their
191 191 * vmflags so that segkmem_xalloc() can inform the hat layer that it needs
192 192 * to take steps to prevent infinite recursion. HAT allocations also
193 193 * must be non-relocatable to prevent recursive page faults.
194 194 */
195 195 static void *
196 196 hat_memload_alloc(vmem_t *vmp, size_t size, int flags)
197 197 {
198 198 flags |= (VM_MEMLOAD | VM_NORELOC);
199 199 return (segkmem_alloc(vmp, size, flags));
200 200 }
201 201
202 202 /*
203 203 * Allocations from static_arena arena (or any other arena that uses
204 204 * segkmem_alloc_permanent()) require non-relocatable (permanently
205 205 * wired) memory pages, since these pages are referenced by physical
206 206 * as well as virtual address.
207 207 */
208 208 void *
209 209 segkmem_alloc_permanent(vmem_t *vmp, size_t size, int flags)
210 210 {
211 211 return (segkmem_alloc(vmp, size, flags | VM_NORELOC));
212 212 }
213 213
214 214 /*
215 215 * Initialize kernel heap boundaries.
216 216 */
217 217 void
218 218 kernelheap_init(
219 219 void *heap_start,
220 220 void *heap_end,
221 221 char *first_avail,
222 222 void *core_start,
223 223 void *core_end)
224 224 {
225 225 uintptr_t textbase;
226 226 size_t core_size;
227 227 size_t heap_size;
228 228 vmem_t *heaptext_parent;
229 229 size_t heap_lp_size = 0;
230 230 #ifdef __sparc
231 231 size_t kmem64_sz = kmem64_aligned_end - kmem64_base;
232 232 #endif /* __sparc */
233 233
234 234 kernelheap = heap_start;
235 235 ekernelheap = heap_end;
236 236
237 237 #ifdef __sparc
238 238 heap_lp_size = (((uintptr_t)heap_end - (uintptr_t)heap_start) / 4);
239 239 /*
240 240 * Bias heap_lp start address by kmem64_sz to reduce collisions
241 241 * in 4M kernel TSB between kmem64 area and heap_lp
242 242 */
243 243 kmem64_sz = P2ROUNDUP(kmem64_sz, MMU_PAGESIZE256M);
244 244 if (kmem64_sz <= heap_lp_size / 2)
245 245 heap_lp_size -= kmem64_sz;
246 246 heap_lp_base = ekernelheap - heap_lp_size;
247 247 heap_lp_end = heap_lp_base + heap_lp_size;
248 248 #endif /* __sparc */
249 249
250 250 /*
251 251 * If this platform has a 'core' heap area, then the space for
252 252 * overflow module text should be carved out of the end of that
253 253 * heap. Otherwise, it gets carved out of the general purpose
254 254 * heap.
255 255 */
256 256 core_size = (uintptr_t)core_end - (uintptr_t)core_start;
257 257 if (core_size > 0) {
258 258 ASSERT(core_size >= HEAPTEXT_SIZE);
259 259 textbase = (uintptr_t)core_end - HEAPTEXT_SIZE;
260 260 core_size -= HEAPTEXT_SIZE;
261 261 }
262 262 #ifndef __sparc
263 263 else {
264 264 ekernelheap -= HEAPTEXT_SIZE;
265 265 textbase = (uintptr_t)ekernelheap;
266 266 }
267 267 #endif
268 268
269 269 heap_size = (uintptr_t)ekernelheap - (uintptr_t)kernelheap;
270 270 heap_arena = vmem_init("heap", kernelheap, heap_size, PAGESIZE,
271 271 segkmem_alloc, segkmem_free);
272 272
273 273 if (core_size > 0) {
274 274 heap_core_arena = vmem_create("heap_core", core_start,
275 275 core_size, PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP);
276 276 heap_core_base = core_start;
277 277 } else {
278 278 heap_core_arena = heap_arena;
279 279 heap_core_base = kernelheap;
280 280 }
281 281
282 282 /*
283 283 * reserve space for the large page heap. If large pages for kernel
284 284 * heap is enabled large page heap arean will be created later in the
285 285 * boot sequence in segkmem_heap_lp_init(). Otherwise the allocated
286 286 * range will be returned back to the heap_arena.
287 287 */
288 288 if (heap_lp_size) {
289 289 (void) vmem_xalloc(heap_arena, heap_lp_size, PAGESIZE, 0, 0,
290 290 heap_lp_base, heap_lp_end,
291 291 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
292 292 }
293 293
294 294 /*
295 295 * Remove the already-spoken-for memory range [kernelheap, first_avail).
296 296 */
297 297 (void) vmem_xalloc(heap_arena, first_avail - kernelheap, PAGESIZE,
298 298 0, 0, kernelheap, first_avail, VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
299 299
300 300 #ifdef __sparc
301 301 heap32_arena = vmem_create("heap32", (void *)SYSBASE32,
302 302 SYSLIMIT32 - SYSBASE32 - HEAPTEXT_SIZE, PAGESIZE, NULL,
303 303 NULL, NULL, 0, VM_SLEEP);
304 304 /*
305 305 * Prom claims the physical and virtual resources used by panicbuf
306 306 * and inter_vec_table. So reserve space for panicbuf, intr_vec_table,
307 307 * reserved interrupt vector data structures from 32-bit heap.
308 308 */
309 309 (void) vmem_xalloc(heap32_arena, PANICBUFSIZE, PAGESIZE, 0, 0,
310 310 panicbuf, panicbuf + PANICBUFSIZE,
311 311 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
312 312
313 313 (void) vmem_xalloc(heap32_arena, IVSIZE, PAGESIZE, 0, 0,
314 314 intr_vec_table, (caddr_t)intr_vec_table + IVSIZE,
315 315 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
316 316
317 317 textbase = SYSLIMIT32 - HEAPTEXT_SIZE;
318 318 heaptext_parent = NULL;
319 319 #else /* __sparc */
320 320 heap32_arena = heap_core_arena;
321 321 heaptext_parent = heap_core_arena;
322 322 #endif /* __sparc */
323 323
324 324 heaptext_arena = vmem_create("heaptext", (void *)textbase,
325 325 HEAPTEXT_SIZE, PAGESIZE, NULL, NULL, heaptext_parent, 0, VM_SLEEP);
326 326
327 327 /*
328 328 * Create a set of arenas for memory with static translations
329 329 * (e.g. VA -> PA translations cannot change). Since using
330 330 * kernel pages by physical address implies it isn't safe to
331 331 * walk across page boundaries, the static_arena quantum must
332 332 * be PAGESIZE. Any kmem caches that require static memory
333 333 * should source from static_arena, while direct allocations
334 334 * should only use static_alloc_arena.
335 335 */
336 336 static_arena = vmem_create("static", NULL, 0, PAGESIZE,
337 337 segkmem_alloc_permanent, segkmem_free, heap_arena, 0, VM_SLEEP);
338 338 static_alloc_arena = vmem_create("static_alloc", NULL, 0,
339 339 sizeof (uint64_t), vmem_alloc, vmem_free, static_arena,
340 340 0, VM_SLEEP);
341 341
342 342 /*
343 343 * Create an arena for translation data (ptes, hmes, or hblks).
344 344 * We need an arena for this because hat_memload() is essential
345 345 * to vmem_populate() (see comments in common/os/vmem.c).
346 346 *
347 347 * Note: any kmem cache that allocates from hat_memload_arena
348 348 * must be created as a KMC_NOHASH cache (i.e. no external slab
349 349 * and bufctl structures to allocate) so that slab creation doesn't
350 350 * require anything more than a single vmem_alloc().
351 351 */
352 352 hat_memload_arena = vmem_create("hat_memload", NULL, 0, PAGESIZE,
353 353 hat_memload_alloc, segkmem_free, heap_arena, 0,
354 354 VM_SLEEP | VMC_POPULATOR | VMC_DUMPSAFE);
355 355 }
356 356
357 357 void
358 358 boot_mapin(caddr_t addr, size_t size)
359 359 {
360 360 caddr_t eaddr;
361 361 page_t *pp;
362 362 pfn_t pfnum;
363 363
364 364 if (page_resv(btop(size), KM_NOSLEEP) == 0)
365 365 panic("boot_mapin: page_resv failed");
366 366
367 367 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
368 368 pfnum = va_to_pfn(addr);
369 369 if (pfnum == PFN_INVALID)
370 370 continue;
371 371 if ((pp = page_numtopp_nolock(pfnum)) == NULL)
372 372 panic("boot_mapin(): No pp for pfnum = %lx", pfnum);
373 373
374 374 /*
375 375 * must break up any large pages that may have constituent
376 376 * pages being utilized for BOP_ALLOC()'s before calling
377 377 * page_numtopp().The locking code (ie. page_reclaim())
378 378 * can't handle them
379 379 */
380 380 if (pp->p_szc != 0)
381 381 page_boot_demote(pp);
382 382
383 383 pp = page_numtopp(pfnum, SE_EXCL);
384 384 if (pp == NULL || PP_ISFREE(pp))
385 385 panic("boot_alloc: pp is NULL or free");
386 386
387 387 /*
388 388 * If the cage is on but doesn't yet contain this page,
389 389 * mark it as non-relocatable.
390 390 */
391 391 if (kcage_on && !PP_ISNORELOC(pp)) {
392 392 PP_SETNORELOC(pp);
393 393 PLCNT_XFER_NORELOC(pp);
394 394 }
395 395
396 396 (void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr, NULL);
397 397 pp->p_lckcnt = 1;
398 398 #if defined(__x86)
399 399 page_downgrade(pp);
400 400 #else
401 401 page_unlock(pp);
402 402 #endif
403 403 }
404 404 }
405 405
406 406 /*
407 407 * Get pages from boot and hash them into the kernel's vp.
408 408 * Used after page structs have been allocated, but before segkmem is ready.
409 409 */
410 410 void *
411 411 boot_alloc(void *inaddr, size_t size, uint_t align)
412 412 {
413 413 caddr_t addr = inaddr;
414 414
415 415 if (bootops == NULL)
416 416 prom_panic("boot_alloc: attempt to allocate memory after "
417 417 "BOP_GONE");
418 418
419 419 size = ptob(btopr(size));
420 420 #ifdef __sparc
421 421 if (bop_alloc_chunk(addr, size, align) != (caddr_t)addr)
422 422 panic("boot_alloc: bop_alloc_chunk failed");
423 423 #else
424 424 if (BOP_ALLOC(bootops, addr, size, align) != addr)
425 425 panic("boot_alloc: BOP_ALLOC failed");
426 426 #endif
427 427 boot_mapin((caddr_t)addr, size);
428 428 return (addr);
429 429 }
430 430
431 -static void
432 -segkmem_badop()
433 -{
434 - panic("segkmem_badop");
435 -}
436 -
437 -#define SEGKMEM_BADOP(t) (t(*)())segkmem_badop
438 -
439 431 /*ARGSUSED*/
440 432 static faultcode_t
441 433 segkmem_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t size,
442 434 enum fault_type type, enum seg_rw rw)
443 435 {
444 436 pgcnt_t npages;
445 437 spgcnt_t pg;
446 438 page_t *pp;
447 439 struct vnode *vp = seg->s_data;
448 440
449 441 ASSERT(RW_READ_HELD(&seg->s_as->a_lock));
450 442
451 443 if (seg->s_as != &kas || size > seg->s_size ||
452 444 addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
453 445 panic("segkmem_fault: bad args");
454 446
455 447 /*
456 448 * If it is one of segkp pages, call segkp_fault.
457 449 */
458 450 if (segkp_bitmap && seg == &kvseg &&
459 451 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
460 - return (SEGOP_FAULT(hat, segkp, addr, size, type, rw));
452 + return (segop_fault(hat, segkp, addr, size, type, rw));
461 453
462 454 if (rw != S_READ && rw != S_WRITE && rw != S_OTHER)
463 455 return (FC_NOSUPPORT);
464 456
465 457 npages = btopr(size);
466 458
467 459 switch (type) {
468 460 case F_SOFTLOCK: /* lock down already-loaded translations */
469 461 for (pg = 0; pg < npages; pg++) {
470 462 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
471 463 SE_SHARED);
472 464 if (pp == NULL) {
473 465 /*
474 466 * Hmm, no page. Does a kernel mapping
475 467 * exist for it?
476 468 */
477 469 if (!hat_probe(kas.a_hat, addr)) {
478 470 addr -= PAGESIZE;
479 471 while (--pg >= 0) {
480 472 pp = page_find(vp, (u_offset_t)
481 473 (uintptr_t)addr);
482 474 if (pp)
483 475 page_unlock(pp);
484 476 addr -= PAGESIZE;
485 477 }
486 478 return (FC_NOMAP);
487 479 }
488 480 }
489 481 addr += PAGESIZE;
490 482 }
491 483 if (rw == S_OTHER)
492 484 hat_reserve(seg->s_as, addr, size);
493 485 return (0);
494 486 case F_SOFTUNLOCK:
495 487 while (npages--) {
496 488 pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
497 489 if (pp)
498 490 page_unlock(pp);
499 491 addr += PAGESIZE;
500 492 }
501 493 return (0);
502 494 default:
503 495 return (FC_NOSUPPORT);
504 496 }
505 497 /*NOTREACHED*/
506 498 }
507 499
508 500 static int
509 501 segkmem_setprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
510 502 {
511 503 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
512 504
513 505 if (seg->s_as != &kas || size > seg->s_size ||
514 506 addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
515 507 panic("segkmem_setprot: bad args");
516 508
517 509 /*
518 510 * If it is one of segkp pages, call segkp.
519 511 */
520 512 if (segkp_bitmap && seg == &kvseg &&
521 513 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
522 - return (SEGOP_SETPROT(segkp, addr, size, prot));
514 + return (segop_setprot(segkp, addr, size, prot));
523 515
524 516 if (prot == 0)
525 517 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD);
526 518 else
527 519 hat_chgprot(kas.a_hat, addr, size, prot);
528 520 return (0);
529 521 }
530 522
531 523 /*
532 524 * This is a dummy segkmem function overloaded to call segkp
533 525 * when segkp is under the heap.
534 526 */
535 527 /* ARGSUSED */
536 528 static int
537 529 segkmem_checkprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
538 530 {
539 531 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
540 532
541 533 if (seg->s_as != &kas)
542 - segkmem_badop();
534 + panic("segkmem badop");
543 535
544 536 /*
545 537 * If it is one of segkp pages, call into segkp.
546 538 */
547 539 if (segkp_bitmap && seg == &kvseg &&
548 540 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
549 - return (SEGOP_CHECKPROT(segkp, addr, size, prot));
541 + return (segop_checkprot(segkp, addr, size, prot));
550 542
551 - segkmem_badop();
543 + panic("segkmem badop");
552 544 return (0);
553 545 }
554 546
555 547 /*
556 548 * This is a dummy segkmem function overloaded to call segkp
557 549 * when segkp is under the heap.
558 550 */
559 551 /* ARGSUSED */
560 552 static int
561 553 segkmem_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
562 554 {
563 555 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
564 556
565 557 if (seg->s_as != &kas)
566 - segkmem_badop();
558 + panic("segkmem badop");
567 559
568 560 /*
569 561 * If it is one of segkp pages, call into segkp.
570 562 */
571 563 if (segkp_bitmap && seg == &kvseg &&
572 564 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
573 - return (SEGOP_KLUSTER(segkp, addr, delta));
565 + return (segop_kluster(segkp, addr, delta));
574 566
575 - segkmem_badop();
567 + panic("segkmem badop");
576 568 return (0);
577 569 }
578 570
579 571 static void
580 572 segkmem_xdump_range(void *arg, void *start, size_t size)
581 573 {
582 574 struct as *as = arg;
583 575 caddr_t addr = start;
584 576 caddr_t addr_end = addr + size;
585 577
586 578 while (addr < addr_end) {
587 579 pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
588 580 if (pfn != PFN_INVALID && pfn <= physmax && pf_is_memory(pfn))
589 581 dump_addpage(as, addr, pfn);
590 582 addr += PAGESIZE;
591 583 dump_timeleft = dump_timeout;
592 584 }
593 585 }
594 586
595 587 static void
596 588 segkmem_dump_range(void *arg, void *start, size_t size)
597 589 {
598 590 caddr_t addr = start;
599 591 caddr_t addr_end = addr + size;
600 592
601 593 /*
602 594 * If we are about to start dumping the range of addresses we
603 595 * carved out of the kernel heap for the large page heap walk
604 596 * heap_lp_arena to find what segments are actually populated
605 597 */
606 598 if (SEGKMEM_USE_LARGEPAGES &&
607 599 addr == heap_lp_base && addr_end == heap_lp_end &&
608 600 vmem_size(heap_lp_arena, VMEM_ALLOC) < size) {
609 601 vmem_walk(heap_lp_arena, VMEM_ALLOC | VMEM_REENTRANT,
610 602 segkmem_xdump_range, arg);
611 603 } else {
612 604 segkmem_xdump_range(arg, start, size);
613 605 }
614 606 }
615 607
616 608 static void
617 609 segkmem_dump(struct seg *seg)
618 610 {
619 611 /*
620 612 * The kernel's heap_arena (represented by kvseg) is a very large
621 613 * VA space, most of which is typically unused. To speed up dumping
622 614 * we use vmem_walk() to quickly find the pieces of heap_arena that
623 615 * are actually in use. We do the same for heap32_arena and
624 616 * heap_core.
625 617 *
626 618 * We specify VMEM_REENTRANT to vmem_walk() because dump_addpage()
627 619 * may ultimately need to allocate memory. Reentrant walks are
628 620 * necessarily imperfect snapshots. The kernel heap continues
629 621 * to change during a live crash dump, for example. For a normal
630 622 * crash dump, however, we know that there won't be any other threads
631 623 * messing with the heap. Therefore, at worst, we may fail to dump
632 624 * the pages that get allocated by the act of dumping; but we will
633 625 * always dump every page that was allocated when the walk began.
634 626 *
635 627 * The other segkmem segments are dense (fully populated), so there's
636 628 * no need to use this technique when dumping them.
637 629 *
638 630 * Note: when adding special dump handling for any new sparsely-
639 631 * populated segments, be sure to add similar handling to the ::kgrep
640 632 * code in mdb.
641 633 */
642 634 if (seg == &kvseg) {
643 635 vmem_walk(heap_arena, VMEM_ALLOC | VMEM_REENTRANT,
644 636 segkmem_dump_range, seg->s_as);
645 637 #ifndef __sparc
646 638 vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
647 639 segkmem_dump_range, seg->s_as);
648 640 #endif
649 641 } else if (seg == &kvseg_core) {
650 642 vmem_walk(heap_core_arena, VMEM_ALLOC | VMEM_REENTRANT,
651 643 segkmem_dump_range, seg->s_as);
652 644 } else if (seg == &kvseg32) {
653 645 vmem_walk(heap32_arena, VMEM_ALLOC | VMEM_REENTRANT,
654 646 segkmem_dump_range, seg->s_as);
655 647 vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
656 648 segkmem_dump_range, seg->s_as);
657 649 } else if (seg == &kzioseg) {
658 650 /*
659 651 * We don't want to dump pages attached to kzioseg since they
660 652 * contain file data from ZFS. If this page's segment is
661 653 * kzioseg return instead of writing it to the dump device.
662 654 */
663 655 return;
664 656 } else {
665 657 segkmem_dump_range(seg->s_as, seg->s_base, seg->s_size);
666 658 }
667 659 }
668 660
669 661 /*
670 662 * lock/unlock kmem pages over a given range [addr, addr+len).
671 663 * Returns a shadow list of pages in ppp. If there are holes
672 664 * in the range (e.g. some of the kernel mappings do not have
673 665 * underlying page_ts) returns ENOTSUP so that as_pagelock()
674 666 * will handle the range via as_fault(F_SOFTLOCK).
675 667 */
676 668 /*ARGSUSED*/
677 669 static int
678 670 segkmem_pagelock(struct seg *seg, caddr_t addr, size_t len,
679 671 page_t ***ppp, enum lock_type type, enum seg_rw rw)
680 672 {
681 673 page_t **pplist, *pp;
682 674 pgcnt_t npages;
683 675 spgcnt_t pg;
684 676 size_t nb;
685 677 struct vnode *vp = seg->s_data;
686 678
687 679 ASSERT(ppp != NULL);
688 680
689 681 /*
690 682 * If it is one of segkp pages, call into segkp.
691 683 */
692 684 if (segkp_bitmap && seg == &kvseg &&
693 685 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
694 - return (SEGOP_PAGELOCK(segkp, addr, len, ppp, type, rw));
686 + return (segop_pagelock(segkp, addr, len, ppp, type, rw));
695 687
696 688 npages = btopr(len);
697 689 nb = sizeof (page_t *) * npages;
698 690
699 691 if (type == L_PAGEUNLOCK) {
700 692 pplist = *ppp;
701 693 ASSERT(pplist != NULL);
702 694
703 695 for (pg = 0; pg < npages; pg++) {
704 696 pp = pplist[pg];
705 697 page_unlock(pp);
706 698 }
707 699 kmem_free(pplist, nb);
708 700 return (0);
709 701 }
710 702
711 703 ASSERT(type == L_PAGELOCK);
712 704
713 705 pplist = kmem_alloc(nb, KM_NOSLEEP);
714 706 if (pplist == NULL) {
715 707 *ppp = NULL;
716 708 return (ENOTSUP); /* take the slow path */
717 709 }
718 710
719 711 for (pg = 0; pg < npages; pg++) {
720 712 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_SHARED);
721 713 if (pp == NULL) {
722 714 while (--pg >= 0)
723 715 page_unlock(pplist[pg]);
724 716 kmem_free(pplist, nb);
725 717 *ppp = NULL;
726 718 return (ENOTSUP);
727 719 }
728 720 pplist[pg] = pp;
729 721 addr += PAGESIZE;
730 722 }
731 723
732 724 *ppp = pplist;
733 725 return (0);
734 726 }
735 727
736 728 /*
737 729 * This is a dummy segkmem function overloaded to call segkp
738 730 * when segkp is under the heap.
739 731 */
740 732 /* ARGSUSED */
741 733 static int
742 734 segkmem_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
743 735 {
744 736 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
745 737
746 738 if (seg->s_as != &kas)
747 - segkmem_badop();
739 + panic("segkmem badop");
748 740
749 741 /*
750 742 * If it is one of segkp pages, call into segkp.
751 743 */
752 744 if (segkp_bitmap && seg == &kvseg &&
753 745 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
754 - return (SEGOP_GETMEMID(segkp, addr, memidp));
746 + return (segop_getmemid(segkp, addr, memidp));
755 747
756 - segkmem_badop();
748 + panic("segkmem badop");
757 749 return (0);
758 750 }
759 751
760 752 /*ARGSUSED*/
761 -static lgrp_mem_policy_info_t *
762 -segkmem_getpolicy(struct seg *seg, caddr_t addr)
763 -{
764 - return (NULL);
765 -}
766 -
767 -/*ARGSUSED*/
768 753 static int
769 754 segkmem_capable(struct seg *seg, segcapability_t capability)
770 755 {
771 756 if (capability == S_CAPABILITY_NOMINFLT)
772 757 return (1);
773 758 return (0);
774 759 }
775 760
776 -static struct seg_ops segkmem_ops = {
777 - SEGKMEM_BADOP(int), /* dup */
778 - SEGKMEM_BADOP(int), /* unmap */
779 - SEGKMEM_BADOP(void), /* free */
780 - segkmem_fault,
781 - SEGKMEM_BADOP(faultcode_t), /* faulta */
782 - segkmem_setprot,
783 - segkmem_checkprot,
784 - segkmem_kluster,
785 - SEGKMEM_BADOP(size_t), /* swapout */
786 - SEGKMEM_BADOP(int), /* sync */
787 - SEGKMEM_BADOP(size_t), /* incore */
788 - SEGKMEM_BADOP(int), /* lockop */
789 - SEGKMEM_BADOP(int), /* getprot */
790 - SEGKMEM_BADOP(u_offset_t), /* getoffset */
791 - SEGKMEM_BADOP(int), /* gettype */
792 - SEGKMEM_BADOP(int), /* getvp */
793 - SEGKMEM_BADOP(int), /* advise */
794 - segkmem_dump,
795 - segkmem_pagelock,
796 - SEGKMEM_BADOP(int), /* setpgsz */
797 - segkmem_getmemid,
798 - segkmem_getpolicy, /* getpolicy */
799 - segkmem_capable, /* capable */
800 - seg_inherit_notsup /* inherit */
761 +static const struct seg_ops segkmem_ops = {
762 + .fault = segkmem_fault,
763 + .setprot = segkmem_setprot,
764 + .checkprot = segkmem_checkprot,
765 + .kluster = segkmem_kluster,
766 + .dump = segkmem_dump,
767 + .pagelock = segkmem_pagelock,
768 + .getmemid = segkmem_getmemid,
769 + .capable = segkmem_capable,
801 770 };
802 771
803 772 int
804 773 segkmem_zio_create(struct seg *seg)
805 774 {
806 775 ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
807 776 seg->s_ops = &segkmem_ops;
808 777 seg->s_data = &zvp;
809 778 kas.a_size += seg->s_size;
810 779 return (0);
811 780 }
812 781
813 782 int
814 783 segkmem_create(struct seg *seg)
815 784 {
816 785 ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
817 786 seg->s_ops = &segkmem_ops;
818 787 seg->s_data = &kvp;
819 788 kas.a_size += seg->s_size;
820 789 return (0);
821 790 }
822 791
823 792 /*ARGSUSED*/
824 793 page_t *
825 794 segkmem_page_create(void *addr, size_t size, int vmflag, void *arg)
826 795 {
827 796 struct seg kseg;
828 797 int pgflags;
829 798 struct vnode *vp = arg;
830 799
831 800 if (vp == NULL)
832 801 vp = &kvp;
833 802
834 803 kseg.s_as = &kas;
835 804 pgflags = PG_EXCL;
836 805
837 806 if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
838 807 pgflags |= PG_NORELOC;
839 808 if ((vmflag & VM_NOSLEEP) == 0)
840 809 pgflags |= PG_WAIT;
841 810 if (vmflag & VM_PANIC)
842 811 pgflags |= PG_PANIC;
843 812 if (vmflag & VM_PUSHPAGE)
844 813 pgflags |= PG_PUSHPAGE;
845 814 if (vmflag & VM_NORMALPRI) {
846 815 ASSERT(vmflag & VM_NOSLEEP);
847 816 pgflags |= PG_NORMALPRI;
848 817 }
849 818
850 819 return (page_create_va(vp, (u_offset_t)(uintptr_t)addr, size,
851 820 pgflags, &kseg, addr));
852 821 }
853 822
854 823 /*
855 824 * Allocate pages to back the virtual address range [addr, addr + size).
856 825 * If addr is NULL, allocate the virtual address space as well.
857 826 */
858 827 void *
859 828 segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
860 829 page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
861 830 {
862 831 page_t *ppl;
863 832 caddr_t addr = inaddr;
864 833 pgcnt_t npages = btopr(size);
865 834 int allocflag;
866 835
867 836 if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
868 837 return (NULL);
869 838
870 839 ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
871 840
872 841 if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
873 842 if (inaddr == NULL)
874 843 vmem_free(vmp, addr, size);
875 844 return (NULL);
876 845 }
877 846
878 847 ppl = page_create_func(addr, size, vmflag, pcarg);
879 848 if (ppl == NULL) {
880 849 if (inaddr == NULL)
881 850 vmem_free(vmp, addr, size);
882 851 page_unresv(npages);
883 852 return (NULL);
884 853 }
885 854
886 855 /*
887 856 * Under certain conditions, we need to let the HAT layer know
888 857 * that it cannot safely allocate memory. Allocations from
889 858 * the hat_memload vmem arena always need this, to prevent
890 859 * infinite recursion.
891 860 *
892 861 * In addition, the x86 hat cannot safely do memory
893 862 * allocations while in vmem_populate(), because there
894 863 * is no simple bound on its usage.
895 864 */
896 865 if (vmflag & VM_MEMLOAD)
897 866 allocflag = HAT_NO_KALLOC;
898 867 #if defined(__x86)
899 868 else if (vmem_is_populator())
900 869 allocflag = HAT_NO_KALLOC;
901 870 #endif
902 871 else
903 872 allocflag = 0;
904 873
905 874 while (ppl != NULL) {
906 875 page_t *pp = ppl;
907 876 page_sub(&ppl, pp);
908 877 ASSERT(page_iolock_assert(pp));
909 878 ASSERT(PAGE_EXCL(pp));
910 879 page_io_unlock(pp);
911 880 hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
912 881 (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
913 882 HAT_LOAD_LOCK | allocflag);
914 883 pp->p_lckcnt = 1;
915 884 #if defined(__x86)
916 885 page_downgrade(pp);
917 886 #else
918 887 if (vmflag & SEGKMEM_SHARELOCKED)
919 888 page_downgrade(pp);
920 889 else
921 890 page_unlock(pp);
922 891 #endif
923 892 }
924 893
925 894 return (addr);
926 895 }
927 896
928 897 static void *
929 898 segkmem_alloc_vn(vmem_t *vmp, size_t size, int vmflag, struct vnode *vp)
930 899 {
931 900 void *addr;
932 901 segkmem_gc_list_t *gcp, **prev_gcpp;
933 902
934 903 ASSERT(vp != NULL);
935 904
936 905 if (kvseg.s_base == NULL) {
937 906 #ifndef __sparc
938 907 if (bootops->bsys_alloc == NULL)
939 908 halt("Memory allocation between bop_alloc() and "
940 909 "kmem_alloc().\n");
941 910 #endif
942 911
943 912 /*
944 913 * There's not a lot of memory to go around during boot,
945 914 * so recycle it if we can.
946 915 */
947 916 for (prev_gcpp = &segkmem_gc_list; (gcp = *prev_gcpp) != NULL;
948 917 prev_gcpp = &gcp->gc_next) {
949 918 if (gcp->gc_arena == vmp && gcp->gc_size == size) {
950 919 *prev_gcpp = gcp->gc_next;
951 920 return (gcp);
952 921 }
953 922 }
954 923
955 924 addr = vmem_alloc(vmp, size, vmflag | VM_PANIC);
956 925 if (boot_alloc(addr, size, BO_NO_ALIGN) != addr)
957 926 panic("segkmem_alloc: boot_alloc failed");
958 927 return (addr);
959 928 }
960 929 return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
961 930 segkmem_page_create, vp));
962 931 }
963 932
964 933 void *
965 934 segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
966 935 {
967 936 return (segkmem_alloc_vn(vmp, size, vmflag, &kvp));
968 937 }
969 938
970 939 void *
971 940 segkmem_zio_alloc(vmem_t *vmp, size_t size, int vmflag)
972 941 {
973 942 return (segkmem_alloc_vn(vmp, size, vmflag, &zvp));
974 943 }
975 944
976 945 /*
977 946 * Any changes to this routine must also be carried over to
978 947 * devmap_free_pages() in the seg_dev driver. This is because
979 948 * we currently don't have a special kernel segment for non-paged
980 949 * kernel memory that is exported by drivers to user space.
981 950 */
982 951 static void
983 952 segkmem_free_vn(vmem_t *vmp, void *inaddr, size_t size, struct vnode *vp,
984 953 void (*func)(page_t *))
985 954 {
986 955 page_t *pp;
987 956 caddr_t addr = inaddr;
988 957 caddr_t eaddr;
989 958 pgcnt_t npages = btopr(size);
990 959
991 960 ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
992 961 ASSERT(vp != NULL);
993 962
994 963 if (kvseg.s_base == NULL) {
995 964 segkmem_gc_list_t *gc = inaddr;
996 965 gc->gc_arena = vmp;
997 966 gc->gc_size = size;
998 967 gc->gc_next = segkmem_gc_list;
999 968 segkmem_gc_list = gc;
1000 969 return;
1001 970 }
1002 971
1003 972 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1004 973
1005 974 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
1006 975 #if defined(__x86)
1007 976 pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
1008 977 if (pp == NULL)
1009 978 panic("segkmem_free: page not found");
1010 979 if (!page_tryupgrade(pp)) {
1011 980 /*
1012 981 * Some other thread has a sharelock. Wait for
1013 982 * it to drop the lock so we can free this page.
1014 983 */
1015 984 page_unlock(pp);
1016 985 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
1017 986 SE_EXCL);
1018 987 }
1019 988 #else
1020 989 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1021 990 #endif
1022 991 if (pp == NULL)
1023 992 panic("segkmem_free: page not found");
1024 993 /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
1025 994 pp->p_lckcnt = 0;
1026 995 if (func)
1027 996 func(pp);
1028 997 else
1029 998 page_destroy(pp, 0);
1030 999 }
1031 1000 if (func == NULL)
1032 1001 page_unresv(npages);
1033 1002
1034 1003 if (vmp != NULL)
1035 1004 vmem_free(vmp, inaddr, size);
1036 1005
1037 1006 }
1038 1007
1039 1008 void
1040 1009 segkmem_xfree(vmem_t *vmp, void *inaddr, size_t size, void (*func)(page_t *))
1041 1010 {
1042 1011 segkmem_free_vn(vmp, inaddr, size, &kvp, func);
1043 1012 }
1044 1013
1045 1014 void
1046 1015 segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
1047 1016 {
1048 1017 segkmem_free_vn(vmp, inaddr, size, &kvp, NULL);
1049 1018 }
1050 1019
1051 1020 void
1052 1021 segkmem_zio_free(vmem_t *vmp, void *inaddr, size_t size)
1053 1022 {
1054 1023 segkmem_free_vn(vmp, inaddr, size, &zvp, NULL);
1055 1024 }
1056 1025
1057 1026 void
1058 1027 segkmem_gc(void)
1059 1028 {
1060 1029 ASSERT(kvseg.s_base != NULL);
1061 1030 while (segkmem_gc_list != NULL) {
1062 1031 segkmem_gc_list_t *gc = segkmem_gc_list;
1063 1032 segkmem_gc_list = gc->gc_next;
1064 1033 segkmem_free(gc->gc_arena, gc, gc->gc_size);
1065 1034 }
1066 1035 }
1067 1036
1068 1037 /*
1069 1038 * Legacy entry points from here to end of file.
1070 1039 */
1071 1040 void
1072 1041 segkmem_mapin(struct seg *seg, void *addr, size_t size, uint_t vprot,
1073 1042 pfn_t pfn, uint_t flags)
1074 1043 {
1075 1044 hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1076 1045 hat_devload(seg->s_as->a_hat, addr, size, pfn, vprot,
1077 1046 flags | HAT_LOAD_LOCK);
1078 1047 }
1079 1048
1080 1049 void
1081 1050 segkmem_mapout(struct seg *seg, void *addr, size_t size)
1082 1051 {
1083 1052 hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1084 1053 }
1085 1054
1086 1055 void *
1087 1056 kmem_getpages(pgcnt_t npages, int kmflag)
1088 1057 {
1089 1058 return (kmem_alloc(ptob(npages), kmflag));
1090 1059 }
1091 1060
1092 1061 void
1093 1062 kmem_freepages(void *addr, pgcnt_t npages)
1094 1063 {
1095 1064 kmem_free(addr, ptob(npages));
1096 1065 }
1097 1066
1098 1067 /*
1099 1068 * segkmem_page_create_large() allocates a large page to be used for the kmem
1100 1069 * caches. If kpr is enabled we ask for a relocatable page unless requested
1101 1070 * otherwise. If kpr is disabled we have to ask for a non-reloc page
1102 1071 */
1103 1072 static page_t *
1104 1073 segkmem_page_create_large(void *addr, size_t size, int vmflag, void *arg)
1105 1074 {
1106 1075 int pgflags;
1107 1076
1108 1077 pgflags = PG_EXCL;
1109 1078
1110 1079 if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
1111 1080 pgflags |= PG_NORELOC;
1112 1081 if (!(vmflag & VM_NOSLEEP))
1113 1082 pgflags |= PG_WAIT;
1114 1083 if (vmflag & VM_PUSHPAGE)
1115 1084 pgflags |= PG_PUSHPAGE;
1116 1085 if (vmflag & VM_NORMALPRI)
1117 1086 pgflags |= PG_NORMALPRI;
1118 1087
1119 1088 return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
1120 1089 pgflags, &kvseg, addr, arg));
1121 1090 }
1122 1091
1123 1092 /*
1124 1093 * Allocate a large page to back the virtual address range
1125 1094 * [addr, addr + size). If addr is NULL, allocate the virtual address
1126 1095 * space as well.
1127 1096 */
1128 1097 static void *
1129 1098 segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
1130 1099 uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
1131 1100 void *pcarg)
1132 1101 {
1133 1102 caddr_t addr = inaddr, pa;
1134 1103 size_t lpsize = segkmem_lpsize;
1135 1104 pgcnt_t npages = btopr(size);
1136 1105 pgcnt_t nbpages = btop(lpsize);
1137 1106 pgcnt_t nlpages = size >> segkmem_lpshift;
1138 1107 size_t ppasize = nbpages * sizeof (page_t *);
1139 1108 page_t *pp, *rootpp, **ppa, *pplist = NULL;
1140 1109 int i;
1141 1110
1142 1111 vmflag |= VM_NOSLEEP;
1143 1112
1144 1113 if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
1145 1114 return (NULL);
1146 1115 }
1147 1116
1148 1117 /*
1149 1118 * allocate an array we need for hat_memload_array.
1150 1119 * we use a separate arena to avoid recursion.
1151 1120 * we will not need this array when hat_memload_array learns pp++
1152 1121 */
1153 1122 if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
1154 1123 goto fail_array_alloc;
1155 1124 }
1156 1125
1157 1126 if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
1158 1127 goto fail_vmem_alloc;
1159 1128
1160 1129 ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);
1161 1130
1162 1131 /* create all the pages */
1163 1132 for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
1164 1133 if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
1165 1134 goto fail_page_create;
1166 1135 page_list_concat(&pplist, &pp);
1167 1136 }
1168 1137
1169 1138 /* at this point we have all the resource to complete the request */
1170 1139 while ((rootpp = pplist) != NULL) {
1171 1140 for (i = 0; i < nbpages; i++) {
1172 1141 ASSERT(pplist != NULL);
1173 1142 pp = pplist;
1174 1143 page_sub(&pplist, pp);
1175 1144 ASSERT(page_iolock_assert(pp));
1176 1145 page_io_unlock(pp);
1177 1146 ppa[i] = pp;
1178 1147 }
1179 1148 /*
1180 1149 * Load the locked entry. It's OK to preload the entry into the
1181 1150 * TSB since we now support large mappings in the kernel TSB.
1182 1151 */
1183 1152 hat_memload_array(kas.a_hat,
1184 1153 (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
1185 1154 ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
1186 1155 HAT_LOAD_LOCK);
1187 1156
1188 1157 for (--i; i >= 0; --i) {
1189 1158 ppa[i]->p_lckcnt = 1;
1190 1159 page_unlock(ppa[i]);
1191 1160 }
1192 1161 }
1193 1162
1194 1163 vmem_free(segkmem_ppa_arena, ppa, ppasize);
1195 1164 return (addr);
1196 1165
1197 1166 fail_page_create:
1198 1167 while ((rootpp = pplist) != NULL) {
1199 1168 for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
1200 1169 ASSERT(pp != NULL);
1201 1170 page_sub(&pplist, pp);
1202 1171 ASSERT(page_iolock_assert(pp));
1203 1172 page_io_unlock(pp);
1204 1173 }
1205 1174 page_destroy_pages(rootpp);
1206 1175 }
1207 1176
1208 1177 if (inaddr == NULL)
1209 1178 vmem_free(vmp, addr, size);
1210 1179
1211 1180 fail_vmem_alloc:
1212 1181 vmem_free(segkmem_ppa_arena, ppa, ppasize);
1213 1182
1214 1183 fail_array_alloc:
1215 1184 page_unresv(npages);
1216 1185
1217 1186 return (NULL);
1218 1187 }
1219 1188
1220 1189 static void
1221 1190 segkmem_free_one_lp(caddr_t addr, size_t size)
1222 1191 {
1223 1192 page_t *pp, *rootpp = NULL;
1224 1193 pgcnt_t pgs_left = btopr(size);
1225 1194
1226 1195 ASSERT(size == segkmem_lpsize);
1227 1196
1228 1197 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1229 1198
1230 1199 for (; pgs_left > 0; addr += PAGESIZE, pgs_left--) {
1231 1200 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1232 1201 if (pp == NULL)
1233 1202 panic("segkmem_free_one_lp: page not found");
1234 1203 ASSERT(PAGE_EXCL(pp));
1235 1204 pp->p_lckcnt = 0;
1236 1205 if (rootpp == NULL)
1237 1206 rootpp = pp;
1238 1207 }
1239 1208 ASSERT(rootpp != NULL);
1240 1209 page_destroy_pages(rootpp);
1241 1210
1242 1211 /* page_unresv() is done by the caller */
1243 1212 }
1244 1213
1245 1214 /*
1246 1215 * This function is called to import new spans into the vmem arenas like
1247 1216 * kmem_default_arena and kmem_oversize_arena. It first tries to import
1248 1217 * spans from large page arena - kmem_lp_arena. In order to do this it might
1249 1218 * have to "upgrade the requested size" to kmem_lp_arena quantum. If
1250 1219 * it was not able to satisfy the upgraded request it then calls regular
1251 1220 * segkmem_alloc() that satisfies the request by importing from "*vmp" arena
1252 1221 */
1253 1222 /*ARGSUSED*/
1254 1223 void *
1255 1224 segkmem_alloc_lp(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
1256 1225 {
1257 1226 size_t size;
1258 1227 kthread_t *t = curthread;
1259 1228 segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1260 1229
1261 1230 ASSERT(sizep != NULL);
1262 1231
1263 1232 size = *sizep;
1264 1233
1265 1234 if (lpcb->lp_uselp && !(t->t_flag & T_PANIC) &&
1266 1235 !(vmflag & SEGKMEM_SHARELOCKED)) {
1267 1236
1268 1237 size_t kmemlp_qnt = segkmem_kmemlp_quantum;
1269 1238 size_t asize = P2ROUNDUP(size, kmemlp_qnt);
1270 1239 void *addr = NULL;
1271 1240 ulong_t *lpthrtp = &lpcb->lp_throttle;
1272 1241 ulong_t lpthrt = *lpthrtp;
1273 1242 int dowakeup = 0;
1274 1243 int doalloc = 1;
1275 1244
1276 1245 ASSERT(kmem_lp_arena != NULL);
1277 1246 ASSERT(asize >= size);
1278 1247
1279 1248 if (lpthrt != 0) {
1280 1249 /* try to update the throttle value */
1281 1250 lpthrt = atomic_inc_ulong_nv(lpthrtp);
1282 1251 if (lpthrt >= segkmem_lpthrottle_max) {
1283 1252 lpthrt = atomic_cas_ulong(lpthrtp, lpthrt,
1284 1253 segkmem_lpthrottle_max / 4);
1285 1254 }
1286 1255
1287 1256 /*
1288 1257 * when we get above throttle start do an exponential
1289 1258 * backoff at trying large pages and reaping
1290 1259 */
1291 1260 if (lpthrt > segkmem_lpthrottle_start &&
1292 1261 !ISP2(lpthrt)) {
1293 1262 lpcb->allocs_throttled++;
1294 1263 lpthrt--;
1295 1264 if (ISP2(lpthrt))
1296 1265 kmem_reap();
1297 1266 return (segkmem_alloc(vmp, size, vmflag));
1298 1267 }
1299 1268 }
1300 1269
1301 1270 if (!(vmflag & VM_NOSLEEP) &&
1302 1271 segkmem_heaplp_quantum >= (8 * kmemlp_qnt) &&
1303 1272 vmem_size(kmem_lp_arena, VMEM_FREE) <= kmemlp_qnt &&
1304 1273 asize < (segkmem_heaplp_quantum - kmemlp_qnt)) {
1305 1274
1306 1275 /*
1307 1276 * we are low on free memory in kmem_lp_arena
1308 1277 * we let only one guy to allocate heap_lp
1309 1278 * quantum size chunk that everybody is going to
1310 1279 * share
1311 1280 */
1312 1281 mutex_enter(&lpcb->lp_lock);
1313 1282
1314 1283 if (lpcb->lp_wait) {
1315 1284
1316 1285 /* we are not the first one - wait */
1317 1286 cv_wait(&lpcb->lp_cv, &lpcb->lp_lock);
1318 1287 if (vmem_size(kmem_lp_arena, VMEM_FREE) <
1319 1288 kmemlp_qnt) {
1320 1289 doalloc = 0;
1321 1290 }
1322 1291 } else if (vmem_size(kmem_lp_arena, VMEM_FREE) <=
1323 1292 kmemlp_qnt) {
1324 1293
1325 1294 /*
1326 1295 * we are the first one, make sure we import
1327 1296 * a large page
1328 1297 */
1329 1298 if (asize == kmemlp_qnt)
1330 1299 asize += kmemlp_qnt;
1331 1300 dowakeup = 1;
1332 1301 lpcb->lp_wait = 1;
1333 1302 }
1334 1303
1335 1304 mutex_exit(&lpcb->lp_lock);
1336 1305 }
1337 1306
1338 1307 /*
1339 1308 * VM_ABORT flag prevents sleeps in vmem_xalloc when
1340 1309 * large pages are not available. In that case this allocation
1341 1310 * attempt will fail and we will retry allocation with small
1342 1311 * pages. We also do not want to panic if this allocation fails
1343 1312 * because we are going to retry.
1344 1313 */
1345 1314 if (doalloc) {
1346 1315 addr = vmem_alloc(kmem_lp_arena, asize,
1347 1316 (vmflag | VM_ABORT) & ~VM_PANIC);
1348 1317
1349 1318 if (dowakeup) {
1350 1319 mutex_enter(&lpcb->lp_lock);
1351 1320 ASSERT(lpcb->lp_wait != 0);
1352 1321 lpcb->lp_wait = 0;
1353 1322 cv_broadcast(&lpcb->lp_cv);
1354 1323 mutex_exit(&lpcb->lp_lock);
1355 1324 }
1356 1325 }
1357 1326
1358 1327 if (addr != NULL) {
1359 1328 *sizep = asize;
1360 1329 *lpthrtp = 0;
1361 1330 return (addr);
1362 1331 }
1363 1332
1364 1333 if (vmflag & VM_NOSLEEP)
1365 1334 lpcb->nosleep_allocs_failed++;
1366 1335 else
1367 1336 lpcb->sleep_allocs_failed++;
1368 1337 lpcb->alloc_bytes_failed += size;
1369 1338
1370 1339 /* if large page throttling is not started yet do it */
1371 1340 if (segkmem_use_lpthrottle && lpthrt == 0) {
1372 1341 lpthrt = atomic_cas_ulong(lpthrtp, lpthrt, 1);
1373 1342 }
1374 1343 }
1375 1344 return (segkmem_alloc(vmp, size, vmflag));
1376 1345 }
1377 1346
1378 1347 void
1379 1348 segkmem_free_lp(vmem_t *vmp, void *inaddr, size_t size)
1380 1349 {
1381 1350 if (kmem_lp_arena == NULL || !IS_KMEM_VA_LARGEPAGE((caddr_t)inaddr)) {
1382 1351 segkmem_free(vmp, inaddr, size);
1383 1352 } else {
1384 1353 vmem_free(kmem_lp_arena, inaddr, size);
1385 1354 }
1386 1355 }
1387 1356
1388 1357 /*
1389 1358 * segkmem_alloc_lpi() imports virtual memory from large page heap arena
1390 1359 * into kmem_lp arena. In the process it maps the imported segment with
1391 1360 * large pages
1392 1361 */
1393 1362 static void *
1394 1363 segkmem_alloc_lpi(vmem_t *vmp, size_t size, int vmflag)
1395 1364 {
1396 1365 segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1397 1366 void *addr;
1398 1367
1399 1368 ASSERT(size != 0);
1400 1369 ASSERT(vmp == heap_lp_arena);
1401 1370
1402 1371 /* do not allow large page heap grow beyound limits */
1403 1372 if (vmem_size(vmp, VMEM_ALLOC) >= segkmem_kmemlp_max) {
1404 1373 lpcb->allocs_limited++;
1405 1374 return (NULL);
1406 1375 }
1407 1376
1408 1377 addr = segkmem_xalloc_lp(vmp, NULL, size, vmflag, 0,
1409 1378 segkmem_page_create_large, NULL);
1410 1379 return (addr);
1411 1380 }
1412 1381
1413 1382 /*
1414 1383 * segkmem_free_lpi() returns virtual memory back into large page heap arena
1415 1384 * from kmem_lp arena. Beore doing this it unmaps the segment and frees
1416 1385 * large pages used to map it.
1417 1386 */
1418 1387 static void
1419 1388 segkmem_free_lpi(vmem_t *vmp, void *inaddr, size_t size)
1420 1389 {
1421 1390 pgcnt_t nlpages = size >> segkmem_lpshift;
1422 1391 size_t lpsize = segkmem_lpsize;
1423 1392 caddr_t addr = inaddr;
1424 1393 pgcnt_t npages = btopr(size);
1425 1394 int i;
1426 1395
1427 1396 ASSERT(vmp == heap_lp_arena);
1428 1397 ASSERT(IS_KMEM_VA_LARGEPAGE(addr));
1429 1398 ASSERT(((uintptr_t)inaddr & (lpsize - 1)) == 0);
1430 1399
1431 1400 for (i = 0; i < nlpages; i++) {
1432 1401 segkmem_free_one_lp(addr, lpsize);
1433 1402 addr += lpsize;
1434 1403 }
1435 1404
1436 1405 page_unresv(npages);
1437 1406
1438 1407 vmem_free(vmp, inaddr, size);
1439 1408 }
1440 1409
1441 1410 /*
1442 1411 * This function is called at system boot time by kmem_init right after
1443 1412 * /etc/system file has been read. It checks based on hardware configuration
1444 1413 * and /etc/system settings if system is going to use large pages. The
1445 1414 * initialiazation necessary to actually start using large pages
1446 1415 * happens later in the process after segkmem_heap_lp_init() is called.
1447 1416 */
1448 1417 int
1449 1418 segkmem_lpsetup()
1450 1419 {
1451 1420 int use_large_pages = 0;
1452 1421
1453 1422 #ifdef __sparc
1454 1423
1455 1424 size_t memtotal = physmem * PAGESIZE;
1456 1425
1457 1426 if (heap_lp_base == NULL) {
1458 1427 segkmem_lpsize = PAGESIZE;
1459 1428 return (0);
1460 1429 }
1461 1430
1462 1431 /* get a platform dependent value of large page size for kernel heap */
1463 1432 segkmem_lpsize = get_segkmem_lpsize(segkmem_lpsize);
1464 1433
1465 1434 if (segkmem_lpsize <= PAGESIZE) {
1466 1435 /*
1467 1436 * put virtual space reserved for the large page kernel
1468 1437 * back to the regular heap
1469 1438 */
1470 1439 vmem_xfree(heap_arena, heap_lp_base,
1471 1440 heap_lp_end - heap_lp_base);
1472 1441 heap_lp_base = NULL;
1473 1442 heap_lp_end = NULL;
1474 1443 segkmem_lpsize = PAGESIZE;
1475 1444 return (0);
1476 1445 }
1477 1446
1478 1447 /* set heap_lp quantum if necessary */
1479 1448 if (segkmem_heaplp_quantum == 0 || !ISP2(segkmem_heaplp_quantum) ||
1480 1449 P2PHASE(segkmem_heaplp_quantum, segkmem_lpsize)) {
1481 1450 segkmem_heaplp_quantum = segkmem_lpsize;
1482 1451 }
1483 1452
1484 1453 /* set kmem_lp quantum if necessary */
1485 1454 if (segkmem_kmemlp_quantum == 0 || !ISP2(segkmem_kmemlp_quantum) ||
1486 1455 segkmem_kmemlp_quantum > segkmem_heaplp_quantum) {
1487 1456 segkmem_kmemlp_quantum = segkmem_heaplp_quantum;
1488 1457 }
1489 1458
1490 1459 /* set total amount of memory allowed for large page kernel heap */
1491 1460 if (segkmem_kmemlp_max == 0) {
1492 1461 if (segkmem_kmemlp_pcnt == 0 || segkmem_kmemlp_pcnt > 100)
1493 1462 segkmem_kmemlp_pcnt = 12;
1494 1463 segkmem_kmemlp_max = (memtotal * segkmem_kmemlp_pcnt) / 100;
1495 1464 }
1496 1465 segkmem_kmemlp_max = P2ROUNDUP(segkmem_kmemlp_max,
1497 1466 segkmem_heaplp_quantum);
1498 1467
1499 1468 /* fix lp kmem preallocation request if necesssary */
1500 1469 if (segkmem_kmemlp_min) {
1501 1470 segkmem_kmemlp_min = P2ROUNDUP(segkmem_kmemlp_min,
1502 1471 segkmem_heaplp_quantum);
1503 1472 if (segkmem_kmemlp_min > segkmem_kmemlp_max)
1504 1473 segkmem_kmemlp_min = segkmem_kmemlp_max;
1505 1474 }
1506 1475
1507 1476 use_large_pages = 1;
1508 1477 segkmem_lpszc = page_szc(segkmem_lpsize);
1509 1478 segkmem_lpshift = page_get_shift(segkmem_lpszc);
1510 1479
1511 1480 #endif
1512 1481 return (use_large_pages);
1513 1482 }
1514 1483
1515 1484 void
1516 1485 segkmem_zio_init(void *zio_mem_base, size_t zio_mem_size)
1517 1486 {
1518 1487 ASSERT(zio_mem_base != NULL);
1519 1488 ASSERT(zio_mem_size != 0);
1520 1489
1521 1490 /*
1522 1491 * To reduce VA space fragmentation, we set up quantum caches for the
1523 1492 * smaller sizes; we chose 32k because that translates to 128k VA
1524 1493 * slabs, which matches nicely with the common 128k zio_data bufs.
1525 1494 */
1526 1495 zio_arena = vmem_create("zfs_file_data", zio_mem_base, zio_mem_size,
1527 1496 PAGESIZE, NULL, NULL, NULL, 32 * 1024, VM_SLEEP);
1528 1497
1529 1498 zio_alloc_arena = vmem_create("zfs_file_data_buf", NULL, 0, PAGESIZE,
1530 1499 segkmem_zio_alloc, segkmem_zio_free, zio_arena, 0, VM_SLEEP);
1531 1500
1532 1501 ASSERT(zio_arena != NULL);
1533 1502 ASSERT(zio_alloc_arena != NULL);
1534 1503 }
1535 1504
1536 1505 #ifdef __sparc
1537 1506
1538 1507
1539 1508 static void *
1540 1509 segkmem_alloc_ppa(vmem_t *vmp, size_t size, int vmflag)
1541 1510 {
1542 1511 size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1543 1512 void *addr;
1544 1513
1545 1514 if (ppaquantum <= PAGESIZE)
1546 1515 return (segkmem_alloc(vmp, size, vmflag));
1547 1516
1548 1517 ASSERT((size & (ppaquantum - 1)) == 0);
1549 1518
1550 1519 addr = vmem_xalloc(vmp, size, ppaquantum, 0, 0, NULL, NULL, vmflag);
1551 1520 if (addr != NULL && segkmem_xalloc(vmp, addr, size, vmflag, 0,
1552 1521 segkmem_page_create, NULL) == NULL) {
1553 1522 vmem_xfree(vmp, addr, size);
1554 1523 addr = NULL;
1555 1524 }
1556 1525
1557 1526 return (addr);
1558 1527 }
1559 1528
1560 1529 static void
1561 1530 segkmem_free_ppa(vmem_t *vmp, void *addr, size_t size)
1562 1531 {
1563 1532 size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1564 1533
1565 1534 ASSERT(addr != NULL);
1566 1535
1567 1536 if (ppaquantum <= PAGESIZE) {
1568 1537 segkmem_free(vmp, addr, size);
1569 1538 } else {
1570 1539 segkmem_free(NULL, addr, size);
1571 1540 vmem_xfree(vmp, addr, size);
1572 1541 }
1573 1542 }
1574 1543
1575 1544 void
1576 1545 segkmem_heap_lp_init()
1577 1546 {
1578 1547 segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1579 1548 size_t heap_lp_size = heap_lp_end - heap_lp_base;
1580 1549 size_t lpsize = segkmem_lpsize;
1581 1550 size_t ppaquantum;
1582 1551 void *addr;
1583 1552
1584 1553 if (segkmem_lpsize <= PAGESIZE) {
1585 1554 ASSERT(heap_lp_base == NULL);
1586 1555 ASSERT(heap_lp_end == NULL);
1587 1556 return;
1588 1557 }
1589 1558
1590 1559 ASSERT(segkmem_heaplp_quantum >= lpsize);
1591 1560 ASSERT((segkmem_heaplp_quantum & (lpsize - 1)) == 0);
1592 1561 ASSERT(lpcb->lp_uselp == 0);
1593 1562 ASSERT(heap_lp_base != NULL);
1594 1563 ASSERT(heap_lp_end != NULL);
1595 1564 ASSERT(heap_lp_base < heap_lp_end);
1596 1565 ASSERT(heap_lp_arena == NULL);
1597 1566 ASSERT(((uintptr_t)heap_lp_base & (lpsize - 1)) == 0);
1598 1567 ASSERT(((uintptr_t)heap_lp_end & (lpsize - 1)) == 0);
1599 1568
1600 1569 /* create large page heap arena */
1601 1570 heap_lp_arena = vmem_create("heap_lp", heap_lp_base, heap_lp_size,
1602 1571 segkmem_heaplp_quantum, NULL, NULL, NULL, 0, VM_SLEEP);
1603 1572
1604 1573 ASSERT(heap_lp_arena != NULL);
1605 1574
1606 1575 /* This arena caches memory already mapped by large pages */
1607 1576 kmem_lp_arena = vmem_create("kmem_lp", NULL, 0, segkmem_kmemlp_quantum,
1608 1577 segkmem_alloc_lpi, segkmem_free_lpi, heap_lp_arena, 0, VM_SLEEP);
1609 1578
1610 1579 ASSERT(kmem_lp_arena != NULL);
1611 1580
1612 1581 mutex_init(&lpcb->lp_lock, NULL, MUTEX_DEFAULT, NULL);
1613 1582 cv_init(&lpcb->lp_cv, NULL, CV_DEFAULT, NULL);
1614 1583
1615 1584 /*
1616 1585 * this arena is used for the array of page_t pointers necessary
1617 1586 * to call hat_mem_load_array
1618 1587 */
1619 1588 ppaquantum = btopr(lpsize) * sizeof (page_t *);
1620 1589 segkmem_ppa_arena = vmem_create("segkmem_ppa", NULL, 0, ppaquantum,
1621 1590 segkmem_alloc_ppa, segkmem_free_ppa, heap_arena, ppaquantum,
1622 1591 VM_SLEEP);
1623 1592
1624 1593 ASSERT(segkmem_ppa_arena != NULL);
1625 1594
1626 1595 /* prealloacate some memory for the lp kernel heap */
1627 1596 if (segkmem_kmemlp_min) {
1628 1597
1629 1598 ASSERT(P2PHASE(segkmem_kmemlp_min,
1630 1599 segkmem_heaplp_quantum) == 0);
1631 1600
1632 1601 if ((addr = segkmem_alloc_lpi(heap_lp_arena,
1633 1602 segkmem_kmemlp_min, VM_SLEEP)) != NULL) {
1634 1603
1635 1604 addr = vmem_add(kmem_lp_arena, addr,
1636 1605 segkmem_kmemlp_min, VM_SLEEP);
1637 1606 ASSERT(addr != NULL);
1638 1607 }
1639 1608 }
1640 1609
1641 1610 lpcb->lp_uselp = 1;
1642 1611 }
1643 1612
1644 1613 #endif