const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
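In practice this means the ops tables become const objects and anything that holds a pointer to them becomes a pointer-to-const. A minimal sketch of the idea (the s_ops member name follows the illumos struct seg; everything else is elided):

	static const struct seg_ops segkp_ops = {
		/* ... */
	};

	struct seg {
		/* ... */
		const struct seg_ops *s_ops;	/* was: struct seg_ops *s_ops */
		/* ... */
	};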
use NULL getmemid segop as a shorthand for ENODEV
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENODEV, handle a NULL getmemid segop function pointer as
"return ENODEV" shorthand.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle a NULL capable segop function pointer as "no capabilities
supported" shorthand.
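Sketches of what the corresponding dispatch wrappers look like under these two conventions (the segop_getmemid()/segop_capable() names and the s_ops member follow the illumos sources; treat the bodies as illustrative rather than verbatim):

	int
	segop_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
	{
		if (seg->s_ops->getmemid == NULL)
			return (ENODEV);

		return (seg->s_ops->getmemid(seg, addr, memidp));
	}

	int
	segop_capable(struct seg *seg, segcapability_t capability)
	{
		if (seg->s_ops->capable == NULL)
			return (0);	/* no optional capabilities */

		return (seg->s_ops->capable(seg, capability));
	}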
segop_getpolicy already checks for a NULL op
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL accomplishes the same thing in most cases;
in the remaining cases, a NULL function pointer results in the proper error
code being returned.
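To make the two cases concrete, here is a hypothetical pair of wrappers (segop_foo() and segop_bar() are made-up names, not real segment ops):

	/*
	 * Hypothetical mandatory op: no NULL check.  A driver that leaves
	 * the pointer NULL dies on the call, just as the old panicking
	 * bad-op stub did.
	 */
	int
	segop_foo(struct seg *seg, caddr_t addr)
	{
		return (seg->s_ops->foo(seg, addr));
	}

	/*
	 * Hypothetical optional op: a NULL pointer becomes a proper error
	 * code instead of a crash.
	 */
	int
	segop_bar(struct seg *seg, caddr_t addr)
	{
		if (seg->s_ops->bar == NULL)
			return (ENOTSUP);

		return (seg->s_ops->bar(seg, addr));
	}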
use C99 initializers in segment ops structures
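The segkp_ops table in the listings below shows the effect: a positional initializer that had to fill every slot (mostly with bad-ops) becomes a designated initializer naming only the five ops segkp actually supplies. As a tiny self-contained illustration of the same idiom (generic struct, nothing kernel-specific):

	#include <stdio.h>

	struct ops {
		int (*open)(void);
		int (*close)(void);
		int (*ioctl)(int);
	};

	static int
	my_open(void)
	{
		return (42);
	}

	/* Positional: members must appear in declaration order, up to the last one used. */
	static const struct ops positional = { my_open, NULL, NULL };

	/* C99 designated: name only what is implemented; the rest defaults to NULL. */
	static const struct ops designated = { .open = my_open };

	int
	main(void)
	{
		printf("%d %d\n", positional.open(), designated.open());
		return (0);
	}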
remove whole-process swapping
Long before Unix supported paging, it used whole-process swapping to reclaim
memory.  The code is still there, and in theory it runs when we get *extremely*
low on memory.  In practice, it never runs, since the definition of
low-on-memory is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
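Both counters read zero on any system that has never resorted to whole-process swapping; illustrative kstat -p output (exact statistic paths assumed):

	cpu:0:vm:swapin	0
	cpu:0:vm:swapout	0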

The affected excerpts of the segkp driver (seg_kp.c), before these changes:

  58 #include <sys/dumphdr.h>
  59 #include <sys/debug.h>
  60 #include <sys/vtrace.h>
  61 #include <sys/stack.h>
  62 #include <sys/atomic.h>
  63 #include <sys/archsystm.h>
  64 #include <sys/lgrp.h>
  65 
  66 #include <vm/as.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_kp.h>
  69 #include <vm/seg_kmem.h>
  70 #include <vm/anon.h>
  71 #include <vm/page.h>
  72 #include <vm/hat.h>
  73 #include <sys/bitmap.h>
  74 
  75 /*
  76  * Private seg op routines
  77  */
  78 static void     segkp_badop(void);
  79 static void     segkp_dump(struct seg *seg);
  80 static int      segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
  81                         uint_t prot);
  82 static int      segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
  83 static int      segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
  84                         struct page ***page, enum lock_type type,
  85                         enum seg_rw rw);
  86 static void     segkp_insert(struct seg *seg, struct segkp_data *kpd);
  87 static void     segkp_delete(struct seg *seg, struct segkp_data *kpd);
  88 static caddr_t  segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
  89                         struct segkp_data **tkpd, struct anon_map *amp);
  90 static void     segkp_release_internal(struct seg *seg,
  91                         struct segkp_data *kpd, size_t len);
  92 static int      segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
  93                         size_t len, struct segkp_data *kpd, uint_t flags);
  94 static int      segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
  95                         size_t len, struct segkp_data *kpd, uint_t flags);
  96 static struct   segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
  97 static int      segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  98 static lgrp_mem_policy_info_t   *segkp_getpolicy(struct seg *seg,
  99     caddr_t addr);
 100 static int      segkp_capable(struct seg *seg, segcapability_t capability);
 101 
 102 /*
 103  * Lock used to protect the hash table(s) and caches.
 104  */
 105 static kmutex_t segkp_lock;
 106 
 107 /*
 108  * The segkp caches
 109  */
 110 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
 111 
 112 #define SEGKP_BADOP(t)  (t(*)())segkp_badop
 113 
 114 /*
 115  * When there are fewer than red_minavail bytes left on the stack,
 116  * segkp_map_red() will map in the redzone (if called).  5000 seems
 117  * to work reasonably well...
 118  */
 119 long            red_minavail = 5000;
 120 
 121 /*
 122  * will be set to 1 for 32 bit x86 systems only, in startup.c
 123  */
 124 int     segkp_fromheap = 0;
 125 ulong_t *segkp_bitmap;
 126 
 127 /*
 128  * If segkp_map_red() is called with the redzone already mapped and
 129  * with less than RED_DEEP_THRESHOLD bytes available on the stack,
 130  * then the stack situation has become quite serious;  if much more stack
 131  * is consumed, we have the potential of scrogging the next thread/LWP
 132  * structure.  To help debug the "can't happen" panics which may
 133  * result from this condition, we record hrestime and the calling thread
 134  * in red_deep_hires and red_deep_thread respectively.
 135  */
 136 #define RED_DEEP_THRESHOLD      2000
 137 
 138 hrtime_t        red_deep_hires;
 139 kthread_t       *red_deep_thread;
 140 
 141 uint32_t        red_nmapped;
 142 uint32_t        red_closest = UINT_MAX;
 143 uint32_t        red_ndoubles;
 144 
 145 pgcnt_t anon_segkp_pages_locked;        /* See vm/anon.h */
 146 pgcnt_t anon_segkp_pages_resv;          /* anon reserved by seg_kp */
 147 
 148 static struct   seg_ops segkp_ops = {
 149         SEGKP_BADOP(int),               /* dup */
 150         SEGKP_BADOP(int),               /* unmap */
 151         SEGKP_BADOP(void),              /* free */
 152         segkp_fault,
 153         SEGKP_BADOP(faultcode_t),       /* faulta */
 154         SEGKP_BADOP(int),               /* setprot */
 155         segkp_checkprot,
 156         segkp_kluster,
 157         SEGKP_BADOP(size_t),            /* swapout */
 158         SEGKP_BADOP(int),               /* sync */
 159         SEGKP_BADOP(size_t),            /* incore */
 160         SEGKP_BADOP(int),               /* lockop */
 161         SEGKP_BADOP(int),               /* getprot */
 162         SEGKP_BADOP(u_offset_t),                /* getoffset */
 163         SEGKP_BADOP(int),               /* gettype */
 164         SEGKP_BADOP(int),               /* getvp */
 165         SEGKP_BADOP(int),               /* advise */
 166         segkp_dump,                     /* dump */
 167         segkp_pagelock,                 /* pagelock */
 168         SEGKP_BADOP(int),               /* setpgsz */
 169         segkp_getmemid,                 /* getmemid */
 170         segkp_getpolicy,                /* getpolicy */
 171         segkp_capable,                  /* capable */
 172         seg_inherit_notsup              /* inherit */
 173 };
 174 
 175 
 176 static void
 177 segkp_badop(void)
 178 {
 179         panic("segkp_badop");
 180         /*NOTREACHED*/
 181 }
 182 
 183 static void segkpinit_mem_config(struct seg *);
 184 
 185 static uint32_t segkp_indel;
 186 
 187 /*
 188  * Allocate the segment specific private data struct and fill it in
 189  * with the per kp segment mutex, anon ptr. array and hash table.
 190  */
 191 int
 192 segkp_create(struct seg *seg)
 193 {
 194         struct segkp_segdata *kpsd;
 195         size_t  np;
 196 
 197         ASSERT(seg != NULL && seg->s_as == &kas);
 198         ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
 199 
 200         if (seg->s_size & PAGEOFFSET) {
 201                 panic("Bad segkp size");
 202                 /*NOTREACHED*/


 743                 }
 744         }
 745 
 746         /* If locked, release physical memory reservation */
 747         if (kpd->kp_flags & KPD_LOCKED) {
 748                 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
 749                 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
 750                         atomic_add_long(&anon_segkp_pages_locked, -pages);
 751                 page_unresv(pages);
 752         }
 753 
 754         vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
 755         kmem_free(kpd, sizeof (struct segkp_data));
 756 }
 757 
 758 /*
 759  * segkp_map_red() will check the current frame pointer against the
 760  * stack base.  If the amount of stack remaining is questionable
 761  * (less than red_minavail), then segkp_map_red() will map in the redzone
 762  * and return 1.  Otherwise, it will return 0.  segkp_map_red() can
 763  * _only_ be called when:
 764  *
 765  *   - it is safe to sleep on page_create_va().
 766  *   - the caller is non-swappable.
 767  *
 768  * It is up to the caller to remember whether segkp_map_red() successfully
 769  * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
 770  * time.  Note that the caller must _remain_ non-swappable until after
 771  * calling segkp_unmap_red().
 772  *
 773  * Currently, this routine is only called from pagefault() (which necessarily
 774  * satisfies the above conditions).
 775  */
 776 #if defined(STACK_GROWTH_DOWN)
 777 int
 778 segkp_map_red(void)
 779 {
 780         uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
 781 #ifndef _LP64
 782         caddr_t stkbase;
 783 #endif
 784 
 785         ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
 786 
 787         /*
 788          * Optimize for the common case where we simply return.
 789          */
 790         if ((curthread->t_red_pp == NULL) &&
 791             (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
 792                 return (0);
 793 
 794 #if defined(_LP64)
 795         /*
 796          * XXX  We probably need something better than this.
 797          */
 798         panic("kernel stack overflow");
 799         /*NOTREACHED*/
 800 #else /* _LP64 */
 801         if (curthread->t_red_pp == NULL) {
 802                 page_t *red_pp;
 803                 struct seg kseg;
 804 
 805                 caddr_t red_va = (caddr_t)
 806                     (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -


 867                 red_deep_hires = hrestime.tv_nsec;
 868                 red_deep_thread = curthread;
 869         }
 870 
 871         /*
 872          * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
 873          */
 874         ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
 875         return (0);
 876 #endif /* _LP64 */
 877 }
 878 
 879 void
 880 segkp_unmap_red(void)
 881 {
 882         page_t *pp;
 883         caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
 884             (uintptr_t)PAGEMASK) - PAGESIZE);
 885 
 886         ASSERT(curthread->t_red_pp != NULL);
 887         ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
 888 
 889         /*
 890          * Because we locked the mapping down, we can't simply rely
 891          * on page_destroy() to clean everything up;  we need to call
 892          * hat_unload() to explicitly unlock the mapping resources.
 893          */
 894         hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
 895 
 896         pp = curthread->t_red_pp;
 897 
 898         ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
 899 
 900         /*
 901          * Need to upgrade the SE_SHARED lock to SE_EXCL.
 902          */
 903         if (!page_tryupgrade(pp)) {
 904                 /*
 905                  * As there is now wait for upgrade, release the
 906                  * SE_SHARED lock and wait for SE_EXCL.
 907                  */


1380                         addr = kpd->kp_base;
1381                         eaddr = addr + kpd->kp_len;
1382                         while (addr < eaddr) {
1383                                 ASSERT(seg->s_as == &kas);
1384                                 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1385                                 if (pfn != PFN_INVALID)
1386                                         dump_addpage(seg->s_as, addr, pfn);
1387                                 addr += PAGESIZE;
1388                                 dump_timeleft = dump_timeout;
1389                         }
1390                 }
1391         }
1392 }
1393 
1394 /*ARGSUSED*/
1395 static int
1396 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1397     struct page ***ppp, enum lock_type type, enum seg_rw rw)
1398 {
1399         return (ENOTSUP);
1400 }
1401 
1402 /*ARGSUSED*/
1403 static int
1404 segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
1405 {
1406         return (ENODEV);
1407 }
1408 
1409 /*ARGSUSED*/
1410 static lgrp_mem_policy_info_t   *
1411 segkp_getpolicy(struct seg *seg, caddr_t addr)
1412 {
1413         return (NULL);
1414 }
1415 
1416 /*ARGSUSED*/
1417 static int
1418 segkp_capable(struct seg *seg, segcapability_t capability)
1419 {
1420         return (0);
1421 }
1422 
1423 #include <sys/mem_config.h>
1424 
1425 /*ARGSUSED*/
1426 static void
1427 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1428 {}
1429 
1430 /*
1431  * During memory delete, turn off caches so that pages are not held.
1432  * A better solution may be to unlock the pages while they are
1433  * in the cache so that they may be collected naturally.
1434  */
1435 
1436 /*ARGSUSED*/
1437 static int
1438 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1439 {
1440         atomic_inc_32(&segkp_indel);


And the same excerpts after the changes:

  58 #include <sys/dumphdr.h>
  59 #include <sys/debug.h>
  60 #include <sys/vtrace.h>
  61 #include <sys/stack.h>
  62 #include <sys/atomic.h>
  63 #include <sys/archsystm.h>
  64 #include <sys/lgrp.h>
  65 
  66 #include <vm/as.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_kp.h>
  69 #include <vm/seg_kmem.h>
  70 #include <vm/anon.h>
  71 #include <vm/page.h>
  72 #include <vm/hat.h>
  73 #include <sys/bitmap.h>
  74 
  75 /*
  76  * Private seg op routines
  77  */

  78 static void     segkp_dump(struct seg *seg);
  79 static int      segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
  80                         uint_t prot);
  81 static int      segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
  82 static int      segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
  83                         struct page ***page, enum lock_type type,
  84                         enum seg_rw rw);
  85 static void     segkp_insert(struct seg *seg, struct segkp_data *kpd);
  86 static void     segkp_delete(struct seg *seg, struct segkp_data *kpd);
  87 static caddr_t  segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
  88                         struct segkp_data **tkpd, struct anon_map *amp);
  89 static void     segkp_release_internal(struct seg *seg,
  90                         struct segkp_data *kpd, size_t len);
  91 static int      segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
  92                         size_t len, struct segkp_data *kpd, uint_t flags);
  93 static int      segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
  94                         size_t len, struct segkp_data *kpd, uint_t flags);
  95 static struct   segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
  96 
  97 /*
  98  * Lock used to protect the hash table(s) and caches.
  99  */
 100 static kmutex_t segkp_lock;
 101 
 102 /*
 103  * The segkp caches
 104  */
 105 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
 106 


 107 /*
 108  * When there are fewer than red_minavail bytes left on the stack,
 109  * segkp_map_red() will map in the redzone (if called).  5000 seems
 110  * to work reasonably well...
 111  */
 112 long            red_minavail = 5000;
 113 
 114 /*
 115  * will be set to 1 for 32 bit x86 systems only, in startup.c
 116  */
 117 int     segkp_fromheap = 0;
 118 ulong_t *segkp_bitmap;
 119 
 120 /*
 121  * If segkp_map_red() is called with the redzone already mapped and
 122  * with less than RED_DEEP_THRESHOLD bytes available on the stack,
 123  * then the stack situation has become quite serious;  if much more stack
 124  * is consumed, we have the potential of scrogging the next thread/LWP
 125  * structure.  To help debug the "can't happen" panics which may
 126  * result from this condition, we record hrestime and the calling thread
 127  * in red_deep_hires and red_deep_thread respectively.
 128  */
 129 #define RED_DEEP_THRESHOLD      2000
 130 
 131 hrtime_t        red_deep_hires;
 132 kthread_t       *red_deep_thread;
 133 
 134 uint32_t        red_nmapped;
 135 uint32_t        red_closest = UINT_MAX;
 136 uint32_t        red_ndoubles;
 137 
 138 pgcnt_t anon_segkp_pages_locked;        /* See vm/anon.h */
 139 pgcnt_t anon_segkp_pages_resv;          /* anon reserved by seg_kp */
 140 
 141 static const struct seg_ops segkp_ops = {
 142         .fault          = segkp_fault,
 143         .checkprot      = segkp_checkprot,
 144         .kluster        = segkp_kluster,
 145         .dump           = segkp_dump,
 146         .pagelock       = segkp_pagelock,
 147 };
 148 
 149 
 150 static void segkpinit_mem_config(struct seg *);
 151 
 152 static uint32_t segkp_indel;
 153 
 154 /*
 155  * Allocate the segment specific private data struct and fill it in
 156  * with the per kp segment mutex, anon ptr. array and hash table.
 157  */
 158 int
 159 segkp_create(struct seg *seg)
 160 {
 161         struct segkp_segdata *kpsd;
 162         size_t  np;
 163 
 164         ASSERT(seg != NULL && seg->s_as == &kas);
 165         ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
 166 
 167         if (seg->s_size & PAGEOFFSET) {
 168                 panic("Bad segkp size");
 169                 /*NOTREACHED*/


 710                 }
 711         }
 712 
 713         /* If locked, release physical memory reservation */
 714         if (kpd->kp_flags & KPD_LOCKED) {
 715                 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
 716                 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
 717                         atomic_add_long(&anon_segkp_pages_locked, -pages);
 718                 page_unresv(pages);
 719         }
 720 
 721         vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
 722         kmem_free(kpd, sizeof (struct segkp_data));
 723 }
 724 
 725 /*
 726  * segkp_map_red() will check the current frame pointer against the
 727  * stack base.  If the amount of stack remaining is questionable
 728  * (less than red_minavail), then segkp_map_red() will map in the redzone
 729  * and return 1.  Otherwise, it will return 0.  segkp_map_red() can
 730  * _only_ be called when it is safe to sleep on page_create_va().
 731  *
 732  * It is up to the caller to remember whether segkp_map_red() successfully
 733  * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
 734  * time.

 735  *
 736  * Currently, this routine is only called from pagefault() (which necessarily
 737  * satisfies the above conditions).
 738  */
 739 #if defined(STACK_GROWTH_DOWN)
 740 int
 741 segkp_map_red(void)
 742 {
 743         uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
 744 #ifndef _LP64
 745         caddr_t stkbase;
 746 #endif
 747 
 748         /*
 749          * Optimize for the common case where we simply return.
 750          */
 751         if ((curthread->t_red_pp == NULL) &&
 752             (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
 753                 return (0);
 754 
 755 #if defined(_LP64)
 756         /*
 757          * XXX  We probably need something better than this.
 758          */
 759         panic("kernel stack overflow");
 760         /*NOTREACHED*/
 761 #else /* _LP64 */
 762         if (curthread->t_red_pp == NULL) {
 763                 page_t *red_pp;
 764                 struct seg kseg;
 765 
 766                 caddr_t red_va = (caddr_t)
 767                     (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -


 828                 red_deep_hires = hrestime.tv_nsec;
 829                 red_deep_thread = curthread;
 830         }
 831 
 832         /*
 833          * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
 834          */
 835         ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
 836         return (0);
 837 #endif /* _LP64 */
 838 }
 839 
 840 void
 841 segkp_unmap_red(void)
 842 {
 843         page_t *pp;
 844         caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
 845             (uintptr_t)PAGEMASK) - PAGESIZE);
 846 
 847         ASSERT(curthread->t_red_pp != NULL);

 848 
 849         /*
 850          * Because we locked the mapping down, we can't simply rely
 851          * on page_destroy() to clean everything up;  we need to call
 852          * hat_unload() to explicitly unlock the mapping resources.
 853          */
 854         hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
 855 
 856         pp = curthread->t_red_pp;
 857 
 858         ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
 859 
 860         /*
 861          * Need to upgrade the SE_SHARED lock to SE_EXCL.
 862          */
 863         if (!page_tryupgrade(pp)) {
 864                 /*
 865                  * As there is now wait for upgrade, release the
 866                  * SE_SHARED lock and wait for SE_EXCL.
 867                  */


1340                         addr = kpd->kp_base;
1341                         eaddr = addr + kpd->kp_len;
1342                         while (addr < eaddr) {
1343                                 ASSERT(seg->s_as == &kas);
1344                                 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1345                                 if (pfn != PFN_INVALID)
1346                                         dump_addpage(seg->s_as, addr, pfn);
1347                                 addr += PAGESIZE;
1348                                 dump_timeleft = dump_timeout;
1349                         }
1350                 }
1351         }
1352 }
1353 
1354 /*ARGSUSED*/
1355 static int
1356 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1357     struct page ***ppp, enum lock_type type, enum seg_rw rw)
1358 {
1359         return (ENOTSUP);
1360 }
1361 
1362 #include <sys/mem_config.h>
1363 
1364 /*ARGSUSED*/
1365 static void
1366 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1367 {}
1368 
1369 /*
1370  * During memory delete, turn off caches so that pages are not held.
1371  * A better solution may be to unlock the pages while they are
1372  * in the cache so that they may be collected naturally.
1373  */
1374 
1375 /*ARGSUSED*/
1376 static int
1377 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1378 {
1379         atomic_inc_32(&segkp_indel);