patch segpcache-maxwindow-is-useless
use NULL dump segop as a shorthand for no-op
Instead of forcing every segment driver to implement a dummy function that
does nothing, handle NULL dump segop function pointer as a no-op shorthand.
const-ify make segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL setpagesize segop as a shorthand for ENOTSUP
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENOTSUP, handle NULL setpagesize segop function pointer
as "return ENOTSUP" shorthand.
use NULL getmemid segop as a shorthand for ENODEV
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENODEV, handle NULL getmemid segop function pointer as
"return ENODEV" shorthand.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle NULL capable segop function pointer as "no capabilities
supported" shorthand (the sketch after this patch list shows the pattern).
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
patch lower-case-segops
instead of using SEGOP_* macros, define full-fledged segop_* functions
This will allow us to do some sanity checking or even implement stub
functionality in one place instead of duplicating it wherever these wrappers
are used.
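
The resulting shape is sketched below (editorial illustration, not part of
the webrev). The old SEGOP_DUMP() macro is quoted from memory of seg.h and
may differ in detail; the new segop_dump() matches the wrapper shown later
on this page. Every optional entry point (dump, setpagesize, getmemid,
getpolicy, capable, inherit) follows the same shape, differing only in the
default supplied when the pointer is NULL.

/*
 * Old style: a macro that jumps through the ops vector blindly,
 * so every driver had to supply a dummy function.
 */
#define SEGOP_DUMP(s)   (*(s)->s_ops->dump)((s))

/*
 * New style: a first-class function that treats a NULL entry as
 * the sensible default -- for dump, a no-op.
 */
void
segop_dump(struct seg *seg)
{
        if (seg->s_ops->dump == NULL)
                return;

        seg->s_ops->dump(seg);
}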


 109         struct seg_pcache       *p_hprev;
 110         kmutex_t                p_hmutex;       /* protects hash bucket */
 111 };
 112 
 113 /*
 114  * A parameter to control a maximum number of bytes that can be
 115  * purged from pcache at a time.
 116  */
 117 #define P_MAX_APURGE_BYTES      (1024 * 1024 * 1024)
 118 
 119 /*
 120  * log2(fraction of pcache to reclaim at a time).
 121  */
 122 #define P_SHRINK_SHFT           (5)
 123 
 124 /*
 125  * The following variables can be tuned via /etc/system.
 126  */
 127 
 128 int     segpcache_enabled = 1;          /* if 1, shadow lists are cached */
 129 pgcnt_t segpcache_maxwindow = 0;        /* max # of pages that can be cached */
 130 ulong_t segpcache_hashsize_win = 0;     /* # of non wired buckets */
 131 ulong_t segpcache_hashsize_wired = 0;   /* # of wired buckets */
 132 int     segpcache_reap_sec = 1;         /* reap check rate in secs */
 133 clock_t segpcache_reap_ticks = 0;       /* reap interval in ticks */
 134 int     segpcache_pcp_maxage_sec = 1;   /* pcp max age in secs */
 135 clock_t segpcache_pcp_maxage_ticks = 0; /* pcp max age in ticks */
 136 int     segpcache_shrink_shift = P_SHRINK_SHFT; /* log2 reap fraction */
 137 pgcnt_t segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES; /* max purge bytes */
 138 
 139 static kmutex_t seg_pcache_mtx; /* protects seg_pdisabled counter */
 140 static kmutex_t seg_pasync_mtx; /* protects async thread scheduling */
 141 static kcondvar_t seg_pasync_cv;
 142 
 143 #pragma align 64(pctrl1)
 144 #pragma align 64(pctrl2)
 145 #pragma align 64(pctrl3)
 146 
 147 /*
 148  * Keep frequently used variables together in one cache line.
 149  */
 150 static struct p_ctrl1 {
 151         uint_t p_disabled;              /* if not 0, caching temporarily off */
 152         pgcnt_t p_maxwin;               /* max # of pages that can be cached */
 153         size_t p_hashwin_sz;            /* # of non wired buckets */
 154         struct seg_phash *p_htabwin;    /* hash table for non wired entries */
 155         size_t p_hashwired_sz;          /* # of wired buckets */
 156         struct seg_phash_wired *p_htabwired; /* hash table for wired entries */
 157         kmem_cache_t *p_kmcache;        /* kmem cache for seg_pcache structs */
 158 #ifdef _LP64
 159         ulong_t pad[1];
 160 #endif /* _LP64 */
 161 } pctrl1;
 162 
 163 static struct p_ctrl2 {
 164         kmutex_t p_mem_mtx;     /* protects window counter and p_halinks */
 165         pgcnt_t  p_locked_win;  /* # pages from window */
 166         pgcnt_t  p_locked;      /* # of pages cached by pagelock */
 167         uchar_t  p_ahcur;       /* current active links for insert/delete */
 168         uchar_t  p_athr_on;     /* async reclaim thread is running. */
 169         pcache_link_t p_ahhead[2]; /* active buckets linkages */
 170 } pctrl2;
 171 
 172 static struct p_ctrl3 {
 173         clock_t p_pcp_maxage;           /* max pcp age in ticks */
 174         ulong_t p_athr_empty_ahb;       /* athread walk stats */
 175         ulong_t p_athr_full_ahb;        /* athread walk stats */
 176         pgcnt_t p_maxapurge_npages;     /* max pages to purge at a time */
 177         int     p_shrink_shft;          /* reap shift factor */
 178 #ifdef _LP64
 179         ulong_t pad[3];
 180 #endif /* _LP64 */
 181 } pctrl3;
 182 
 183 #define seg_pdisabled                   pctrl1.p_disabled
 184 #define seg_pmaxwindow                  pctrl1.p_maxwin
 185 #define seg_phashsize_win               pctrl1.p_hashwin_sz
 186 #define seg_phashtab_win                pctrl1.p_htabwin
 187 #define seg_phashsize_wired             pctrl1.p_hashwired_sz
 188 #define seg_phashtab_wired              pctrl1.p_htabwired
 189 #define seg_pkmcache                    pctrl1.p_kmcache
 190 #define seg_pmem_mtx                    pctrl2.p_mem_mtx
 191 #define seg_plocked_window              pctrl2.p_locked_win
 192 #define seg_plocked                     pctrl2.p_locked
 193 #define seg_pahcur                      pctrl2.p_ahcur
 194 #define seg_pathr_on                    pctrl2.p_athr_on
 195 #define seg_pahhead                     pctrl2.p_ahhead
 196 #define seg_pmax_pcpage                 pctrl3.p_pcp_maxage
 197 #define seg_pathr_empty_ahb             pctrl3.p_athr_empty_ahb
 198 #define seg_pathr_full_ahb              pctrl3.p_athr_full_ahb
 199 #define seg_pshrink_shift               pctrl3.p_shrink_shft
 200 #define seg_pmaxapurge_npages           pctrl3.p_maxapurge_npages
 201 
 202 #define P_HASHWIN_MASK                  (seg_phashsize_win - 1)
 203 #define P_HASHWIRED_MASK                (seg_phashsize_wired - 1)
 204 #define P_BASESHIFT                     (6)
 205 
 206 kthread_t *seg_pasync_thr;
 207 
 208 extern struct seg_ops segvn_ops;
 209 extern struct seg_ops segspt_shmops;
 210 
 211 #define IS_PFLAGS_WIRED(flags) ((flags) & SEGP_FORCE_WIRED)
 212 #define IS_PCP_WIRED(pcp) IS_PFLAGS_WIRED((pcp)->p_flags)
 213 
 214 #define LBOLT_DELTA(t)  ((ulong_t)(ddi_get_lbolt() - (t)))
 215 
 216 #define PCP_AGE(pcp)    LBOLT_DELTA((pcp)->p_lbolt)
 217 
 218 /*
 219  * htag0 argument can be a seg or amp pointer.
 220  */
 221 #define P_HASHBP(seg, htag0, addr, flags)                               \
 222         (IS_PFLAGS_WIRED((flags)) ?                                     \
 223             ((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK & \
 224             ((uintptr_t)(htag0) >> P_BASESHIFT)]) :                       \
 225             (&seg_phashtab_win[P_HASHWIN_MASK &                         \
 226             (((uintptr_t)(htag0) >> 3) ^                          \
 227             ((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ?            \
 228             (flags >> 16) : page_get_shift((seg)->s_szc))))]))
 229 


 739 seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
 740     size_t len, uint_t flags)
 741 {
 742         ASSERT(seg != NULL);
 743 
 744 #ifdef DEBUG
 745         if (p_insert_chk_mtbf && !(gethrtime() % p_insert_chk_mtbf)) {
 746                 return (SEGP_FAIL);
 747         }
 748 #endif
 749 
 750         if (seg_pdisabled) {
 751                 return (SEGP_FAIL);
 752         }
 753         ASSERT(seg_phashsize_win != 0);
 754 
 755         if (IS_PFLAGS_WIRED(flags)) {
 756                 return (SEGP_SUCCESS);
 757         }
 758 
 759         if (seg_plocked_window + btop(len) > seg_pmaxwindow) {
 760                 return (SEGP_FAIL);
 761         }
 762 
 763         if (freemem < desfree) {
 764                 return (SEGP_FAIL);
 765         }
 766 
 767         return (SEGP_SUCCESS);
 768 }
 769 
 770 #ifdef DEBUG
 771 static uint32_t p_insert_mtbf = 0;
 772 #endif
 773 
 774 /*
 775  * Insert address range with shadow list into pagelock cache if there's no
 776  * shadow list already cached for this address range. If the cache is off or
 777  * caching is temporarily disabled or the allowed 'window' is exceeded return
 778  * SEGP_FAIL. Otherwise return SEGP_SUCCESS.
 779  *
 780  * For non wired shadow lists (segvn case) include address in the hashing
 781  * function to avoid linking all the entries from the same segment or amp on
 782  * the same bucket.  amp is used instead of seg if amp is not NULL. Non wired
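
A hedged sketch of the calling protocol, modeled on the segvn pagelock
path; the full seg_pinsert() prototype is outside this excerpt, so the
argument order below is an assumption:

/*
 * Hypothetical caller.  SEGP_FAIL from either routine only means
 * "don't cache the shadow list"; the pagelock itself still works.
 */
if (seg_pinsert_check(seg, amp, addr, len, flags) != SEGP_SUCCESS)
        goto nocache;           /* cache off, low memory or window full */

/* ... lock the pages and build the shadow list pplist ... */

if (seg_pinsert(seg, amp, addr, len, wlen, pplist, rw, flags,
    reclaim_cb) == SEGP_FAIL) {
        /* not inserted; the caller keeps ownership of pplist */
}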


 812         ASSERT(rw == S_READ || rw == S_WRITE);
 813         ASSERT(rw == S_READ || wlen == len);
 814         ASSERT(rw == S_WRITE || wlen <= len);
 815         ASSERT(amp == NULL || wlen == len);
 816 
 817 #ifdef DEBUG
 818         if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
 819                 return (SEGP_FAIL);
 820         }
 821 #endif
 822 
 823         if (seg_pdisabled) {
 824                 return (SEGP_FAIL);
 825         }
 826         ASSERT(seg_phashsize_win != 0);
 827 
 828         ASSERT((len & PAGEOFFSET) == 0);
 829         npages = btop(len);
 830         mutex_enter(&seg_pmem_mtx);
 831         if (!IS_PFLAGS_WIRED(flags)) {
 832                 if (seg_plocked_window + npages > seg_pmaxwindow) {
 833                         mutex_exit(&seg_pmem_mtx);
 834                         return (SEGP_FAIL);
 835                 }
 836                 seg_plocked_window += npages;
 837         }
 838         seg_plocked += npages;
 839         mutex_exit(&seg_pmem_mtx);
 840 
 841         pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
 842         /*
 843          * If amp is not NULL set htag0 to amp otherwise set it to seg.
 844          */
 845         if (amp == NULL) {
 846                 pcp->p_htag0 = (void *)seg;
 847                 pcp->p_flags = flags & 0xffff;
 848         } else {
 849                 pcp->p_htag0 = (void *)amp;
 850                 pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
 851         }
 852         pcp->p_addr = addr;
 853         pcp->p_len = len;
 854         pcp->p_wlen = wlen;
 855         pcp->p_pp = pp;


 931 
 932 /*
 933  * purge entries from the pagelock cache if not active
 934  * and not recently used.
 935  */
 936 static void
 937 seg_ppurge_async(int force)
 938 {
 939         struct seg_pcache *delcallb_list = NULL;
 940         struct seg_pcache *pcp;
 941         struct seg_phash *hp;
 942         pgcnt_t npages = 0;
 943         pgcnt_t npages_window = 0;
 944         pgcnt_t npgs_to_purge;
 945         pgcnt_t npgs_purged = 0;
 946         int hlinks = 0;
 947         int hlix;
 948         pcache_link_t *hlinkp;
 949         pcache_link_t *hlnextp = NULL;
 950         int lowmem;
 951         int trim;
 952 
 953         ASSERT(seg_phashsize_win != 0);
 954 
 955         /*
 956          * if the cache is off or empty, return
 957          */
 958         if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
 959                 return;
 960         }
 961 
 962         if (!force) {
 963                 lowmem = 0;
 964                 trim = 0;
 965                 if (freemem < lotsfree + needfree) {
 966                         spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
 967                         if (fmem <= 5 * (desfree >> 2)) {
 968                                 lowmem = 1;
 969                         } else if (fmem <= 7 * (lotsfree >> 3)) {
 970                                 if (seg_plocked_window >=
 971                                     (availrmem_initial >> 1)) {
 972                                         lowmem = 1;
 973                                 }
 974                         } else if (fmem < lotsfree) {
 975                                 if (seg_plocked_window >=
 976                                     3 * (availrmem_initial >> 2)) {
 977                                         lowmem = 1;
 978                                 }
 979                         }
 980                 }
 981                 if (seg_plocked_window >= 7 * (seg_pmaxwindow >> 3)) {
 982                         trim = 1;
 983                 }
 984                 if (!lowmem && !trim) {
 985                         return;
 986                 }
 987                 npgs_to_purge = seg_plocked_window >>
 988                     seg_pshrink_shift;
 989                 if (lowmem) {
 990                         npgs_to_purge = MIN(npgs_to_purge,
 991                             MAX(seg_pmaxapurge_npages, desfree));
 992                 } else {
 993                         npgs_to_purge = MIN(npgs_to_purge,
 994                             seg_pmaxapurge_npages);
 995                 }
 996                 if (npgs_to_purge == 0) {
 997                         return;
 998                 }
 999         } else {
1000                 struct seg_phash_wired *hpw;
1001 
1002                 ASSERT(seg_phashsize_wired != 0);
1003 
1004                 for (hpw = seg_phashtab_wired;
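
Since the !force policy above is easy to misread, here is an editorial
restatement of the lowmem test as a standalone predicate (same
arithmetic, unrolled; it assumes the usual desfree < lotsfree tuning):

static int
pcache_lowmem(pgcnt_t window)
{
        spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);

        if (fmem <= 5 * (desfree >> 2))         /* ~1.25 * desfree */
                return (1);
        if (fmem <= 7 * (lotsfree >> 3) &&      /* ~0.875 * lotsfree */
            window >= (availrmem_initial >> 1))
                return (1);
        if (fmem < lotsfree &&
            window >= 3 * (availrmem_initial >> 2))
                return (1);
        return (0);
}

In words: the closer freemem gets to lotsfree, the larger the cached
window must already be before memory pressure alone justifies a purge.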


1092                         plinkp->p_lnext->p_lprev =
1093                             plinkp->p_lprev;
1094                         pcp->p_hprev->p_hnext = pcp->p_hnext;
1095                         pcp->p_hnext->p_hprev = pcp->p_hprev;
1096                         mutex_exit(pmtx);
1097                         pcp->p_hprev = delcallb_list;
1098                         delcallb_list = pcp;
1099                         npgs_purged += btop(pcp->p_len);
1100                 }
1101                 if (hp->p_hnext == (struct seg_pcache *)hp) {
1102                         seg_premove_abuck(hp, 1);
1103                 }
1104                 mutex_exit(&hp->p_hmutex);
1105                 if (npgs_purged >= seg_plocked_window) {
1106                         break;
1107                 }
1108                 if (!force) {
1109                         if (npgs_purged >= npgs_to_purge) {
1110                                 break;
1111                         }
1112                         if (!trim && !(seg_pathr_full_ahb & 15)) {
1113                                 ASSERT(lowmem);
1114                                 if (freemem >= lotsfree + needfree) {
1115                                         break;
1116                                 }
1117                         }
1118                 }
1119         }
1120 
1121         if (hlinkp == &seg_pahhead[hlix]) {
1122                 /*
1123                  * We processed the entire hlix active bucket list
1124                  * but didn't find enough pages to reclaim.
1125                  * Switch the lists and walk the other list
1126                  * if we haven't done it yet.
1127                  */
1128                 mutex_enter(&seg_pmem_mtx);
1129                 ASSERT(seg_pathr_on);
1130                 ASSERT(seg_pahcur == !hlix);
1131                 seg_pahcur = hlix;
1132                 mutex_exit(&seg_pmem_mtx);


1453                 if (physmegs < 20 * 1024) {
1454                         segpcache_hashsize_wired = MAX(1024, physmegs << 3);
1455                 } else {
1456                         segpcache_hashsize_wired = 256 * 1024;
1457                 }
1458         }
1459         if (!ISP2(segpcache_hashsize_wired)) {
1460                 segpcache_hashsize_wired = 1 <<
1461                     highbit(segpcache_hashsize_wired);
1462         }
1463         seg_phashsize_wired = segpcache_hashsize_wired;
1464         seg_phashtab_wired = kmem_zalloc(
1465             seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
1466         for (i = 0; i < seg_phashsize_wired; i++) {
1467                 hp = (struct seg_phash *)&seg_phashtab_wired[i];
1468                 hp->p_hnext = (struct seg_pcache *)hp;
1469                 hp->p_hprev = (struct seg_pcache *)hp;
1470                 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1471         }
1472 
1473         if (segpcache_maxwindow == 0) {
1474                 if (physmegs < 64) {
1475                         /* 3% of memory */
1476                         segpcache_maxwindow = availrmem >> 5;
1477                 } else if (physmegs < 512) {
1478                         /* 12% of memory */
1479                         segpcache_maxwindow = availrmem >> 3;
1480                 } else if (physmegs < 1024) {
1481                         /* 25% of memory */
1482                         segpcache_maxwindow = availrmem >> 2;
1483                 } else if (physmegs < 2048) {
1484                         /* 50% of memory */
1485                         segpcache_maxwindow = availrmem >> 1;
1486                 } else {
1487                         /* no limit */
1488                         segpcache_maxwindow = (pgcnt_t)-1;
1489                 }
1490         }
1491         seg_pmaxwindow = segpcache_maxwindow;
1492         seg_pinit_mem_config();
1493 }
1494 
1495 /*
1496  * called by pageout if memory is low
1497  */
1498 void
1499 seg_preap(void)
1500 {
1501         /*
1502          * if the cache is off or empty, return
1503          */
1504         if (seg_plocked_window == 0) {
1505                 return;
1506         }
1507         ASSERT(seg_phashsize_win != 0);
1508 
1509         /*
1510          * If somebody is already purging pcache
1511          * just return.


1619  * and for kernel startup to attach to static segments.
1620  */
1621 int
1622 seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
1623 {
1624         seg->s_as = as;
1625         seg->s_base = base;
1626         seg->s_size = size;
1627 
1628         /*
1629          * as_addseg() will add the segment at the appropriate point
1630          * in the list. It will return -1 if there is overlap with
1631          * an already existing segment.
1632          */
1633         return (as_addseg(as, seg));
1634 }
1635 
1636 /*
1637  * Unmap a segment and free it from its associated address space.
1638  * This should be called by anybody who's finished with a whole segment's
1639  * mapping.  Just calls SEGOP_UNMAP() on the whole mapping.  It is the
1640  * responsibility of the segment driver to unlink the segment
1641  * from the address space, and to free public and private data structures
1642  * associated with the segment.  (This is typically done by a call to
1643  * seg_free()).
1644  */
1645 void
1646 seg_unmap(struct seg *seg)
1647 {
1648 #ifdef DEBUG
1649         int ret;
1650 #endif /* DEBUG */
1651 
1652         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1653 
1654         /* Shouldn't have called seg_unmap if mapping isn't yet established */
1655         ASSERT(seg->s_data != NULL);
1656 
1657         /* Unmap the whole mapping */
1658 #ifdef DEBUG
1659         ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1660         ASSERT(ret == 0);
1661 #else
1662         SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1663 #endif /* DEBUG */
1664 }
1665 
1666 /*
1667  * Free the segment from its associated as. This should only be called
1668  * if a mapping to the segment has not yet been established (e.g., if
1669  * an error occurs in the middle of doing an as_map when the segment
1670  * has already been partially set up) or if it has already been deleted
1671  * (e.g., from a segment driver unmap routine if the unmap applies to the
1672  * entire segment). If the mapping is currently set up then seg_unmap() should
1673  * be called instead.
1674  */
1675 void
1676 seg_free(struct seg *seg)
1677 {
1678         register struct as *as = seg->s_as;
1679         struct seg *tseg = as_removeseg(as, seg);
1680 
1681         ASSERT(tseg == seg);
1682 
1683         /*
1684          * If the segment private data field is NULL,
1685          * then segment driver is not attached yet.
1686          */
1687         if (seg->s_data != NULL)
1688                 SEGOP_FREE(seg);
1689 
1690         mutex_destroy(&seg->s_pmtx);
1691         ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
1692         ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
1693         kmem_cache_free(seg_cache, seg);
1694 }
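
As a hedged illustration of the comment above, the canonical seg_free()
caller is an as_map()-style error path, where the driver create function
fails before any mapping is established (crfp/argsp follow the as_map
convention and are quoted from memory):

seg = seg_alloc(as, addr, size);        /* allocates and attaches */
if (seg == NULL)
        return (ENOMEM);

error = (*crfp)(seg, argsp);            /* driver create, e.g. segvn_create */
if (error != 0) {
        seg_free(seg);                  /* s_data may still be NULL here */
        return (error);
}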
1695 
1696 /*ARGSUSED*/
1697 static void
1698 seg_p_mem_config_post_add(
1699         void *arg,
1700         pgcnt_t delta_pages)
1701 {
1702         /* Nothing to do. */
1703 }
1704 
1705 void
1706 seg_p_enable(void)
1707 {
1708         mutex_enter(&seg_pcache_mtx);


1837 }
1838 
1839 /*
1840  * Return swap reserved by a segment backing a private mapping.
1841  */
1842 size_t
1843 seg_swresv(struct seg *seg)
1844 {
1845         struct segvn_data *svd;
1846         size_t swap = 0;
1847 
1848         if (seg->s_ops == &segvn_ops) {
1849                 svd = (struct segvn_data *)seg->s_data;
1850                 if (svd->type == MAP_PRIVATE && svd->swresv > 0)
1851                         swap = svd->swresv;
1852         }
1853         return (swap);
1854 }
1855 
1856 /*
1857  * General not supported function for SEGOP_INHERIT
1858  */
1859 /* ARGSUSED */
1860 int
 1861 seg_inherit_notsup(struct seg *seg, caddr_t addr, size_t len, uint_t op)
 1862 {
 1863         return (ENOTSUP);
 1864 }


 109         struct seg_pcache       *p_hprev;
 110         kmutex_t                p_hmutex;       /* protects hash bucket */
 111 };
 112 
 113 /*
 114  * A parameter to control a maximum number of bytes that can be
 115  * purged from pcache at a time.
 116  */
 117 #define P_MAX_APURGE_BYTES      (1024 * 1024 * 1024)
 118 
 119 /*
 120  * log2(fraction of pcache to reclaim at a time).
 121  */
 122 #define P_SHRINK_SHFT           (5)
 123 
 124 /*
 125  * The following variables can be tuned via /etc/system.
 126  */
 127 
 128 int     segpcache_enabled = 1;          /* if 1, shadow lists are cached */

 129 ulong_t segpcache_hashsize_win = 0;     /* # of non wired buckets */
 130 ulong_t segpcache_hashsize_wired = 0;   /* # of wired buckets */
 131 int     segpcache_reap_sec = 1;         /* reap check rate in secs */
 132 clock_t segpcache_reap_ticks = 0;       /* reap interval in ticks */
 133 int     segpcache_pcp_maxage_sec = 1;   /* pcp max age in secs */
 134 clock_t segpcache_pcp_maxage_ticks = 0; /* pcp max age in ticks */
 135 int     segpcache_shrink_shift = P_SHRINK_SHFT; /* log2 reap fraction */
 136 pgcnt_t segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES; /* max purge bytes */
 137 
 138 static kmutex_t seg_pcache_mtx; /* protects seg_pdisabled counter */
 139 static kmutex_t seg_pasync_mtx; /* protects async thread scheduling */
 140 static kcondvar_t seg_pasync_cv;
 141 
 142 #pragma align 64(pctrl1)
 143 #pragma align 64(pctrl2)
 144 #pragma align 64(pctrl3)
 145 
 146 /*
 147  * Keep frequently used variables together in one cache line.
 148  */
 149 static struct p_ctrl1 {
 150         uint_t p_disabled;              /* if not 0, caching temporarily off */

 151         size_t p_hashwin_sz;            /* # of non wired buckets */
 152         struct seg_phash *p_htabwin;    /* hash table for non wired entries */
 153         size_t p_hashwired_sz;          /* # of wired buckets */
 154         struct seg_phash_wired *p_htabwired; /* hash table for wired entries */
 155         kmem_cache_t *p_kmcache;        /* kmem cache for seg_pcache structs */
 156 #ifdef _LP64
 157         ulong_t pad[2];
 158 #endif /* _LP64 */
 159 } pctrl1;
 160 
 161 static struct p_ctrl2 {
 162         kmutex_t p_mem_mtx;     /* protects window counter and p_halinks */
 163         pgcnt_t  p_locked_win;  /* # pages from window */
 164         pgcnt_t  p_locked;      /* # of pages cached by pagelock */
 165         uchar_t  p_ahcur;       /* current active links for insert/delete */
 166         uchar_t  p_athr_on;     /* async reclaim thread is running. */
 167         pcache_link_t p_ahhead[2]; /* active buckets linkages */
 168 } pctrl2;
 169 
 170 static struct p_ctrl3 {
 171         clock_t p_pcp_maxage;           /* max pcp age in ticks */
 172         ulong_t p_athr_empty_ahb;       /* athread walk stats */
 173         ulong_t p_athr_full_ahb;        /* athread walk stats */
 174         pgcnt_t p_maxapurge_npages;     /* max pages to purge at a time */
 175         int     p_shrink_shft;          /* reap shift factor */
 176 #ifdef _LP64
 177         ulong_t pad[3];
 178 #endif /* _LP64 */
 179 } pctrl3;
 180 
 181 #define seg_pdisabled                   pctrl1.p_disabled

 182 #define seg_phashsize_win               pctrl1.p_hashwin_sz
 183 #define seg_phashtab_win                pctrl1.p_htabwin
 184 #define seg_phashsize_wired             pctrl1.p_hashwired_sz
 185 #define seg_phashtab_wired              pctrl1.p_htabwired
 186 #define seg_pkmcache                    pctrl1.p_kmcache
 187 #define seg_pmem_mtx                    pctrl2.p_mem_mtx
 188 #define seg_plocked_window              pctrl2.p_locked_win
 189 #define seg_plocked                     pctrl2.p_locked
 190 #define seg_pahcur                      pctrl2.p_ahcur
 191 #define seg_pathr_on                    pctrl2.p_athr_on
 192 #define seg_pahhead                     pctrl2.p_ahhead
 193 #define seg_pmax_pcpage                 pctrl3.p_pcp_maxage
 194 #define seg_pathr_empty_ahb             pctrl3.p_athr_empty_ahb
 195 #define seg_pathr_full_ahb              pctrl3.p_athr_full_ahb
 196 #define seg_pshrink_shift               pctrl3.p_shrink_shft
 197 #define seg_pmaxapurge_npages           pctrl3.p_maxapurge_npages
 198 
 199 #define P_HASHWIN_MASK                  (seg_phashsize_win - 1)
 200 #define P_HASHWIRED_MASK                (seg_phashsize_wired - 1)
 201 #define P_BASESHIFT                     (6)
 202 
 203 kthread_t *seg_pasync_thr;
 204 
 205 extern const struct seg_ops segvn_ops;
 206 extern const struct seg_ops segspt_shmops;
 207 
 208 #define IS_PFLAGS_WIRED(flags) ((flags) & SEGP_FORCE_WIRED)
 209 #define IS_PCP_WIRED(pcp) IS_PFLAGS_WIRED((pcp)->p_flags)
 210 
 211 #define LBOLT_DELTA(t)  ((ulong_t)(ddi_get_lbolt() - (t)))
 212 
 213 #define PCP_AGE(pcp)    LBOLT_DELTA((pcp)->p_lbolt)
 214 
 215 /*
 216  * htag0 argument can be a seg or amp pointer.
 217  */
 218 #define P_HASHBP(seg, htag0, addr, flags)                               \
 219         (IS_PFLAGS_WIRED((flags)) ?                                     \
 220             ((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK & \
 221             ((uintptr_t)(htag0) >> P_BASESHIFT)]) :                       \
 222             (&seg_phashtab_win[P_HASHWIN_MASK &                         \
 223             (((uintptr_t)(htag0) >> 3) ^                          \
 224             ((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ?            \
 225             (flags >> 16) : page_get_shift((seg)->s_szc))))]))
 226 


 736 seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
 737     size_t len, uint_t flags)
 738 {
 739         ASSERT(seg != NULL);
 740 
 741 #ifdef DEBUG
 742         if (p_insert_chk_mtbf && !(gethrtime() % p_insert_chk_mtbf)) {
 743                 return (SEGP_FAIL);
 744         }
 745 #endif
 746 
 747         if (seg_pdisabled) {
 748                 return (SEGP_FAIL);
 749         }
 750         ASSERT(seg_phashsize_win != 0);
 751 
 752         if (IS_PFLAGS_WIRED(flags)) {
 753                 return (SEGP_SUCCESS);
 754         }
 755 




 756         if (freemem < desfree) {
 757                 return (SEGP_FAIL);
 758         }
 759 
 760         return (SEGP_SUCCESS);
 761 }
 762 
 763 #ifdef DEBUG
 764 static uint32_t p_insert_mtbf = 0;
 765 #endif
 766 
 767 /*
 768  * Insert address range with shadow list into pagelock cache if there's no
 769  * shadow list already cached for this address range. If the cache is off or
 770  * caching is temporarily disabled or the allowed 'window' is exceeded return
 771  * SEGP_FAIL. Otherwise return SEGP_SUCCESS.
 772  *
 773  * For non wired shadow lists (segvn case) include address in the hashing
 774  * function to avoid linking all the entries from the same segment or amp on
 775  * the same bucket.  amp is used instead of seg if amp is not NULL. Non wired


 805         ASSERT(rw == S_READ || rw == S_WRITE);
 806         ASSERT(rw == S_READ || wlen == len);
 807         ASSERT(rw == S_WRITE || wlen <= len);
 808         ASSERT(amp == NULL || wlen == len);
 809 
 810 #ifdef DEBUG
 811         if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
 812                 return (SEGP_FAIL);
 813         }
 814 #endif
 815 
 816         if (seg_pdisabled) {
 817                 return (SEGP_FAIL);
 818         }
 819         ASSERT(seg_phashsize_win != 0);
 820 
 821         ASSERT((len & PAGEOFFSET) == 0);
 822         npages = btop(len);
 823         mutex_enter(&seg_pmem_mtx);
 824         if (!IS_PFLAGS_WIRED(flags)) {




 825                 seg_plocked_window += npages;
 826         }
 827         seg_plocked += npages;
 828         mutex_exit(&seg_pmem_mtx);
 829 
 830         pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
 831         /*
 832          * If amp is not NULL set htag0 to amp otherwise set it to seg.
 833          */
 834         if (amp == NULL) {
 835                 pcp->p_htag0 = (void *)seg;
 836                 pcp->p_flags = flags & 0xffff;
 837         } else {
 838                 pcp->p_htag0 = (void *)amp;
 839                 pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
 840         }
 841         pcp->p_addr = addr;
 842         pcp->p_len = len;
 843         pcp->p_wlen = wlen;
 844         pcp->p_pp = pp;


 920 
 921 /*
 922  * purge entries from the pagelock cache if not active
 923  * and not recently used.
 924  */
 925 static void
 926 seg_ppurge_async(int force)
 927 {
 928         struct seg_pcache *delcallb_list = NULL;
 929         struct seg_pcache *pcp;
 930         struct seg_phash *hp;
 931         pgcnt_t npages = 0;
 932         pgcnt_t npages_window = 0;
 933         pgcnt_t npgs_to_purge;
 934         pgcnt_t npgs_purged = 0;
 935         int hlinks = 0;
 936         int hlix;
 937         pcache_link_t *hlinkp;
 938         pcache_link_t *hlnextp = NULL;
 939         int lowmem;

 940 
 941         ASSERT(seg_phashsize_win != 0);
 942 
 943         /*
 944          * if the cache is off or empty, return
 945          */
 946         if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
 947                 return;
 948         }
 949 
 950         if (!force) {
 951                 lowmem = 0;

 952                 if (freemem < lotsfree + needfree) {
 953                         spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
 954                         if (fmem <= 5 * (desfree >> 2)) {
 955                                 lowmem = 1;
 956                         } else if (fmem <= 7 * (lotsfree >> 3)) {
 957                                 if (seg_plocked_window >=
 958                                     (availrmem_initial >> 1)) {
 959                                         lowmem = 1;
 960                                 }
 961                         } else if (fmem < lotsfree) {
 962                                 if (seg_plocked_window >=
 963                                     3 * (availrmem_initial >> 2)) {
 964                                         lowmem = 1;
 965                                 }
 966                         }
 967                 }
 968                 if (!lowmem) {



 969                         return;
 970                 }
 971                 npgs_to_purge = seg_plocked_window >>
 972                     seg_pshrink_shift;
 973                 if (lowmem) {
 974                         npgs_to_purge = MIN(npgs_to_purge,
 975                             MAX(seg_pmaxapurge_npages, desfree));
 976                 } else {
 977                         npgs_to_purge = MIN(npgs_to_purge,
 978                             seg_pmaxapurge_npages);
 979                 }
 980                 if (npgs_to_purge == 0) {
 981                         return;
 982                 }
 983         } else {
 984                 struct seg_phash_wired *hpw;
 985 
 986                 ASSERT(seg_phashsize_wired != 0);
 987 
 988                 for (hpw = seg_phashtab_wired;


1076                         plinkp->p_lnext->p_lprev =
1077                             plinkp->p_lprev;
1078                         pcp->p_hprev->p_hnext = pcp->p_hnext;
1079                         pcp->p_hnext->p_hprev = pcp->p_hprev;
1080                         mutex_exit(pmtx);
1081                         pcp->p_hprev = delcallb_list;
1082                         delcallb_list = pcp;
1083                         npgs_purged += btop(pcp->p_len);
1084                 }
1085                 if (hp->p_hnext == (struct seg_pcache *)hp) {
1086                         seg_premove_abuck(hp, 1);
1087                 }
1088                 mutex_exit(&hp->p_hmutex);
1089                 if (npgs_purged >= seg_plocked_window) {
1090                         break;
1091                 }
1092                 if (!force) {
1093                         if (npgs_purged >= npgs_to_purge) {
1094                                 break;
1095                         }
1096                         if (!(seg_pathr_full_ahb & 15)) {
1097                                 ASSERT(lowmem);
1098                                 if (freemem >= lotsfree + needfree) {
1099                                         break;
1100                                 }
1101                         }
1102                 }
1103         }
1104 
1105         if (hlinkp == &seg_pahhead[hlix]) {
1106                 /*
1107                  * We processed the entire hlix active bucket list
1108                  * but didn't find enough pages to reclaim.
1109                  * Switch the lists and walk the other list
1110                  * if we haven't done it yet.
1111                  */
1112                 mutex_enter(&seg_pmem_mtx);
1113                 ASSERT(seg_pathr_on);
1114                 ASSERT(seg_pahcur == !hlix);
1115                 seg_pahcur = hlix;
1116                 mutex_exit(&seg_pmem_mtx);


1437                 if (physmegs < 20 * 1024) {
1438                         segpcache_hashsize_wired = MAX(1024, physmegs << 3);
1439                 } else {
1440                         segpcache_hashsize_wired = 256 * 1024;
1441                 }
1442         }
1443         if (!ISP2(segpcache_hashsize_wired)) {
1444                 segpcache_hashsize_wired = 1 <<
1445                     highbit(segpcache_hashsize_wired);
1446         }
1447         seg_phashsize_wired = segpcache_hashsize_wired;
1448         seg_phashtab_wired = kmem_zalloc(
1449             seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
1450         for (i = 0; i < seg_phashsize_wired; i++) {
1451                 hp = (struct seg_phash *)&seg_phashtab_wired[i];
1452                 hp->p_hnext = (struct seg_pcache *)hp;
1453                 hp->p_hprev = (struct seg_pcache *)hp;
1454                 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1455         }
1456 



















1457         seg_pinit_mem_config();
1458 }
1459 
1460 /*
1461  * called by pageout if memory is low
1462  */
1463 void
1464 seg_preap(void)
1465 {
1466         /*
1467          * if the cache is off or empty, return
1468          */
1469         if (seg_plocked_window == 0) {
1470                 return;
1471         }
1472         ASSERT(seg_phashsize_win != 0);
1473 
1474         /*
1475          * If somebody is already purging pcache
1476          * just return.


1584  * and for kernel startup to attach to static segments.
1585  */
1586 int
1587 seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
1588 {
1589         seg->s_as = as;
1590         seg->s_base = base;
1591         seg->s_size = size;
1592 
1593         /*
1594          * as_addseg() will add the segment at the appropriate point
1595          * in the list. It will return -1 if there is overlap with
1596          * an already existing segment.
1597          */
1598         return (as_addseg(as, seg));
1599 }
1600 
1601 /*
1602  * Unmap a segment and free it from its associated address space.
1603  * This should be called by anybody who's finished with a whole segment's
1604  * mapping.  Just calls segop_unmap() on the whole mapping.  It is the
1605  * responsibility of the segment driver to unlink the segment
1606  * from the address space, and to free public and private data structures
1607  * associated with the segment.  (This is typically done by a call to
1608  * seg_free()).
1609  */
1610 void
1611 seg_unmap(struct seg *seg)
1612 {
1613 #ifdef DEBUG
1614         int ret;
1615 #endif /* DEBUG */
1616 
1617         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1618 
1619         /* Shouldn't have called seg_unmap if mapping isn't yet established */
1620         ASSERT(seg->s_data != NULL);
1621 
1622         /* Unmap the whole mapping */
1623 #ifdef DEBUG
1624         ret = segop_unmap(seg, seg->s_base, seg->s_size);
1625         ASSERT(ret == 0);
1626 #else
1627         segop_unmap(seg, seg->s_base, seg->s_size);
1628 #endif /* DEBUG */
1629 }
1630 
1631 /*
1632  * Free the segment from its associated as. This should only be called
1633  * if a mapping to the segment has not yet been established (e.g., if
1634  * an error occurs in the middle of doing an as_map when the segment
1635  * has already been partially set up) or if it has already been deleted
1636  * (e.g., from a segment driver unmap routine if the unmap applies to the
1637  * entire segment). If the mapping is currently set up then seg_unmap() should
1638  * be called instead.
1639  */
1640 void
1641 seg_free(struct seg *seg)
1642 {
1643         register struct as *as = seg->s_as;
1644         struct seg *tseg = as_removeseg(as, seg);
1645 
1646         ASSERT(tseg == seg);
1647 
1648         /*
1649          * If the segment private data field is NULL,
1650          * then segment driver is not attached yet.
1651          */
1652         if (seg->s_data != NULL)
1653                 segop_free(seg);
1654 
1655         mutex_destroy(&seg->s_pmtx);
1656         ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
1657         ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
1658         kmem_cache_free(seg_cache, seg);
1659 }
1660 
1661 /*ARGSUSED*/
1662 static void
1663 seg_p_mem_config_post_add(
1664         void *arg,
1665         pgcnt_t delta_pages)
1666 {
1667         /* Nothing to do. */
1668 }
1669 
1670 void
1671 seg_p_enable(void)
1672 {
1673         mutex_enter(&seg_pcache_mtx);


1802 }
1803 
1804 /*
1805  * Return swap reserved by a segment backing a private mapping.
1806  */
1807 size_t
1808 seg_swresv(struct seg *seg)
1809 {
1810         struct segvn_data *svd;
1811         size_t swap = 0;
1812 
1813         if (seg->s_ops == &segvn_ops) {
1814                 svd = (struct segvn_data *)seg->s_data;
1815                 if (svd->type == MAP_PRIVATE && svd->swresv > 0)
1816                         swap = svd->swresv;
1817         }
1818         return (swap);
1819 }
1820 
1821 /*
1822  * segop wrappers
1823  */

1824 int
1825 segop_dup(struct seg *seg, struct seg *new)
1826 {
1827         VERIFY3P(seg->s_ops->dup, !=, NULL);
1828 
1829         return (seg->s_ops->dup(seg, new));
1830 }
1831 
1832 int
1833 segop_unmap(struct seg *seg, caddr_t addr, size_t len)
1834 {
1835         VERIFY3P(seg->s_ops->unmap, !=, NULL);
1836 
1837         return (seg->s_ops->unmap(seg, addr, len));
1838 }
1839 
1840 void
1841 segop_free(struct seg *seg)
1842 {
1843         VERIFY3P(seg->s_ops->free, !=, NULL);
1844 
1845         seg->s_ops->free(seg);
1846 }
1847 
1848 faultcode_t
1849 segop_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
1850     enum fault_type type, enum seg_rw rw)
1851 {
1852         VERIFY3P(seg->s_ops->fault, !=, NULL);
1853 
1854         return (seg->s_ops->fault(hat, seg, addr, len, type, rw));
1855 }
1856 
1857 faultcode_t
1858 segop_faulta(struct seg *seg, caddr_t addr)
1859 {
1860         VERIFY3P(seg->s_ops->faulta, !=, NULL);
1861 
1862         return (seg->s_ops->faulta(seg, addr));
1863 }
1864 
1865 int
1866 segop_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1867 {
1868         VERIFY3P(seg->s_ops->setprot, !=, NULL);
1869 
1870         return (seg->s_ops->setprot(seg, addr, len, prot));
1871 }
1872 
1873 int
1874 segop_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1875 {
1876         VERIFY3P(seg->s_ops->checkprot, !=, NULL);
1877 
1878         return (seg->s_ops->checkprot(seg, addr, len, prot));
1879 }
1880 
1881 int
1882 segop_kluster(struct seg *seg, caddr_t addr, ssize_t d)
1883 {
1884         VERIFY3P(seg->s_ops->kluster, !=, NULL);
1885 
1886         return (seg->s_ops->kluster(seg, addr, d));
1887 }
1888 
1889 int
1890 segop_sync(struct seg *seg, caddr_t addr, size_t len, int atr, uint_t f)
1891 {
1892         VERIFY3P(seg->s_ops->sync, !=, NULL);
1893 
1894         return (seg->s_ops->sync(seg, addr, len, atr, f));
1895 }
1896 
1897 size_t
1898 segop_incore(struct seg *seg, caddr_t addr, size_t len, char *v)
1899 {
1900         VERIFY3P(seg->s_ops->incore, !=, NULL);
1901 
1902         return (seg->s_ops->incore(seg, addr, len, v));
1903 }
1904 
1905 int
1906 segop_lockop(struct seg *seg, caddr_t addr, size_t len, int atr, int op,
1907     ulong_t *b, size_t p)
1908 {
1909         VERIFY3P(seg->s_ops->lockop, !=, NULL);
1910 
1911         return (seg->s_ops->lockop(seg, addr, len, atr, op, b, p));
1912 }
1913 
1914 int
1915 segop_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *p)
1916 {
1917         VERIFY3P(seg->s_ops->getprot, !=, NULL);
1918 
1919         return (seg->s_ops->getprot(seg, addr, len, p));
1920 }
1921 
1922 u_offset_t
1923 segop_getoffset(struct seg *seg, caddr_t addr)
1924 {
1925         VERIFY3P(seg->s_ops->getoffset, !=, NULL);
1926 
1927         return (seg->s_ops->getoffset(seg, addr));
1928 }
1929 
1930 int
1931 segop_gettype(struct seg *seg, caddr_t addr)
1932 {
1933         VERIFY3P(seg->s_ops->gettype, !=, NULL);
1934 
1935         return (seg->s_ops->gettype(seg, addr));
1936 }
1937 
1938 int
1939 segop_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
1940 {
1941         VERIFY3P(seg->s_ops->getvp, !=, NULL);
1942 
1943         return (seg->s_ops->getvp(seg, addr, vpp));
1944 }
1945 
1946 int
1947 segop_advise(struct seg *seg, caddr_t addr, size_t len, uint_t b)
1948 {
1949         VERIFY3P(seg->s_ops->advise, !=, NULL);
1950 
1951         return (seg->s_ops->advise(seg, addr, len, b));
1952 }
1953 
1954 void
1955 segop_dump(struct seg *seg)
1956 {
1957         if (seg->s_ops->dump == NULL)
1958                 return;
1959 
1960         seg->s_ops->dump(seg);
1961 }
1962 
1963 int
1964 segop_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***page,
1965     enum lock_type type, enum seg_rw rw)
1966 {
1967         VERIFY3P(seg->s_ops->pagelock, !=, NULL);
1968 
1969         return (seg->s_ops->pagelock(seg, addr, len, page, type, rw));
1970 }
1971 
1972 int
1973 segop_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
1974 {
1975         if (seg->s_ops->setpagesize == NULL)
1976                 return (ENOTSUP);
1977 
1978         return (seg->s_ops->setpagesize(seg, addr, len, szc));
1979 }
1980 
1981 int
1982 segop_getmemid(struct seg *seg, caddr_t addr, memid_t *mp)
1983 {
1984         if (seg->s_ops->getmemid == NULL)
1985                 return (ENODEV);
1986 
1987         return (seg->s_ops->getmemid(seg, addr, mp));
1988 }
1989 
1990 struct lgrp_mem_policy_info *
1991 segop_getpolicy(struct seg *seg, caddr_t addr)
1992 {
1993         if (seg->s_ops->getpolicy == NULL)
1994                 return (NULL);
1995 
1996         return (seg->s_ops->getpolicy(seg, addr));
1997 }
1998 
1999 int
2000 segop_capable(struct seg *seg, segcapability_t cap)
2001 {
2002         if (seg->s_ops->capable == NULL)
2003                 return (0);
2004 
2005         return (seg->s_ops->capable(seg, cap));
2006 }
2007 
2008 int
2009 segop_inherit(struct seg *seg, caddr_t addr, size_t len, uint_t op)
2010 {
2011         if (seg->s_ops->inherit == NULL)
2012                 return (ENOTSUP);
2013 
2014         return (seg->s_ops->inherit(seg, addr, len, op));
2015 }
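
With the wrappers above in place, a minimal segment driver can simply
leave every optional entry point NULL. A hypothetical example (segfoo is
invented; the non-NULL entries are exactly the ones the wrappers VERIFY):

static const struct seg_ops segfoo_ops = {
        /* entries the segop_*() wrappers VERIFY as non-NULL */
        .dup            = segfoo_dup,
        .unmap          = segfoo_unmap,
        .free           = segfoo_free,
        .fault          = segfoo_fault,
        .faulta         = segfoo_faulta,
        .setprot        = segfoo_setprot,
        .checkprot      = segfoo_checkprot,
        .kluster        = segfoo_kluster,
        .sync           = segfoo_sync,
        .incore         = segfoo_incore,
        .lockop         = segfoo_lockop,
        .getprot        = segfoo_getprot,
        .getoffset      = segfoo_getoffset,
        .gettype        = segfoo_gettype,
        .getvp          = segfoo_getvp,
        .advise         = segfoo_advise,
        .pagelock       = segfoo_pagelock,
        /*
         * dump, setpagesize, getmemid, getpolicy, capable and
         * inherit stay NULL: segop_dump() becomes a no-op,
         * segop_setpagesize() and segop_inherit() return ENOTSUP,
         * segop_getmemid() returns ENODEV, segop_getpolicy()
         * returns NULL, and segop_capable() reports no capabilities.
         */
};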