remove whole-process swapping
Long before Unix supported paging, it used whole-process swapping to reclaim
memory.  The code is still there and, in theory, it runs when the system gets
*extremely* low on memory.  In practice it never runs, because the definition
of low-on-memory is antiquated.  (XXX: define what antiquated means)
You can check the number of swapin/swapout events with kstat(1M):
$ kstat -p ::vm:swapin ::vm:swapout
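If you want the same counters programmatically, here is a minimal libkstat
sketch.  It assumes the per-CPU cpu:<id>:vm named kstats carry 64-bit swapin
and swapout counters (which is what the kstat command above is reading);
treat it as an illustration, not a polished tool.

#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <kstat.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *in, *out;
	uint64_t swapin = 0, swapout = 0;

	if ((kc = kstat_open()) == NULL) {
		perror("kstat_open");
		return (1);
	}

	/* Sum the per-CPU vm kstats (assumed to live at cpu:<id>:vm). */
	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
		if (strcmp(ksp->ks_module, "cpu") != 0 ||
		    strcmp(ksp->ks_name, "vm") != 0)
			continue;
		if (kstat_read(kc, ksp, NULL) == -1)
			continue;
		in = kstat_data_lookup(ksp, "swapin");
		out = kstat_data_lookup(ksp, "swapout");
		if (in != NULL)
			swapin += in->value.ui64;
		if (out != NULL)
			swapout += out->value.ui64;
	}

	(void) printf("swapin=%llu swapout=%llu\n",
	    (u_longlong_t)swapin, (u_longlong_t)swapout);

	(void) kstat_close(kc);
	return (0);
}

Build it with something like `cc swapstat.c -lkstat' (the file name is just an
example).  Non-zero counters would mean the swapout path actually ran.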
remove xhat
The xhat infrastructure was added to support hardware such as the Zulu
graphics card - hardware with its own on-board MMU.  The VM subsystem used the
xhat code to keep the CPU's and Zulu's page tables in sync.  Since the only
xhat consumer was Zulu (which is gone), we can safely remove it, simplifying
the whole VM subsystem.
Assorted notes:
- the AS_BUSY flag is used solely by xhat
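To make the payoff concrete, here is a hedged before/after sketch based on
the hat_setattr() excerpt further down; the "after" is simply the original
with the xhat branches deleted.  It is an illustration of the cleanup, not a
tested patch.

/* Before: every HAT entry point carries an xhat detour. */
void
hat_setattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
{
	if (hat->sfmmu_xhat_provider) {
		XHAT_SETATTR(hat, addr, len, attr);
		return;
	} else {
		/* CPU HAT: also change attributes for any attached XHATs. */
		ASSERT(hat->sfmmu_as != NULL);
		if (hat->sfmmu_as->a_xhat != NULL)
			xhat_setattr_all(hat->sfmmu_as, addr, len, attr);
	}
	sfmmu_chgattr(hat, addr, len, attr, SFMMU_SETATTR);
}

/* After: with xhat gone, only the real work remains. */
void
hat_setattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
{
	ASSERT(hat->sfmmu_as != NULL);
	sfmmu_chgattr(hat, addr, len, attr, SFMMU_SETATTR);
}

The excerpts below are taken from the sfmmu HAT code and show how widespread
this pattern is: sfmmu_xhat_provider checks, hblk_xhat_bit tests, XHAT_*
macro calls, and the xhat_init() call at startup, all of which go away.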


  64 #include <vm/seg_kmem.h>
  65 #include <vm/seg_kpm.h>
  66 #include <vm/rm.h>
  67 #include <sys/t_lock.h>
  68 #include <sys/obpdefs.h>
  69 #include <sys/vm_machparam.h>
  70 #include <sys/var.h>
  71 #include <sys/trap.h>
  72 #include <sys/machtrap.h>
  73 #include <sys/scb.h>
  74 #include <sys/bitmap.h>
  75 #include <sys/machlock.h>
  76 #include <sys/membar.h>
  77 #include <sys/atomic.h>
  78 #include <sys/cpu_module.h>
  79 #include <sys/prom_debug.h>
  80 #include <sys/ksynch.h>
  81 #include <sys/mem_config.h>
  82 #include <sys/mem_cage.h>
  83 #include <vm/vm_dep.h>
  84 #include <vm/xhat_sfmmu.h>
  85 #include <sys/fpu/fpusystm.h>
  86 #include <vm/mach_kpm.h>
  87 #include <sys/callb.h>
  88 
  89 #ifdef  DEBUG
  90 #define SFMMU_VALIDATE_HMERID(hat, rid, saddr, len)                     \
  91         if (SFMMU_IS_SHMERID_VALID(rid)) {                              \
  92                 caddr_t _eaddr = (saddr) + (len);                       \
  93                 sf_srd_t *_srdp;                                        \
  94                 sf_region_t *_rgnp;                                     \
  95                 ASSERT((rid) < SFMMU_MAX_HME_REGIONS);                       \
  96                 ASSERT(SF_RGNMAP_TEST(hat->sfmmu_hmeregion_map, rid));       \
  97                 ASSERT((hat) != ksfmmup);                               \
  98                 _srdp = (hat)->sfmmu_srdp;                           \
  99                 ASSERT(_srdp != NULL);                                  \
 100                 ASSERT(_srdp->srd_refcnt != 0);                              \
 101                 _rgnp = _srdp->srd_hmergnp[(rid)];                   \
 102                 ASSERT(_rgnp != NULL && _rgnp->rgn_id == rid);               \
 103                 ASSERT(_rgnp->rgn_refcnt != 0);                              \
 104                 ASSERT(!(_rgnp->rgn_flags & SFMMU_REGION_FREE)); \


1333          * Reserve some kernel virtual address space for the locked TTEs
1334          * that allow us to probe the TSB from TL>0.
1335          */
1336         utsb_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1337             0, 0, NULL, NULL, VM_SLEEP);
1338         utsb4m_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1339             0, 0, NULL, NULL, VM_SLEEP);
1340 #endif
1341 
1342 #ifdef VAC
1343         /*
1344          * The big page VAC handling code assumes VAC
1345          * will not be bigger than the smallest big
1346          * page- which is 64K.
1347          */
1348         if (TTEPAGES(TTE64K) < CACHE_NUM_COLOR) {
1349                 cmn_err(CE_PANIC, "VAC too big!");
1350         }
1351 #endif
1352 
1353         (void) xhat_init();
1354 
1355         uhme_hash_pa = va_to_pa(uhme_hash);
1356         khme_hash_pa = va_to_pa(khme_hash);
1357 
1358         /*
1359          * Initialize relocation locks. kpr_suspendlock is held
1360          * at PIL_MAX to prevent interrupts from pinning the holder
1361          * of a suspended TTE which may access it leading to a
1362          * deadlock condition.
1363          */
1364         mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL);
1365         mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX);
1366 
1367         /*
1368          * If Shared context support is disabled via /etc/system
1369          * set shctx_on to 0 here if it was set to 1 earlier in boot
1370          * sequence by cpu module initialization code.
1371          */
1372         if (shctx_on && disable_shctx) {
1373                 shctx_on = 0;
1374         }


1522         for (i = 0; i < max_mmu_page_sizes; i++) {
1523                 sfmmup->sfmmu_ttecnt[i] = 0;
1524                 sfmmup->sfmmu_scdrttecnt[i] = 0;
1525                 sfmmup->sfmmu_ismttecnt[i] = 0;
1526                 sfmmup->sfmmu_scdismttecnt[i] = 0;
1527                 sfmmup->sfmmu_pgsz[i] = TTE8K;
1528         }
1529         sfmmup->sfmmu_tsb0_4minflcnt = 0;
1530         sfmmup->sfmmu_iblk = NULL;
1531         sfmmup->sfmmu_ismhat = 0;
1532         sfmmup->sfmmu_scdhat = 0;
1533         sfmmup->sfmmu_ismblkpa = (uint64_t)-1;
1534         if (sfmmup == ksfmmup) {
1535                 CPUSET_ALL(sfmmup->sfmmu_cpusran);
1536         } else {
1537                 CPUSET_ZERO(sfmmup->sfmmu_cpusran);
1538         }
1539         sfmmup->sfmmu_free = 0;
1540         sfmmup->sfmmu_rmstat = 0;
1541         sfmmup->sfmmu_clrbin = sfmmup->sfmmu_clrstart;
1542         sfmmup->sfmmu_xhat_provider = NULL;
1543         cv_init(&sfmmup->sfmmu_tsb_cv, NULL, CV_DEFAULT, NULL);
1544         sfmmup->sfmmu_srdp = NULL;
1545         SF_RGNMAP_ZERO(sfmmup->sfmmu_region_map);
1546         bzero(sfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE);
1547         sfmmup->sfmmu_scdp = NULL;
1548         sfmmup->sfmmu_scd_link.next = NULL;
1549         sfmmup->sfmmu_scd_link.prev = NULL;
1550         return (sfmmup);
1551 }
1552 
1553 /*
1554  * Create per-MMU context domain kstats for a given MMU ctx.
1555  */
1556 static void
1557 sfmmu_mmu_kstat_create(mmu_ctx_t *mmu_ctxp)
1558 {
1559         mmu_ctx_stat_t  stat;
1560         kstat_t         *mmu_kstat;
1561 
1562         ASSERT(MUTEX_HELD(&cpu_lock));


1902                  * INVALID_CONTEXT to it.
1903                  * Compatibility Note: hw takes care of MMU_SCONTEXT1
1904                  */
1905                 sfmmu_setctx_sec(INVALID_CONTEXT);
1906                 sfmmu_clear_utsbinfo();
1907 
1908                 kpreempt_enable();
1909                 sfmmu_hat_exit(hatlockp);
1910         }
1911 }
1912 
1913 /*
1914  * Free all the translation resources for the specified address space.
1915  * Called from as_free when an address space is being destroyed.
1916  */
1917 void
1918 hat_free_start(struct hat *sfmmup)
1919 {
1920         ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
1921         ASSERT(sfmmup != ksfmmup);
1922         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1923 
1924         sfmmup->sfmmu_free = 1;
1925         if (sfmmup->sfmmu_scdp != NULL) {
1926                 sfmmu_leave_scd(sfmmup, 0);
1927         }
1928 
1929         ASSERT(sfmmup->sfmmu_scdp == NULL);
1930 }
1931 
1932 void
1933 hat_free_end(struct hat *sfmmup)
1934 {
1935         int i;
1936 
1937         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1938         ASSERT(sfmmup->sfmmu_free == 1);
1939         ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
1940         ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
1941         ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0);
1942         ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0);
1943         ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0);
1944         ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0);
1945 
1946         if (sfmmup->sfmmu_rmstat) {
1947                 hat_freestat(sfmmup->sfmmu_as, NULL);
1948         }
1949 
1950         while (sfmmup->sfmmu_tsb != NULL) {
1951                 struct tsb_info *next = sfmmup->sfmmu_tsb->tsb_next;
1952                 sfmmu_tsbinfo_free(sfmmup->sfmmu_tsb);
1953                 sfmmup->sfmmu_tsb = next;
1954         }
1955 
1956         if (sfmmup->sfmmu_srdp != NULL) {
1957                 sfmmu_leave_srd(sfmmup);


1959                 for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
1960                         if (sfmmup->sfmmu_hmeregion_links[i] != NULL) {
1961                                 kmem_free(sfmmup->sfmmu_hmeregion_links[i],
1962                                     SFMMU_L2_HMERLINKS_SIZE);
1963                                 sfmmup->sfmmu_hmeregion_links[i] = NULL;
1964                         }
1965                 }
1966         }
1967         sfmmu_free_sfmmu(sfmmup);
1968 
1969 #ifdef DEBUG
1970         for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
1971                 ASSERT(sfmmup->sfmmu_hmeregion_links[i] == NULL);
1972         }
1973 #endif
1974 
1975         kmem_cache_free(sfmmuid_cache, sfmmup);
1976 }
1977 
1978 /*
1979  * Set up any translation structures, for the specified address space,
1980  * that are needed or preferred when the process is being swapped in.
1981  */
1982 /* ARGSUSED */
1983 void
1984 hat_swapin(struct hat *hat)
1985 {
1986         ASSERT(hat->sfmmu_xhat_provider == NULL);
1987 }
1988 
1989 /*
1990  * Free all of the translation resources, for the specified address space,
1991  * that can be freed while the process is swapped out. Called from as_swapout.
1992  * Also, free up the ctx that this process was using.
1993  */
1994 void
1995 hat_swapout(struct hat *sfmmup)
1996 {
1997         struct hmehash_bucket *hmebp;
1998         struct hme_blk *hmeblkp;
1999         struct hme_blk *pr_hblk = NULL;
2000         struct hme_blk *nx_hblk;
2001         int i;
2002         struct hme_blk *list = NULL;
2003         hatlock_t *hatlockp;
2004         struct tsb_info *tsbinfop;
2005         struct free_tsb {
2006                 struct free_tsb *next;
2007                 struct tsb_info *tsbinfop;
2008         };                      /* free list of TSBs */
2009         struct free_tsb *freelist, *last, *next;
2010 
2011         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
2012         SFMMU_STAT(sf_swapout);
2013 
2014         /*
2015          * There is no way to go from an as to all its translations in sfmmu.
2016          * Here is one of the times when we take the big hit and traverse
2017          * the hash looking for hme_blks to free up.  Not only do we free up
2018          * this as hme_blks but all those that are free.  We are obviously
2019          * swapping because we need memory so let's free up as much
2020          * as we can.
2021          *
2022          * Note that we don't flush TLB/TSB here -- it's not necessary
2023          * because:
2024          *  1) we free the ctx we're using and throw away the TSB(s);
2025          *  2) processes aren't runnable while being swapped out.
2026          */
2027         ASSERT(sfmmup != KHATID);
2028         for (i = 0; i <= UHMEHASH_SZ; i++) {
2029                 hmebp = &uhme_hash[i];
2030                 SFMMU_HASH_LOCK(hmebp);
2031                 hmeblkp = hmebp->hmeblkp;
2032                 pr_hblk = NULL;
2033                 while (hmeblkp) {
2034 
2035                         ASSERT(!hmeblkp->hblk_xhat_bit);
2036 
2037                         if ((hmeblkp->hblk_tag.htag_id == sfmmup) &&
2038                             !hmeblkp->hblk_shw_bit && !hmeblkp->hblk_lckcnt) {
2039                                 ASSERT(!hmeblkp->hblk_shared);
2040                                 (void) sfmmu_hblk_unload(sfmmup, hmeblkp,
2041                                     (caddr_t)get_hblk_base(hmeblkp),
2042                                     get_hblk_endaddr(hmeblkp),
2043                                     NULL, HAT_UNLOAD);
2044                         }
2045                         nx_hblk = hmeblkp->hblk_next;
2046                         if (!hmeblkp->hblk_vcnt && !hmeblkp->hblk_hmecnt) {
2047                                 ASSERT(!hmeblkp->hblk_lckcnt);
2048                                 sfmmu_hblk_hash_rm(hmebp, hmeblkp, pr_hblk,
2049                                     &list, 0);
2050                         } else {
2051                                 pr_hblk = hmeblkp;
2052                         }
2053                         hmeblkp = nx_hblk;
2054                 }
2055                 SFMMU_HASH_UNLOCK(hmebp);
2056         }
2057 
2058         sfmmu_hblks_list_purge(&list, 0);
2059 
2060         /*
2061          * Now free up the ctx so that others can reuse it.
2062          */
2063         hatlockp = sfmmu_hat_enter(sfmmup);
2064 
2065         sfmmu_invalidate_ctx(sfmmup);
2066 
2067         /*
2068          * Free TSBs, but not tsbinfos, and set SWAPPED flag.
2069          * If TSBs were never swapped in, just return.
2070          * This implies that we don't support partial swapping
2071          * of TSBs -- either all are swapped out, or none are.
2072          *
2073          * We must hold the HAT lock here to prevent racing with another
2074          * thread trying to unmap TTEs from the TSB or running the post-
2075          * relocator after relocating the TSB's memory.  Unfortunately, we
2076          * can't free memory while holding the HAT lock or we could
2077          * deadlock, so we build a list of TSBs to be freed after marking
2078          * the tsbinfos as swapped out and free them after dropping the
2079          * lock.
2080          */
2081         if (SFMMU_FLAGS_ISSET(sfmmup, HAT_SWAPPED)) {
2082                 sfmmu_hat_exit(hatlockp);
2083                 return;
2084         }
2085 
2086         SFMMU_FLAGS_SET(sfmmup, HAT_SWAPPED);
2087         last = freelist = NULL;
2088         for (tsbinfop = sfmmup->sfmmu_tsb; tsbinfop != NULL;
2089             tsbinfop = tsbinfop->tsb_next) {
2090                 ASSERT((tsbinfop->tsb_flags & TSB_SWAPPED) == 0);
2091 
2092                 /*
2093                  * Cast the TSB into a struct free_tsb and put it on the free
2094                  * list.
2095                  */
2096                 if (freelist == NULL) {
2097                         last = freelist = (struct free_tsb *)tsbinfop->tsb_va;
2098                 } else {
2099                         last->next = (struct free_tsb *)tsbinfop->tsb_va;
2100                         last = last->next;
2101                 }
2102                 last->next = NULL;
2103                 last->tsbinfop = tsbinfop;
2104                 tsbinfop->tsb_flags |= TSB_SWAPPED;
2105                 /*
2106                  * Zero out the TTE to clear the valid bit.
2107                  * Note we can't use a value like 0xbad because we want to
2108                  * ensure diagnostic bits are NEVER set on TTEs that might
2109                  * be loaded.  The intent is to catch any invalid access
2110                  * to the swapped TSB, such as a thread running with a valid
2111                  * context without first calling sfmmu_tsb_swapin() to
2112                  * allocate TSB memory.
2113                  */
2114                 tsbinfop->tsb_tte.ll = 0;
2115         }
2116 
2117         /* Now we can drop the lock and free the TSB memory. */
2118         sfmmu_hat_exit(hatlockp);
2119         for (; freelist != NULL; freelist = next) {
2120                 next = freelist->next;
2121                 sfmmu_tsb_free(freelist->tsbinfop);
2122         }
2123 }
2124 
2125 /*
2126  * Duplicate the translations of an as into another newas
2127  */
2128 /* ARGSUSED */
2129 int
2130 hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len,
2131         uint_t flag)
2132 {
2133         sf_srd_t *srdp;
2134         sf_scd_t *scdp;
2135         int i;
2136         extern uint_t get_color_start(struct as *);
2137 
2138         ASSERT(hat->sfmmu_xhat_provider == NULL);
2139         ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
2140             (flag == HAT_DUP_SRD));
2141         ASSERT(hat != ksfmmup);
2142         ASSERT(newhat != ksfmmup);
2143         ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
2144 
2145         if (flag == HAT_DUP_COW) {
2146                 panic("hat_dup: HAT_DUP_COW not supported");
2147         }
2148 
2149         if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
2150                 ASSERT(srdp->srd_evp != NULL);
2151                 VN_HOLD(srdp->srd_evp);
2152                 ASSERT(srdp->srd_refcnt > 0);
2153                 newhat->sfmmu_srdp = srdp;
2154                 atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
2155         }
2156 
2157         /*
2158          * HAT_DUP_ALL flag is used after as duplication is done.


2188         if (flag == HAT_DUP_ALL && consistent_coloring == 0 &&
2189             update_proc_pgcolorbase_after_fork != 0) {
2190                 hat->sfmmu_clrbin = get_color_start(hat->sfmmu_as);
2191         }
2192         return (0);
2193 }
2194 
2195 void
2196 hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
2197         uint_t attr, uint_t flags)
2198 {
2199         hat_do_memload(hat, addr, pp, attr, flags,
2200             SFMMU_INVALID_SHMERID);
2201 }
2202 
2203 void
2204 hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp,
2205         uint_t attr, uint_t flags, hat_region_cookie_t rcookie)
2206 {
2207         uint_t rid;
2208         if (rcookie == HAT_INVALID_REGION_COOKIE ||
2209             hat->sfmmu_xhat_provider != NULL) {
2210                 hat_do_memload(hat, addr, pp, attr, flags,
2211                     SFMMU_INVALID_SHMERID);
2212                 return;
2213         }
2214         rid = (uint_t)((uint64_t)rcookie);
2215         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2216         hat_do_memload(hat, addr, pp, attr, flags, rid);
2217 }
2218 
2219 /*
2220  * Set up addr to map to page pp with protection prot.
2221  * As an optimization we also load the TSB with the
2222  * corresponding tte but it is no big deal if  the tte gets kicked out.
2223  */
2224 static void
2225 hat_do_memload(struct hat *hat, caddr_t addr, struct page *pp,
2226         uint_t attr, uint_t flags, uint_t rid)
2227 {
2228         tte_t tte;
2229 
2230 
2231         ASSERT(hat != NULL);
2232         ASSERT(PAGE_LOCKED(pp));
2233         ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2234         ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2235         ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2236         SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE);
2237 
2238         if (PP_ISFREE(pp)) {
2239                 panic("hat_memload: loading a mapping to free page %p",
2240                     (void *)pp);
2241         }
2242 
2243         if (hat->sfmmu_xhat_provider) {
2244                 /* no regions for xhats */
2245                 ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
2246                 XHAT_MEMLOAD(hat, addr, pp, attr, flags);
2247                 return;
2248         }
2249 
2250         ASSERT((hat == ksfmmup) ||
2251             AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2252 
2253         if (flags & ~SFMMU_LOAD_ALLFLAG)
2254                 cmn_err(CE_NOTE, "hat_memload: unsupported flags %d",
2255                     flags & ~SFMMU_LOAD_ALLFLAG);
2256 
2257         if (hat->sfmmu_rmstat)
2258                 hat_resvstat(MMU_PAGESIZE, hat->sfmmu_as, addr);
2259 
2260 #if defined(SF_ERRATA_57)
2261         if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2262             (addr < errata57_limit) && (attr & PROT_EXEC) &&
2263             !(flags & HAT_LOAD_SHARE)) {
2264                 cmn_err(CE_WARN, "hat_memload: illegal attempt to make user "
2265                     " page executable");
2266                 attr &= ~PROT_EXEC;
2267         }
2268 #endif
2269 


2279 }
2280 
2281 /*
2282  * hat_devload can be called to map real memory (e.g.
2283  * /dev/kmem) and even though hat_devload will determine pf is
2284  * for memory, it will be unable to get a shared lock on the
2285  * page (because someone else has it exclusively) and will
2286  * pass dp = NULL.  If tteload doesn't get a non-NULL
2287  * page pointer it can't cache memory.
2288  */
2289 void
2290 hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
2291         uint_t attr, int flags)
2292 {
2293         tte_t tte;
2294         struct page *pp = NULL;
2295         int use_lgpg = 0;
2296 
2297         ASSERT(hat != NULL);
2298 
2299         if (hat->sfmmu_xhat_provider) {
2300                 XHAT_DEVLOAD(hat, addr, len, pfn, attr, flags);
2301                 return;
2302         }
2303 
2304         ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2305         ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2306         ASSERT((hat == ksfmmup) ||
2307             AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2308         if (len == 0)
2309                 panic("hat_devload: zero len");
2310         if (flags & ~SFMMU_LOAD_ALLFLAG)
2311                 cmn_err(CE_NOTE, "hat_devload: unsupported flags %d",
2312                     flags & ~SFMMU_LOAD_ALLFLAG);
2313 
2314 #if defined(SF_ERRATA_57)
2315         if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2316             (addr < errata57_limit) && (attr & PROT_EXEC) &&
2317             !(flags & HAT_LOAD_SHARE)) {
2318                 cmn_err(CE_WARN, "hat_devload: illegal attempt to make user "
2319                     " page executable");
2320                 attr &= ~PROT_EXEC;
2321         }
2322 #endif
2323 


2430          */
2431         if ((flags & HAT_LOAD_SHARE) == 0) {
2432                 sfmmu_check_page_sizes(hat, 1);
2433         }
2434 }
2435 
2436 void
2437 hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
2438         struct page **pps, uint_t attr, uint_t flags)
2439 {
2440         hat_do_memload_array(hat, addr, len, pps, attr, flags,
2441             SFMMU_INVALID_SHMERID);
2442 }
2443 
2444 void
2445 hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len,
2446         struct page **pps, uint_t attr, uint_t flags,
2447         hat_region_cookie_t rcookie)
2448 {
2449         uint_t rid;
2450         if (rcookie == HAT_INVALID_REGION_COOKIE ||
2451             hat->sfmmu_xhat_provider != NULL) {
2452                 hat_do_memload_array(hat, addr, len, pps, attr, flags,
2453                     SFMMU_INVALID_SHMERID);
2454                 return;
2455         }
2456         rid = (uint_t)((uint64_t)rcookie);
2457         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2458         hat_do_memload_array(hat, addr, len, pps, attr, flags, rid);
2459 }
2460 
2461 /*
2462  * Map the largest extent possible out of the page array. The array may NOT
2463  * be in order.  The largest possible mapping a page can have
2464  * is specified in the p_szc field.  The p_szc field
2465  * cannot change as long as there any mappings (large or small)
2466  * to any of the pages that make up the large page. (ie. any
2467  * promotion/demotion of page size is not up to the hat but up to
2468  * the page free list manager).  The array
2469  * should consist of properly aligned contiguous pages that are
2470  * part of a big page for a large mapping to be created.
2471  */
2472 static void
2473 hat_do_memload_array(struct hat *hat, caddr_t addr, size_t len,
2474         struct page **pps, uint_t attr, uint_t flags, uint_t rid)
2475 {
2476         int  ttesz;
2477         size_t mapsz;
2478         pgcnt_t numpg, npgs;
2479         tte_t tte;
2480         page_t *pp;
2481         uint_t large_pages_disable;
2482 
2483         ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2484         SFMMU_VALIDATE_HMERID(hat, rid, addr, len);
2485 
2486         if (hat->sfmmu_xhat_provider) {
2487                 ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
2488                 XHAT_MEMLOAD_ARRAY(hat, addr, len, pps, attr, flags);
2489                 return;
2490         }
2491 
2492         if (hat->sfmmu_rmstat)
2493                 hat_resvstat(len, hat->sfmmu_as, addr);
2494 
2495 #if defined(SF_ERRATA_57)
2496         if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2497             (addr < errata57_limit) && (attr & PROT_EXEC) &&
2498             !(flags & HAT_LOAD_SHARE)) {
2499                 cmn_err(CE_WARN, "hat_memload_array: illegal attempt to make "
2500                     "user page executable");
2501                 attr &= ~PROT_EXEC;
2502         }
2503 #endif
2504 
2505         /* Get number of pages */
2506         npgs = len >> MMU_PAGESHIFT;
2507 
2508         if (flags & HAT_LOAD_SHARE) {
2509                 large_pages_disable = disable_ism_large_pages;
2510         } else {
2511                 large_pages_disable = disable_large_pages;


3954                             rsz, rgnp->rgn_obj,
3955                             rgnp->rgn_objoff);
3956                 }
3957                 ttesz--;
3958         }
3959 }
3960 
3961 /*
3962  * Release one hardware address translation lock on the given address range.
3963  */
3964 void
3965 hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
3966 {
3967         struct hmehash_bucket *hmebp;
3968         hmeblk_tag hblktag;
3969         int hmeshift, hashno = 1;
3970         struct hme_blk *hmeblkp, *list = NULL;
3971         caddr_t endaddr;
3972 
3973         ASSERT(sfmmup != NULL);
3974         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
3975 
3976         ASSERT((sfmmup == ksfmmup) ||
3977             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
3978         ASSERT((len & MMU_PAGEOFFSET) == 0);
3979         endaddr = addr + len;
3980         hblktag.htag_id = sfmmup;
3981         hblktag.htag_rid = SFMMU_INVALID_SHMERID;
3982 
3983         /*
3984          * Spitfire supports 4 page sizes.
3985          * Most pages are expected to be of the smallest page size (8K) and
3986          * these will not need to be rehashed. 64K pages also don't need to be
3987          * rehashed because an hmeblk spans 64K of address space. 512K pages
3988          * might need 1 rehash and 4M pages might need 2 rehashes.
3989          */
3990         while (addr < endaddr) {
3991                 hmeshift = HME_HASH_SHIFT(hashno);
3992                 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
3993                 hblktag.htag_rehash = hashno;
3994                 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);


4039 {
4040         sf_srd_t *srdp;
4041         sf_region_t *rgnp;
4042         int ttesz;
4043         uint_t rid;
4044         caddr_t eaddr;
4045         caddr_t va;
4046         int hmeshift;
4047         hmeblk_tag hblktag;
4048         struct hmehash_bucket *hmebp;
4049         struct hme_blk *hmeblkp;
4050         struct hme_blk *pr_hblk;
4051         struct hme_blk *list;
4052 
4053         if (rcookie == HAT_INVALID_REGION_COOKIE) {
4054                 hat_unlock(sfmmup, addr, len);
4055                 return;
4056         }
4057 
4058         ASSERT(sfmmup != NULL);
4059         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4060         ASSERT(sfmmup != ksfmmup);
4061 
4062         srdp = sfmmup->sfmmu_srdp;
4063         rid = (uint_t)((uint64_t)rcookie);
4064         VERIFY3U(rid, <, SFMMU_MAX_HME_REGIONS);
4065         eaddr = addr + len;
4066         va = addr;
4067         list = NULL;
4068         rgnp = srdp->srd_hmergnp[rid];
4069         SFMMU_VALIDATE_HMERID(sfmmup, rid, addr, len);
4070 
4071         ASSERT(IS_P2ALIGNED(addr, TTEBYTES(rgnp->rgn_pgszc)));
4072         ASSERT(IS_P2ALIGNED(len, TTEBYTES(rgnp->rgn_pgszc)));
4073         if (rgnp->rgn_pgszc < HBLK_MIN_TTESZ) {
4074                 ttesz = HBLK_MIN_TTESZ;
4075         } else {
4076                 ttesz = rgnp->rgn_pgszc;
4077         }
4078         while (va < eaddr) {
4079                 while (ttesz < rgnp->rgn_pgszc &&


4751                 page_unlock(pp);
4752 }
4753 
4754 /*
4755  * hat_probe returns 1 if the translation for the address 'addr' is
4756  * loaded, zero otherwise.
4757  *
4758  * hat_probe should be used only for advisory purposes because it may
4759  * occasionally return the wrong value. The implementation must guarantee that
4760  * returning the wrong value is a very rare event. hat_probe is used
4761  * to implement optimizations in the segment drivers.
4762  *
4763  */
4764 int
4765 hat_probe(struct hat *sfmmup, caddr_t addr)
4766 {
4767         pfn_t pfn;
4768         tte_t tte;
4769 
4770         ASSERT(sfmmup != NULL);
4771         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4772 
4773         ASSERT((sfmmup == ksfmmup) ||
4774             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
4775 
4776         if (sfmmup == ksfmmup) {
4777                 while ((pfn = sfmmu_vatopfn(addr, sfmmup, &tte))
4778                     == PFN_SUSPENDED) {
4779                         sfmmu_vatopfn_suspended(addr, sfmmup, &tte);
4780                 }
4781         } else {
4782                 pfn = sfmmu_uvatopfn(addr, sfmmup, NULL);
4783         }
4784 
4785         if (pfn != PFN_INVALID)
4786                 return (1);
4787         else
4788                 return (0);
4789 }
4790 
4791 ssize_t
4792 hat_getpagesize(struct hat *sfmmup, caddr_t addr)
4793 {
4794         tte_t tte;
4795 
4796         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4797 
4798         if (sfmmup == ksfmmup) {
4799                 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4800                         return (-1);
4801                 }
4802         } else {
4803                 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4804                         return (-1);
4805                 }
4806         }
4807 
4808         ASSERT(TTE_IS_VALID(&tte));
4809         return (TTEBYTES(TTE_CSZ(&tte)));
4810 }
4811 
4812 uint_t
4813 hat_getattr(struct hat *sfmmup, caddr_t addr, uint_t *attr)
4814 {
4815         tte_t tte;
4816 
4817         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4818 
4819         if (sfmmup == ksfmmup) {
4820                 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4821                         tte.ll = 0;
4822                 }
4823         } else {
4824                 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4825                         tte.ll = 0;
4826                 }
4827         }
4828         if (TTE_IS_VALID(&tte)) {
4829                 *attr = sfmmu_ptov_attr(&tte);
4830                 return (0);
4831         }
4832         *attr = 0;
4833         return ((uint_t)0xffffffff);
4834 }
4835 
4836 /*
4837  * Enables more attributes on specified address range (ie. logical OR)
4838  */
4839 void
4840 hat_setattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4841 {
4842         if (hat->sfmmu_xhat_provider) {
4843                 XHAT_SETATTR(hat, addr, len, attr);
4844                 return;
4845         } else {
4846                 /*
4847                  * This must be a CPU HAT. If the address space has
4848                  * XHATs attached, change attributes for all of them,
4849                  * just in case
4850                  */
4851                 ASSERT(hat->sfmmu_as != NULL);
4852                 if (hat->sfmmu_as->a_xhat != NULL)
4853                         xhat_setattr_all(hat->sfmmu_as, addr, len, attr);
4854         }
4855 
4856         sfmmu_chgattr(hat, addr, len, attr, SFMMU_SETATTR);
4857 }
4858 
4859 /*
4860  * Assigns attributes to the specified address range.  All the attributes
4861  * are specified.
4862  */
4863 void
4864 hat_chgattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4865 {
4866         if (hat->sfmmu_xhat_provider) {
4867                 XHAT_CHGATTR(hat, addr, len, attr);
4868                 return;
4869         } else {
4870                 /*
4871                  * This must be a CPU HAT. If the address space has
4872                  * XHATs attached, change attributes for all of them,
4873                  * just in case
4874                  */
4875                 ASSERT(hat->sfmmu_as != NULL);
4876                 if (hat->sfmmu_as->a_xhat != NULL)
4877                         xhat_chgattr_all(hat->sfmmu_as, addr, len, attr);
4878         }
4879 
4880         sfmmu_chgattr(hat, addr, len, attr, SFMMU_CHGATTR);
4881 }
4882 
4883 /*
4884  * Remove attributes on the specified address range (ie. logical NAND)
4885  */
4886 void
4887 hat_clrattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4888 {
4889         if (hat->sfmmu_xhat_provider) {
4890                 XHAT_CLRATTR(hat, addr, len, attr);
4891                 return;
4892         } else {
4893                 /*
4894                  * This must be a CPU HAT. If the address space has
4895                  * XHATs attached, change attributes for all of them,
4896                  * just in case
4897                  */
4898                 ASSERT(hat->sfmmu_as != NULL);
4899                 if (hat->sfmmu_as->a_xhat != NULL)
4900                         xhat_clrattr_all(hat->sfmmu_as, addr, len, attr);
4901         }
4902 
4903         sfmmu_chgattr(hat, addr, len, attr, SFMMU_CLRATTR);
4904 }
4905 
4906 /*
4907  * Change attributes on an address range to that specified by attr and mode.
4908  */
4909 static void
4910 sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
4911         int mode)
4912 {
4913         struct hmehash_bucket *hmebp;
4914         hmeblk_tag hblktag;
4915         int hmeshift, hashno = 1;
4916         struct hme_blk *hmeblkp, *list = NULL;
4917         caddr_t endaddr;
4918         cpuset_t cpuset;
4919         demap_range_t dmr;
4920 
4921         CPUSET_ZERO(cpuset);


5234  * Change the protections in the virtual address range
5235  * given to the specified virtual protection.  If vprot is ~PROT_WRITE,
5236  * then remove write permission, leaving the other
5237  * permissions unchanged.  If vprot is ~PROT_USER, remove user permissions.
5238  *
5239  */
5240 void
5241 hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot)
5242 {
5243         struct hmehash_bucket *hmebp;
5244         hmeblk_tag hblktag;
5245         int hmeshift, hashno = 1;
5246         struct hme_blk *hmeblkp, *list = NULL;
5247         caddr_t endaddr;
5248         cpuset_t cpuset;
5249         demap_range_t dmr;
5250 
5251         ASSERT((len & MMU_PAGEOFFSET) == 0);
5252         ASSERT(((uintptr_t)addr & MMU_PAGEOFFSET) == 0);
5253 
5254         if (sfmmup->sfmmu_xhat_provider) {
5255                 XHAT_CHGPROT(sfmmup, addr, len, vprot);
5256                 return;
5257         } else {
5258                 /*
5259                  * This must be a CPU HAT. If the address space has
5260                  * XHATs attached, change attributes for all of them,
5261                  * just in case
5262                  */
5263                 ASSERT(sfmmup->sfmmu_as != NULL);
5264                 if (sfmmup->sfmmu_as->a_xhat != NULL)
5265                         xhat_chgprot_all(sfmmup->sfmmu_as, addr, len, vprot);
5266         }
5267 
5268         CPUSET_ZERO(cpuset);
5269 
5270         if ((vprot != (uint_t)~PROT_WRITE) && (vprot & PROT_USER) &&
5271             ((addr + len) > (caddr_t)USERLIMIT)) {
5272                 panic("user addr %p vprot %x in kernel space",
5273                     (void *)addr, vprot);
5274         }
5275         endaddr = addr + len;
5276         hblktag.htag_id = sfmmup;
5277         hblktag.htag_rid = SFMMU_INVALID_SHMERID;
5278         DEMAP_RANGE_INIT(sfmmup, &dmr);
5279 
5280         while (addr < endaddr) {
5281                 hmeshift = HME_HASH_SHIFT(hashno);
5282                 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
5283                 hblktag.htag_rehash = hashno;
5284                 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
5285 
5286                 SFMMU_HASH_LOCK(hmebp);


5681 hat_unload_callback(
5682         struct hat *sfmmup,
5683         caddr_t addr,
5684         size_t len,
5685         uint_t flags,
5686         hat_callback_t *callback)
5687 {
5688         struct hmehash_bucket *hmebp;
5689         hmeblk_tag hblktag;
5690         int hmeshift, hashno, iskernel;
5691         struct hme_blk *hmeblkp, *pr_hblk, *list = NULL;
5692         caddr_t endaddr;
5693         cpuset_t cpuset;
5694         int addr_count = 0;
5695         int a;
5696         caddr_t cb_start_addr[MAX_CB_ADDR];
5697         caddr_t cb_end_addr[MAX_CB_ADDR];
5698         int issegkmap = ISSEGKMAP(sfmmup, addr);
5699         demap_range_t dmr, *dmrp;
5700 
5701         if (sfmmup->sfmmu_xhat_provider) {
5702                 XHAT_UNLOAD_CALLBACK(sfmmup, addr, len, flags, callback);
5703                 return;
5704         } else {
5705                 /*
5706                  * This must be a CPU HAT. If the address space has
5707                  * XHATs attached, unload the mappings for all of them,
5708                  * just in case
5709                  */
5710                 ASSERT(sfmmup->sfmmu_as != NULL);
5711                 if (sfmmup->sfmmu_as->a_xhat != NULL)
5712                         xhat_unload_callback_all(sfmmup->sfmmu_as, addr,
5713                             len, flags, callback);
5714         }
5715 
5716         ASSERT((sfmmup == ksfmmup) || (flags & HAT_UNLOAD_OTHER) || \
5717             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
5718 
5719         ASSERT(sfmmup != NULL);
5720         ASSERT((len & MMU_PAGEOFFSET) == 0);
5721         ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
5722 
5723         /*
5724          * Probing through a large VA range (say 63 bits) will be slow, even
5725          * at 4 Meg steps between the probes. So, when the virtual address range
5726          * is very large, search the HME entries for what to unload.
5727          *
5728          *      len >> TTE_PAGE_SHIFT(TTE4M) is the # of 4Meg probes we'd need
5729          *
5730          *      UHMEHASH_SZ is number of hash buckets to examine
5731          *
5732          */
5733         if (sfmmup != KHATID && (len >> TTE_PAGE_SHIFT(TTE4M)) > UHMEHASH_SZ) {
5734                 hat_unload_large_virtual(sfmmup, addr, len, flags, callback);


5971                         callback->hcb_start_addr = cb_start_addr[a];
5972                         callback->hcb_end_addr = cb_end_addr[a];
5973                         callback->hcb_function(callback);
5974                 }
5975         }
5976 
5977         /*
5978          * Check TSB and TLB page sizes if the process isn't exiting.
5979          */
5980         if (!sfmmup->sfmmu_free)
5981                 sfmmu_check_page_sizes(sfmmup, 0);
5982 }
5983 
5984 /*
5985  * Unload all the mappings in the range [addr..addr+len). addr and len must
5986  * be MMU_PAGESIZE aligned.
5987  */
5988 void
5989 hat_unload(struct hat *sfmmup, caddr_t addr, size_t len, uint_t flags)
5990 {
5991         if (sfmmup->sfmmu_xhat_provider) {
5992                 XHAT_UNLOAD(sfmmup, addr, len, flags);
5993                 return;
5994         }
5995         hat_unload_callback(sfmmup, addr, len, flags, NULL);
5996 }
5997 
5998 
5999 /*
6000  * Find the largest mapping size for this page.
6001  */
6002 int
6003 fnd_mapping_sz(page_t *pp)
6004 {
6005         int sz;
6006         int p_index;
6007 
6008         p_index = PP_MAPINDEX(pp);
6009 
6010         sz = 0;
6011         p_index >>= 1;    /* don't care about 8K bit */
6012         for (; p_index; p_index >>= 1) {
6013                 sz++;
6014         }


6314                 va += sz;
6315         }
6316 }
6317 
6318 /*
6319  * Synchronize all the mappings in the range [addr..addr+len).
6320  * Can be called with clearflag having two states:
6321  * HAT_SYNC_DONTZERO means just return the rm stats
6322  * HAT_SYNC_ZERORM means zero rm bits in the tte and return the stats
6323  */
6324 void
6325 hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
6326 {
6327         struct hmehash_bucket *hmebp;
6328         hmeblk_tag hblktag;
6329         int hmeshift, hashno = 1;
6330         struct hme_blk *hmeblkp, *list = NULL;
6331         caddr_t endaddr;
6332         cpuset_t cpuset;
6333 
6334         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
6335         ASSERT((sfmmup == ksfmmup) ||
6336             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
6337         ASSERT((len & MMU_PAGEOFFSET) == 0);
6338         ASSERT((clearflag == HAT_SYNC_DONTZERO) ||
6339             (clearflag == HAT_SYNC_ZERORM));
6340 
6341         CPUSET_ZERO(cpuset);
6342 
6343         endaddr = addr + len;
6344         hblktag.htag_id = sfmmup;
6345         hblktag.htag_rid = SFMMU_INVALID_SHMERID;
6346 
6347         /*
6348          * Spitfire supports 4 page sizes.
6349          * Most pages are expected to be of the smallest page
6350          * size (8K) and these will not need to be rehashed. 64K
6351          * pages also don't need to be rehashed because an hmeblk
6352          * spans 64K of address space. 512K pages might need 1 rehash
6353          * and 4M pages 2 rehashes.
6354          */


7121                         return;         /* non-fatal */
7122         }
7123         panic("pa_hment leaked: 0x%p", (void *)pahmep);
7124 }
7125 
7126 /*
7127  * Remove all mappings to page 'pp'.
7128  */
7129 int
7130 hat_pageunload(struct page *pp, uint_t forceflag)
7131 {
7132         struct page *origpp = pp;
7133         struct sf_hment *sfhme, *tmphme;
7134         struct hme_blk *hmeblkp;
7135         kmutex_t *pml;
7136 #ifdef VAC
7137         kmutex_t *pmtx;
7138 #endif
7139         cpuset_t cpuset, tset;
7140         int index, cons;
7141         int xhme_blks;
7142         int pa_hments;
7143 
7144         ASSERT(PAGE_EXCL(pp));
7145 
7146 retry_xhat:
7147         tmphme = NULL;
7148         xhme_blks = 0;
7149         pa_hments = 0;
7150         CPUSET_ZERO(cpuset);
7151 
7152         pml = sfmmu_mlist_enter(pp);
7153 
7154 #ifdef VAC
7155         if (pp->p_kpmref)
7156                 sfmmu_kpm_pageunload(pp);
7157         ASSERT(!PP_ISMAPPED_KPM(pp));
7158 #endif
7159         /*
7160          * Clear vpm reference. Since the page is exclusively locked
7161          * vpm cannot be referencing it.
7162          */
7163         if (vpm_enable) {
7164                 pp->p_vpmref = 0;
7165         }
7166 
7167         index = PP_MAPINDEX(pp);
7168         cons = TTE8K;
7169 retry:
7170         for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
7171                 tmphme = sfhme->hme_next;
7172 
7173                 if (IS_PAHME(sfhme)) {
7174                         ASSERT(sfhme->hme_data != NULL);
7175                         pa_hments++;
7176                         continue;
7177                 }
7178 
7179                 hmeblkp = sfmmu_hmetohblk(sfhme);
7180                 if (hmeblkp->hblk_xhat_bit) {
7181                         struct xhat_hme_blk *xblk =
7182                             (struct xhat_hme_blk *)hmeblkp;
7183 
7184                         (void) XHAT_PAGEUNLOAD(xblk->xhat_hme_blk_hat,
7185                             pp, forceflag, XBLK2PROVBLK(xblk));
7186 
7187                         xhme_blks = 1;
7188                         continue;
7189                 }
7190 
7191                 /*
7192                  * If there are kernel mappings don't unload them, they will
7193                  * be suspended.
7194                  */
7195                 if (forceflag == SFMMU_KERNEL_RELOC && hmeblkp->hblk_lckcnt &&
7196                     hmeblkp->hblk_tag.htag_id == ksfmmup)
7197                         continue;
7198 
7199                 tset = sfmmu_pageunload(pp, sfhme, cons);
7200                 CPUSET_OR(cpuset, tset);
7201         }
7202 
7203         while (index != 0) {
7204                 index = index >> 1;
7205                 if (index != 0)
7206                         cons++;
7207                 if (index & 0x1) {
7208                         /* Go to leading page */
7209                         pp = PP_GROUPLEADER(pp, cons);
7210                         ASSERT(sfmmu_mlist_held(pp));
7211                         goto retry;
7212                 }
7213         }
7214 
7215         /*
7216          * cpuset may be empty if the page was only mapped by segkpm,
7217          * in which case we won't actually cross-trap.
7218          */
7219         xt_sync(cpuset);
7220 
7221         /*
7222          * The page should have no mappings at this point, unless
7223          * we were called from hat_page_relocate() in which case we
7224          * leave the locked mappings which will be suspended later.
7225          */
7226         ASSERT(!PP_ISMAPPED(origpp) || xhme_blks || pa_hments ||
7227             (forceflag == SFMMU_KERNEL_RELOC));
7228 
7229 #ifdef VAC
7230         if (PP_ISTNC(pp)) {
7231                 if (cons == TTE8K) {
7232                         pmtx = sfmmu_page_enter(pp);
7233                         PP_CLRTNC(pp);
7234                         sfmmu_page_exit(pmtx);
7235                 } else {
7236                         conv_tnc(pp, cons);
7237                 }
7238         }
7239 #endif  /* VAC */
7240 
7241         if (pa_hments && forceflag != SFMMU_KERNEL_RELOC) {
7242                 /*
7243                  * Unlink any pa_hments and free them, calling back
7244                  * the responsible subsystem to notify it of the error.
7245                  * This can occur in situations such as drivers leaking
7246                  * DMA handles: naughty, but common enough that we'd like
7247                  * to keep the system running rather than bringing it
7248                  * down with an obscure error like "pa_hment leaked"
7249                  * which doesn't aid the user in debugging their driver.
7250                  */
7251                 for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
7252                         tmphme = sfhme->hme_next;
7253                         if (IS_PAHME(sfhme)) {
7254                                 struct pa_hment *pahmep = sfhme->hme_data;
7255                                 sfmmu_pahment_leaked(pahmep);
7256                                 HME_SUB(sfhme, pp);
7257                                 kmem_cache_free(pa_hment_cache, pahmep);
7258                         }
7259                 }
7260 
7261                 ASSERT(!PP_ISMAPPED(origpp) || xhme_blks);
7262         }
7263 
7264         sfmmu_mlist_exit(pml);
7265 
7266         /*
7267          * XHAT may not have finished unloading pages
7268          * because some other thread was waiting for
7269          * mlist lock and XHAT_PAGEUNLOAD let it do
7270          * the job.
7271          */
7272         if (xhme_blks) {
7273                 pp = origpp;
7274                 goto retry_xhat;
7275         }
7276 
7277         return (0);
7278 }
7279 
7280 cpuset_t
7281 sfmmu_pageunload(page_t *pp, struct sf_hment *sfhme, int cons)
7282 {
7283         struct hme_blk *hmeblkp;
7284         sfmmu_t *sfmmup;
7285         tte_t tte, ttemod;
7286 #ifdef DEBUG
7287         tte_t orig_old;
7288 #endif /* DEBUG */
7289         caddr_t addr;
7290         int ttesz;
7291         int ret;
7292         cpuset_t cpuset;
7293 
7294         ASSERT(pp != NULL);
7295         ASSERT(sfmmu_mlist_held(pp));
7296         ASSERT(!PP_ISKAS(pp));


7537 
7538         clearflag &= ~HAT_SYNC_STOPON_SHARED;
7539         pml = sfmmu_mlist_enter(pp);
7540         index = PP_MAPINDEX(pp);
7541         cons = TTE8K;
7542 retry:
7543         for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
7544                 /*
7545                  * We need to save the next hment on the list since
7546                  * it is possible for pagesync to remove an invalid hment
7547                  * from the list.
7548                  */
7549                 tmphme = sfhme->hme_next;
7550                 if (IS_PAHME(sfhme))
7551                         continue;
7552                 /*
7553                  * If we are looking for large mappings and this hme doesn't
7554                  * reach the range we are seeking, just ignore it.
7555                  */
7556                 hmeblkp = sfmmu_hmetohblk(sfhme);
7557                 if (hmeblkp->hblk_xhat_bit)
7558                         continue;
7559 
7560                 if (hme_size(sfhme) < cons)
7561                         continue;
7562 
7563                 if (stop_on_sh) {
7564                         if (hmeblkp->hblk_shared) {
7565                                 sf_srd_t *srdp = hblktosrd(hmeblkp);
7566                                 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7567                                 sf_region_t *rgnp;
7568                                 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7569                                 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7570                                 ASSERT(srdp != NULL);
7571                                 rgnp = srdp->srd_hmergnp[rid];
7572                                 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
7573                                     rgnp, rid);
7574                                 shcnt += rgnp->rgn_refcnt;
7575                         } else {
7576                                 shcnt++;
7577                         }
7578                         if (shcnt > po_share) {


7698 {
7699         caddr_t addr;
7700         tte_t tte;
7701         tte_t ttemod;
7702         struct hme_blk *hmeblkp;
7703         int ret;
7704         sfmmu_t *sfmmup;
7705         cpuset_t cpuset;
7706 
7707         ASSERT(pp != NULL);
7708         ASSERT(sfmmu_mlist_held(pp));
7709 
7710         CPUSET_ZERO(cpuset);
7711         SFMMU_STAT(sf_clrwrt);
7712 
7713 retry:
7714 
7715         sfmmu_copytte(&sfhme->hme_tte, &tte);
7716         if (TTE_IS_VALID(&tte) && TTE_IS_WRITABLE(&tte)) {
7717                 hmeblkp = sfmmu_hmetohblk(sfhme);
7718 
7719                 /*
7720                  * xhat mappings should never be to a VMODSORT page.
7721                  */
7722                 ASSERT(hmeblkp->hblk_xhat_bit == 0);
7723 
7724                 sfmmup = hblktosfmmu(hmeblkp);
7725                 addr = tte_to_vaddr(hmeblkp, tte);
7726 
7727                 ttemod = tte;
7728                 TTE_CLR_WRT(&ttemod);
7729                 TTE_CLR_MOD(&ttemod);
7730                 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);
7731 
7732                 /*
7733                  * if cas failed and the new value is not what
7734                  * we want retry
7735                  */
7736                 if (ret < 0)
7737                         goto retry;
7738 
7739                 /* we win the cas */
7740                 if (ret > 0) {
7741                         if (hmeblkp->hblk_shared) {
7742                                 sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7743                                 uint_t rid = hmeblkp->hblk_tag.htag_rid;


7968  * Returns a page frame number for a given virtual address.
7969  * Returns PFN_INVALID to indicate an invalid mapping
7970  */
7971 pfn_t
7972 hat_getpfnum(struct hat *hat, caddr_t addr)
7973 {
7974         pfn_t pfn;
7975         tte_t tte;
7976 
7977         /*
7978          * We would like to
7979          * ASSERT(AS_LOCK_HELD(as, &as->a_lock));
7980          * but we can't because the iommu driver will call this
7981          * routine at interrupt time and it can't grab the as lock
7982          * or it will deadlock: A thread could have the as lock
7983          * and be waiting for io.  The io can't complete
7984          * because the interrupt thread is blocked trying to grab
7985          * the as lock.
7986          */
7987 
7988         ASSERT(hat->sfmmu_xhat_provider == NULL);
7989 
7990         if (hat == ksfmmup) {
7991                 if (IS_KMEM_VA_LARGEPAGE(addr)) {
7992                         ASSERT(segkmem_lpszc > 0);
7993                         pfn = sfmmu_kvaszc2pfn(addr, segkmem_lpszc);
7994                         if (pfn != PFN_INVALID) {
7995                                 sfmmu_check_kpfn(pfn);
7996                                 return (pfn);
7997                         }
7998                 } else if (segkpm && IS_KPM_ADDR(addr)) {
7999                         return (sfmmu_kpm_vatopfn(addr));
8000                 }
8001                 while ((pfn = sfmmu_vatopfn(addr, ksfmmup, &tte))
8002                     == PFN_SUSPENDED) {
8003                         sfmmu_vatopfn_suspended(addr, ksfmmup, &tte);
8004                 }
8005                 sfmmu_check_kpfn(pfn);
8006                 return (pfn);
8007         } else {
8008                 return (sfmmu_uvatopfn(addr, hat, NULL));
8009         }


8153                                 SFMMU_HASH_UNLOCK(hmebp);
8154                                 pfn = PFN_INVALID;
8155                                 return (pfn);
8156                         }
8157                 }
8158                 SFMMU_HASH_UNLOCK(hmebp);
8159                 hashno++;
8160         } while (hashno <= mmu_hashcnt);
8161         return (PFN_INVALID);
8162 }
8163 
8164 
8165 /*
8166  * For compatibility with AT&T and later optimizations
8167  */
8168 /* ARGSUSED */
8169 void
8170 hat_map(struct hat *hat, caddr_t addr, size_t len, uint_t flags)
8171 {
8172         ASSERT(hat != NULL);
8173         ASSERT(hat->sfmmu_xhat_provider == NULL);
8174 }
8175 
8176 /*
8177  * Return the number of mappings to a particular page.  This number is an
8178  * approximation of the number of people sharing the page.
8179  *
8180  * shared hmeblks or ism hmeblks are counted as 1 mapping here.
8181  * hat_page_checkshare() can be used to compare threshold to share
8182  * count that reflects the number of region sharers albeit at higher cost.
8183  */
8184 ulong_t
8185 hat_page_getshare(page_t *pp)
8186 {
8187         page_t *spp = pp;       /* start page */
8188         kmutex_t *pml;
8189         ulong_t cnt;
8190         int index, sz = TTE64K;
8191 
8192         /*
8193          * We need to grab the mlist lock to make sure any outstanding


8246 
8247         if (vpm_enable && pp->p_vpmref) {
8248                 cnt += 1;
8249         }
8250 
8251         if (pp->p_share + cnt > sh_thresh) {
8252                 sfmmu_mlist_exit(pml);
8253                 return (1);
8254         }
8255 
8256         index = PP_MAPINDEX(pp);
8257 
8258 again:
8259         for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
8260                 tmphme = sfhme->hme_next;
8261                 if (IS_PAHME(sfhme)) {
8262                         continue;
8263                 }
8264 
8265                 hmeblkp = sfmmu_hmetohblk(sfhme);
8266                 if (hmeblkp->hblk_xhat_bit) {
8267                         cnt++;
8268                         if (cnt > sh_thresh) {
8269                                 sfmmu_mlist_exit(pml);
8270                                 return (1);
8271                         }
8272                         continue;
8273                 }
8274                 if (hme_size(sfhme) != sz) {
8275                         continue;
8276                 }
8277 
8278                 if (hmeblkp->hblk_shared) {
8279                         sf_srd_t *srdp = hblktosrd(hmeblkp);
8280                         uint_t rid = hmeblkp->hblk_tag.htag_rid;
8281                         sf_region_t *rgnp;
8282                         ASSERT(SFMMU_IS_SHMERID_VALID(rid));
8283                         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
8284                         ASSERT(srdp != NULL);
8285                         rgnp = srdp->srd_hmergnp[rid];
8286                         SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
8287                             rgnp, rid);
8288                         cnt += rgnp->rgn_refcnt;
8289                 } else {
8290                         cnt++;
8291                 }
8292                 if (cnt > sh_thresh) {
8293                         sfmmu_mlist_exit(pml);


8368                 CPUSET_ZERO(cpuset);
8369                 sz = TTE64K;
8370                 sync = 1;
8371         }
8372 
8373         while (index) {
8374                 if (!(index & 0x1)) {
8375                         index >>= 1;
8376                         sz++;
8377                         continue;
8378                 }
8379                 ASSERT(sz <= pszc);
8380                 rootpp = PP_GROUPLEADER(pp, sz);
8381                 for (sfhme = rootpp->p_mapping; sfhme; sfhme = tmphme) {
8382                         tmphme = sfhme->hme_next;
8383                         ASSERT(!IS_PAHME(sfhme));
8384                         hmeblkp = sfmmu_hmetohblk(sfhme);
8385                         if (hme_size(sfhme) != sz) {
8386                                 continue;
8387                         }
8388                         if (hmeblkp->hblk_xhat_bit) {
8389                                 cmn_err(CE_PANIC,
8390                                     "hat_page_demote: xhat hmeblk");
8391                         }
8392                         tset = sfmmu_pageunload(rootpp, sfhme, sz);
8393                         CPUSET_OR(cpuset, tset);
8394                 }
8395                 if (index >>= 1) {
8396                         sz++;
8397                 }
8398         }
8399 
8400         ASSERT(!PP_ISMAPPED_LARGE(pp));
8401 
8402         if (sync) {
8403                 xt_sync(cpuset);
8404 #ifdef VAC
8405                 if (PP_ISTNC(pp)) {
8406                         conv_tnc(rootpp, sz);
8407                 }
8408 #endif  /* VAC */
8409         }
8410 
8411         pmtx = sfmmu_page_enter(pp);

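Note: p_index keeps one bit per large mapping size, so the demote loop above right-shifts the index and bumps the TTE size code until every set bit has been visited. A user-level sketch of the same walk; the bit-to-size assignment is illustrative, not the kernel's macros:

#include <stdio.h>

int
main(void)
{
        /* Illustrative layout: bit 1 ~ 64K, bit 2 ~ 512K, bit 3 ~ 4M, ... */
        static const char *szname[] =
            { "8K", "64K", "512K", "4M", "32M", "256M" };
        unsigned int index = 0x0a;      /* pretend 64K and 4M mappings exist */
        int sz = 0;

        while (index) {
                if (!(index & 0x1)) {   /* no mapping of this size */
                        index >>= 1;
                        sz++;
                        continue;
                }
                printf("visit the %s mappings of the group leader\n",
                    szname[sz]);
                if (index >>= 1)        /* more sizes left to look at */
                        sz++;
        }
        return (0);
}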

8499  * This is currently implemented as the number of bytes that have active
8500  * hardware translations that have page structures.  Therefore, it can
8501  * underestimate the traditional resident set size, e.g., if the
8502  * physical page is present and the hardware translation is missing;
8503  * and it can overestimate the rss, e.g., if there are active
8504  * translations to a frame buffer with page structs.
8505  * Also, it does not take sharing into account.
8506  *
8507  * Note that we don't acquire locks here since this function is most often
8508  * called from the clock thread.
8509  */
8510 size_t
8511 hat_get_mapped_size(struct hat *hat)
8512 {
8513         size_t          assize = 0;
8514         int             i;
8515 
8516         if (hat == NULL)
8517                 return (0);
8518 
8519         ASSERT(hat->sfmmu_xhat_provider == NULL);
8520 
8521         for (i = 0; i < mmu_page_sizes; i++)
8522                 assize += ((pgcnt_t)hat->sfmmu_ttecnt[i] +
8523                     (pgcnt_t)hat->sfmmu_scdrttecnt[i]) * TTEBYTES(i);
8524 
8525         if (hat->sfmmu_iblk == NULL)
8526                 return (assize);
8527 
8528         for (i = 0; i < mmu_page_sizes; i++)
8529                 assize += ((pgcnt_t)hat->sfmmu_ismttecnt[i] +
8530                     (pgcnt_t)hat->sfmmu_scdismttecnt[i]) * TTEBYTES(i);
8531 
8532         return (assize);
8533 }
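
Note: the mapped size here is just the per-pagesize TTE counts (private plus SCD, and ISM when an ISM block is present) scaled by each TTE's span. A quick standalone illustration, assuming the usual sun4 progression where each TTE size is 8 times the previous one, so a TTE of size code i would span 8K << (3 * i) bytes; the counts are made up:

#include <stdio.h>

#define NSIZES  6               /* 8K, 64K, 512K, 4M, 32M, 256M */

int
main(void)
{
        /* pretend counts: mostly 8K pages plus a few 4M pages */
        unsigned long ttecnt[NSIZES] = { 1000, 0, 0, 3, 0, 0 };
        unsigned long assize = 0;
        int i;

        for (i = 0; i < NSIZES; i++)
                assize += ttecnt[i] * (8192UL << (3 * i));

        /* 1000 * 8K + 3 * 4M = 20774912 bytes */
        printf("mapped size = %lu bytes\n", assize);
        return (0);
}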
8534 
8535 int
8536 hat_stats_enable(struct hat *hat)
8537 {
8538         hatlock_t       *hatlockp;
8539 
8540         ASSERT(hat->sfmmu_xhat_provider == NULL);
8541 
8542         hatlockp = sfmmu_hat_enter(hat);
8543         hat->sfmmu_rmstat++;
8544         sfmmu_hat_exit(hatlockp);
8545         return (1);
8546 }
8547 
8548 void
8549 hat_stats_disable(struct hat *hat)
8550 {
8551         hatlock_t       *hatlockp;
8552 
8553         ASSERT(hat->sfmmu_xhat_provider == NULL);
8554 
8555         hatlockp = sfmmu_hat_enter(hat);
8556         hat->sfmmu_rmstat--;
8557         sfmmu_hat_exit(hatlockp);
8558 }
8559 
8560 /*
8561  * Routines for entering or removing  ourselves from the
8562  * ism_hat's mapping list. This is used for both private and
8563  * SCD hats.
8564  */
8565 static void
8566 iment_add(struct ism_ment *iment,  struct hat *ism_hat)
8567 {
8568         ASSERT(MUTEX_HELD(&ism_mlist_lock));
8569 
8570         iment->iment_prev = NULL;
8571         iment->iment_next = ism_hat->sfmmu_iment;
8572         if (ism_hat->sfmmu_iment) {
8573                 ism_hat->sfmmu_iment->iment_prev = iment;
8574         }

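Note: iment_add is a plain head insertion into the ism_hat's doubly linked mapping list, done under ism_mlist_lock; the excerpt is cut off just before, presumably, the head pointer update. A minimal sketch of the same insertion on a generic list, with invented names:

#include <stddef.h>
#include <assert.h>

struct ment {
        struct ment *prev;
        struct ment *next;
};

static struct ment *head;       /* list head, normally guarded by a lock */

static void
ment_add(struct ment *m)
{
        m->prev = NULL;
        m->next = head;
        if (head != NULL)
                head->prev = m;
        head = m;       /* presumably the step that follows in the full routine */
}

int
main(void)
{
        struct ment a, b;

        ment_add(&a);
        ment_add(&b);
        assert(head == &b && b.next == &a && a.prev == &b);
        return (0);
}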

8636         sf_scd_t        *old_scdp;
8637 
8638 #ifdef DEBUG
8639         caddr_t         eaddr = addr + len;
8640 #endif /* DEBUG */
8641 
8642         ASSERT(ism_hatid != NULL && sfmmup != NULL);
8643         ASSERT(sptaddr == ISMID_STARTADDR);
8644         /*
8645          * Check the alignment.
8646          */
8647         if (!ISM_ALIGNED(ismshift, addr) || !ISM_ALIGNED(ismshift, sptaddr))
8648                 return (EINVAL);
8649 
8650         /*
8651          * Check size alignment.
8652          */
8653         if (!ISM_ALIGNED(ismshift, len))
8654                 return (EINVAL);
8655 
8656         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
8657 
8658         /*
8659          * Allocate ism_ment for the ism_hat's mapping list, and an
8660          * ism map blk in case we need one.  We must do our
8661          * allocations before acquiring locks to prevent a deadlock
8662          * in the kmem allocator on the mapping list lock.
8663          */
8664         new_iblk = kmem_cache_alloc(ism_blk_cache, KM_SLEEP);
8665         ism_ment = kmem_cache_alloc(ism_ment_cache, KM_SLEEP);
8666 
8667         /*
8668          * Serialize ISM mappings with the ISM busy flag, and also the
8669          * trap handlers.
8670          */
8671         sfmmu_ismhat_enter(sfmmup, 0);
8672 
8673         /*
8674          * Allocate an ism map blk if necessary.
8675          */
8676         if (sfmmup->sfmmu_iblk == NULL) {
8677                 sfmmup->sfmmu_iblk = new_iblk;

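Note: the comment above spells out a classic ordering rule: do any allocations that can sleep before taking the lock they could deadlock against, then hand back the spare if it turns out to be unneeded. A small pthread sketch of the pattern, entirely illustrative and not the kernel code:

#include <stdlib.h>
#include <pthread.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static void *blk;               /* stands in for sfmmu_iblk */

static int
add_entry(void)
{
        /* Allocate up front so nothing can block while list_lock is held. */
        void *new_blk = malloc(64);

        if (new_blk == NULL)
                return (-1);

        pthread_mutex_lock(&list_lock);
        if (blk == NULL) {
                blk = new_blk;  /* the preallocated block was needed */
                new_blk = NULL;
        }
        /* ... insert the new entry into the list here ... */
        pthread_mutex_unlock(&list_lock);

        free(new_blk);          /* free(NULL) is a no-op */
        return (0);
}

int
main(void)
{
        return (add_entry() == 0 ? 0 : 1);
}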

8849 void
8850 hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
8851 {
8852         ism_map_t       *ism_map;
8853         ism_ment_t      *free_ment = NULL;
8854         ism_blk_t       *ism_blkp;
8855         struct hat      *ism_hatid;
8856         int             found, i;
8857         hatlock_t       *hatlockp;
8858         struct tsb_info *tsbinfo;
8859         uint_t          ismshift = page_get_shift(ismszc);
8860         size_t          sh_size = ISM_SHIFT(ismshift, len);
8861         uchar_t         ism_rid;
8862         sf_scd_t        *old_scdp;
8863 
8864         ASSERT(ISM_ALIGNED(ismshift, addr));
8865         ASSERT(ISM_ALIGNED(ismshift, len));
8866         ASSERT(sfmmup != NULL);
8867         ASSERT(sfmmup != ksfmmup);
8868 
8869         if (sfmmup->sfmmu_xhat_provider) {
8870                 XHAT_UNSHARE(sfmmup, addr, len);
8871                 return;
8872         } else {
8873                 /*
8874                  * This must be a CPU HAT. If the address space has
8875                  * XHATs attached, inform all XHATs that ISM segment
8876                  * is going away
8877                  */
8878                 ASSERT(sfmmup->sfmmu_as != NULL);
8879                 if (sfmmup->sfmmu_as->a_xhat != NULL)
8880                         xhat_unshare_all(sfmmup->sfmmu_as, addr, len);
8881         }
8882 
8883         /*
8884          * Make sure that during the entire time ISM mappings are removed,
8885          * the trap handlers serialize behind us, and that no one else
8886          * can be mucking with ISM mappings.  This also lets us get away
8887          * with not doing expensive cross calls to flush the TLB -- we
8888          * just discard the context, flush the entire TSB, and call it
8889          * a day.
8890          */
8891         sfmmu_ismhat_enter(sfmmup, 0);
8892 
8893         /*
8894          * Remove the mapping.
8895          *
8896          * We can't have any holes in the ism map.
8897          * The tsb miss code, while searching the ism map, will
8898          * stop on an empty map slot.  So if a hole is created we
8899          * must move every entry past it up by one.
8900          *
8901          * Also empty ism map blks are not freed until the


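Note: because the TSB miss path stops at the first empty ISM map slot, removal has to compact the map rather than leave a hole. A tiny sketch of that compaction over a plain array (the real map lives in a chain of fixed-size ism map blocks, and last-slot handling is omitted here):

#include <stdio.h>

#define NSLOTS  8

int
main(void)
{
        int map[NSLOTS] = { 11, 22, 33, 44, 0, 0, 0, 0 };       /* 0 == empty */
        int hole = 1;                                           /* remove slot 1 */
        int i;

        /* Shift every occupied slot past the hole up by one. */
        for (i = hole; i < NSLOTS - 1 && map[i] != 0; i++)
                map[i] = map[i + 1];

        for (i = 0; i < NSLOTS; i++)
                printf("%d ", map[i]);
        printf("\n");           /* prints: 11 33 44 0 0 0 0 0 */
        return (0);
}
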
9315                  * Always convert all mappings to TNC.
9316                  */
9317                 sz = fnd_mapping_sz(pp);
9318                 pp = PP_GROUPLEADER(pp, sz);
9319                 SFMMU_STAT_ADD(sf_uncache_conflict, TTEPAGES(sz));
9320                 sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH,
9321                     TTEPAGES(sz));
9322 
9323                 return;
9324         }
9325 
9326         /*
9327          * check whether any mapping is in the same address space (as) or
9328          * is locked, since in that case we need to uncache.
9329          */
9330         for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9331                 tmphme = sfhmep->hme_next;
9332                 if (IS_PAHME(sfhmep))
9333                         continue;
9334                 hmeblkp = sfmmu_hmetohblk(sfhmep);
9335                 if (hmeblkp->hblk_xhat_bit)
9336                         continue;
9337                 tmphat = hblktosfmmu(hmeblkp);
9338                 sfmmu_copytte(&sfhmep->hme_tte, &tte);
9339                 ASSERT(TTE_IS_VALID(&tte));
9340                 if (hmeblkp->hblk_shared || tmphat == hat ||
9341                     hmeblkp->hblk_lckcnt) {
9342                         /*
9343                          * We have an uncache conflict
9344                          */
9345                         SFMMU_STAT(sf_uncache_conflict);
9346                         sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
9347                         return;
9348                 }
9349         }
9350 
9351         /*
9352          * We have an unload conflict
9353          * We have already checked for LARGE mappings, therefore
9354          * the remaining mapping(s) must be TTE8K.
9355          */
9356         SFMMU_STAT(sf_unload_conflict);
9357 
9358         for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9359                 tmphme = sfhmep->hme_next;
9360                 if (IS_PAHME(sfhmep))
9361                         continue;
9362                 hmeblkp = sfmmu_hmetohblk(sfhmep);
9363                 if (hmeblkp->hblk_xhat_bit)
9364                         continue;
9365                 ASSERT(!hmeblkp->hblk_shared);
9366                 (void) sfmmu_pageunload(pp, sfhmep, TTE8K);
9367         }
9368 
9369         if (PP_ISMAPPED_KPM(pp))
9370                 sfmmu_kpm_vac_unload(pp, addr);
9371 
9372         /*
9373          * Unloads only do TLB flushes so we need to flush the
9374          * cache here.
9375          */
9376         sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
9377         PP_SET_VCOLOR(pp, vcolor);
9378 }
9379 
9380 /*
9381  * Whenever a mapping is unloaded and the page is in TNC state,
9382  * we see if the page can be made cacheable again. 'pp' is
9383  * the page that we just unloaded a mapping from; the size
9384  * of the mapping that was unloaded is 'ottesz'.


9492 
9493                 if (PP_ISPNC(pp)) {
9494                         return (0);
9495                 }
9496 
9497                 clr_valid = 0;
9498                 if (PP_ISMAPPED_KPM(pp)) {
9499                         caddr_t kpmvaddr;
9500 
9501                         ASSERT(kpm_enable);
9502                         kpmvaddr = hat_kpm_page2va(pp, 1);
9503                         ASSERT(!(npages > 1 && IS_KPM_ALIAS_RANGE(kpmvaddr)));
9504                         color1 = addr_to_vcolor(kpmvaddr);
9505                         clr_valid = 1;
9506                 }
9507 
9508                 for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9509                         if (IS_PAHME(sfhme))
9510                                 continue;
9511                         hmeblkp = sfmmu_hmetohblk(sfhme);
9512                         if (hmeblkp->hblk_xhat_bit)
9513                                 continue;
9514 
9515                         sfmmu_copytte(&sfhme->hme_tte, &tte);
9516                         ASSERT(TTE_IS_VALID(&tte));
9517 
9518                         vaddr = tte_to_vaddr(hmeblkp, tte);
9519                         color = addr_to_vcolor(vaddr);
9520 
9521                         if (npages > 1) {
9522                                 /*
9523                                  * If there is a big mapping, make sure
9524                                  * 8K mapping is consistent with the big
9525                                  * mapping.
9526                                  */
9527                                 bcolor = i % ncolors;
9528                                 if (color != bcolor) {
9529                                         return (0);
9530                                 }
9531                         }
9532                         if (!clr_valid) {
9533                                 clr_valid = 1;

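Note: the checks above all come down to comparing virtual cache colors. For a virtually indexed cache the color of a mapping is determined by the low virtual page-number bits, roughly (vaddr >> pageshift) % ncolors, and two mappings of the same physical page can only stay cacheable if their colors agree. A rough standalone sketch of that comparison (the color formula is a simplification of addr_to_vcolor(), not a copy of it):

#include <stdio.h>

#define PAGESHIFT       13      /* 8K pages */
#define NCOLORS         4       /* illustrative VAC size / page size */

static unsigned int
vcolor(unsigned long vaddr)
{
        return ((vaddr >> PAGESHIFT) % NCOLORS);
}

int
main(void)
{
        unsigned long va1 = 0x10000000UL;       /* color 0 */
        unsigned long va2 = 0x10002000UL;       /* color 1: conflicts */

        if (vcolor(va1) != vcolor(va2))
                printf("color conflict: mappings must be made uncacheable\n");
        return (0);
}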

9641 static void
9642 sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor)
9643 {
9644         struct  sf_hment *sfhme;
9645         struct  hme_blk *hmeblkp;
9646         sfmmu_t *sfmmup;
9647         tte_t   tte, ttemod;
9648         caddr_t vaddr;
9649         int     ret, color;
9650         pfn_t   pfn;
9651 
9652         color = bcolor;
9653         pfn = pp->p_pagenum;
9654 
9655         for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9656 
9657                 if (IS_PAHME(sfhme))
9658                         continue;
9659                 hmeblkp = sfmmu_hmetohblk(sfhme);
9660 
9661                 if (hmeblkp->hblk_xhat_bit)
9662                         continue;
9663 
9664                 sfmmu_copytte(&sfhme->hme_tte, &tte);
9665                 ASSERT(TTE_IS_VALID(&tte));
9666                 vaddr = tte_to_vaddr(hmeblkp, tte);
9667                 color = addr_to_vcolor(vaddr);
9668 
9669 #ifdef DEBUG
9670                 if ((flags & HAT_CACHE) && bcolor != NO_VCOLOR) {
9671                         ASSERT(color == bcolor);
9672                 }
9673 #endif
9674 
9675                 ASSERT(flags != HAT_TMPNC || color == PP_GET_VCOLOR(pp));
9676 
9677                 ttemod = tte;
9678                 if (flags & (HAT_UNCACHE | HAT_TMPNC)) {
9679                         TTE_CLR_VCACHEABLE(&ttemod);
9680                 } else {        /* flags & HAT_CACHE */
9681                         TTE_SET_VCACHEABLE(&ttemod);
9682                 }
9683                 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);


9979                         curcnum = sfmmu_getctx_sec();
9980                         if (curcnum == cnum)
9981                                 sfmmu_load_mmustate(sfmmup);
9982                         sfmmu_enable_intrs(pstate_save);
9983                         ASSERT(curcnum == cnum || curcnum == INVALID_CONTEXT);
9984                 }
9985         } else {
9986                 /*
9987                  * multi-thread
9988                  * or when sfmmup is not the same as the curproc.
9989                  */
9990                 sfmmu_invalidate_ctx(sfmmup);
9991         }
9992 
9993         kpreempt_enable();
9994 }
9995 
9996 
9997 /*
9998  * Replace the specified TSB with a new TSB.  This function gets called when
9999  * we grow, shrink or swapin a TSB.  When swapping in a TSB (TSB_SWAPIN), the
10000  * TSB_FORCEALLOC flag may be used to force allocation of a minimum-sized TSB
10001  * (8K).
10002  *
10003  * Caller must hold the HAT lock, but should assume any tsb_info
10004  * pointers it has are no longer valid after calling this function.
10005  *
10006  * Return values:
10007  *      TSB_ALLOCFAIL   Failed to allocate a TSB, due to memory constraints
10008  *      TSB_LOSTRACE    HAT is busy, i.e. another thread is already doing
10009  *                      something to this tsbinfo/TSB
10010  *      TSB_SUCCESS     Operation succeeded
10011  */
10012 static tsb_replace_rc_t
10013 sfmmu_replace_tsb(sfmmu_t *sfmmup, struct tsb_info *old_tsbinfo, uint_t szc,
10014     hatlock_t *hatlockp, uint_t flags)
10015 {
10016         struct tsb_info *new_tsbinfo = NULL;
10017         struct tsb_info *curtsb, *prevtsb;
10018         uint_t tte_sz_mask;
10019         int i;


13391         for (; i <= (size - hme1blk_sz); i += hme1blk_sz, k++) {
13392                 hmeblkp = (struct hme_blk *)addr;
13393                 addr += hme1blk_sz;
13394                 hmeblkp->hblk_nuc_bit = 1;
13395                 hmeblkp->hblk_nextpa = cached_va_to_pa((caddr_t)hmeblkp);
13396         }
13397         ASSERT(k >= nhblk1);
13398         nucleus_hblk1.len = k;
13399         SFMMU_STAT_ADD(sf_hblk1_ncreate, k);
13400 }
13401 
13402 /*
13403  * This function is currently not supported on this platform. For what
13404  * it's supposed to do, see hat.c and hat_srmmu.c
13405  */
13406 /* ARGSUSED */
13407 faultcode_t
13408 hat_softlock(struct hat *hat, caddr_t addr, size_t *lenp, page_t **ppp,
13409     uint_t flags)
13410 {
13411         ASSERT(hat->sfmmu_xhat_provider == NULL);
13412         return (FC_NOSUPPORT);
13413 }
13414 
13415 /*
13416  * Searches the mapping list of the page for a mapping of the same size. If
13417  * none is found, the corresponding bit is cleared in the p_index field. When large
13418  * pages are more prevalent in the system, we can maintain the mapping list
13419  * in order and we don't have to traverse the list each time. Just check the
13420  * next and prev entries, and if both are of different size, we clear the bit.
13421  */
13422 static void
13423 sfmmu_rm_large_mappings(page_t *pp, int ttesz)
13424 {
13425         struct sf_hment *sfhmep;
13426         struct hme_blk *hmeblkp;
13427         int     index;
13428         pgcnt_t npgs;
13429 
13430         ASSERT(ttesz > TTE8K);
13431 
13432         ASSERT(sfmmu_mlist_held(pp));
13433 
13434         ASSERT(PP_ISMAPPED_LARGE(pp));
13435 
13436         /*
13437          * Traverse the mapping list looking for another mapping of the same
13438          * size, since we only want to clear the index field if all mappings
13439          * of that size are gone.
13440          */
13441 
13442         for (sfhmep = pp->p_mapping; sfhmep; sfhmep = sfhmep->hme_next) {
13443                 if (IS_PAHME(sfhmep))
13444                         continue;
13445                 hmeblkp = sfmmu_hmetohblk(sfhmep);
13446                 if (hmeblkp->hblk_xhat_bit)
13447                         continue;
13448                 if (hme_size(sfhmep) == ttesz) {
13449                         /*
13450                          * another mapping of the same size. don't clear index.
13451                          */
13452                         return;
13453                 }
13454         }
13455 
13456         /*
13457          * Clear the p_index bit for large page.
13458          */
13459         index = PAGESZ_TO_INDEX(ttesz);
13460         npgs = TTEPAGES(ttesz);
13461         while (npgs-- > 0) {
13462                 ASSERT(pp->p_index & index);
13463                 pp->p_index &= ~index;
13464                 pp = PP_PAGENEXT(pp);
13465         }
13466 }
13467 


14012         uint_t rhash;
14013         uint_t rid;
14014         hatlock_t *hatlockp;
14015         sf_region_t *rgnp;
14016         sf_region_t *new_rgnp = NULL;
14017         int i;
14018         uint16_t *nextidp;
14019         sf_region_t **freelistp;
14020         int maxids;
14021         sf_region_t **rarrp;
14022         uint16_t *busyrgnsp;
14023         ulong_t rttecnt;
14024         uchar_t tteflag;
14025         uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
14026         int text = (r_type == HAT_REGION_TEXT);
14027 
14028         if (srdp == NULL || r_size == 0) {
14029                 return (HAT_INVALID_REGION_COOKIE);
14030         }
14031 
14032         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14033         ASSERT(sfmmup != ksfmmup);
14034         ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
14035         ASSERT(srdp->srd_refcnt > 0);
14036         ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
14037         ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14038         ASSERT(r_pgszc < mmu_page_sizes);
14039         if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) ||
14040             !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) {
14041                 panic("hat_join_region: region addr or size is not aligned\n");
14042         }
14043 
14044 
14045         r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14046             SFMMU_REGION_HME;
14047         /*
14048          * Currently we only support shared hmes for the read-only main text
14049          * region.
14050          */
14051         if (r_type == SFMMU_REGION_HME && ((r_obj != srdp->srd_evp) ||
14052             (r_perm & PROT_WRITE))) {


14318         ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14319         ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL);
14320 
14321         r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14322             SFMMU_REGION_HME;
14323 
14324         if (r_type == SFMMU_REGION_ISM) {
14325                 ASSERT(SFMMU_IS_ISMRID_VALID(rid));
14326                 ASSERT(rid < SFMMU_MAX_ISM_REGIONS);
14327                 rgnp = srdp->srd_ismrgnp[rid];
14328         } else {
14329                 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14330                 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14331                 rgnp = srdp->srd_hmergnp[rid];
14332         }
14333         ASSERT(rgnp != NULL);
14334         ASSERT(rgnp->rgn_id == rid);
14335         ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14336         ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14337         ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
14338 
14339         ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14340         if (r_type == SFMMU_REGION_HME && sfmmup->sfmmu_as->a_xhat != NULL) {
14341                 xhat_unload_callback_all(sfmmup->sfmmu_as, rgnp->rgn_saddr,
14342                     rgnp->rgn_size, 0, NULL);
14343         }
14344 
14345         if (sfmmup->sfmmu_free) {
14346                 ulong_t rttecnt;
14347                 r_pgszc = rgnp->rgn_pgszc;
14348                 r_size = rgnp->rgn_size;
14349 
14350                 ASSERT(sfmmup->sfmmu_scdp == NULL);
14351                 if (r_type == SFMMU_REGION_ISM) {
14352                         SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
14353                 } else {
14354                         /* update shme rgns ttecnt in sfmmu_ttecnt */
14355                         rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14356                         ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14357 
14358                         atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc],
14359                             -rttecnt);
14360 
14361                         SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid);
14362                 }
14363         } else if (r_type == SFMMU_REGION_ISM) {




  64 #include <vm/seg_kmem.h>
  65 #include <vm/seg_kpm.h>
  66 #include <vm/rm.h>
  67 #include <sys/t_lock.h>
  68 #include <sys/obpdefs.h>
  69 #include <sys/vm_machparam.h>
  70 #include <sys/var.h>
  71 #include <sys/trap.h>
  72 #include <sys/machtrap.h>
  73 #include <sys/scb.h>
  74 #include <sys/bitmap.h>
  75 #include <sys/machlock.h>
  76 #include <sys/membar.h>
  77 #include <sys/atomic.h>
  78 #include <sys/cpu_module.h>
  79 #include <sys/prom_debug.h>
  80 #include <sys/ksynch.h>
  81 #include <sys/mem_config.h>
  82 #include <sys/mem_cage.h>
  83 #include <vm/vm_dep.h>

  84 #include <sys/fpu/fpusystm.h>
  85 #include <vm/mach_kpm.h>
  86 #include <sys/callb.h>
  87 
  88 #ifdef  DEBUG
  89 #define SFMMU_VALIDATE_HMERID(hat, rid, saddr, len)                     \
  90         if (SFMMU_IS_SHMERID_VALID(rid)) {                              \
  91                 caddr_t _eaddr = (saddr) + (len);                       \
  92                 sf_srd_t *_srdp;                                        \
  93                 sf_region_t *_rgnp;                                     \
  94                 ASSERT((rid) < SFMMU_MAX_HME_REGIONS);                       \
  95                 ASSERT(SF_RGNMAP_TEST(hat->sfmmu_hmeregion_map, rid));       \
  96                 ASSERT((hat) != ksfmmup);                               \
  97                 _srdp = (hat)->sfmmu_srdp;                           \
  98                 ASSERT(_srdp != NULL);                                  \
  99                 ASSERT(_srdp->srd_refcnt != 0);                              \
 100                 _rgnp = _srdp->srd_hmergnp[(rid)];                   \
 101                 ASSERT(_rgnp != NULL && _rgnp->rgn_id == rid);               \
 102                 ASSERT(_rgnp->rgn_refcnt != 0);                              \
 103                 ASSERT(!(_rgnp->rgn_flags & SFMMU_REGION_FREE)); \


1332          * Reserve some kernel virtual address space for the locked TTEs
1333          * that allow us to probe the TSB from TL>0.
1334          */
1335         utsb_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1336             0, 0, NULL, NULL, VM_SLEEP);
1337         utsb4m_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1338             0, 0, NULL, NULL, VM_SLEEP);
1339 #endif
1340 
1341 #ifdef VAC
1342         /*
1343          * The big page VAC handling code assumes VAC
1344          * will not be bigger than the smallest big
1345          * page- which is 64K.
1346          */
1347         if (TTEPAGES(TTE64K) < CACHE_NUM_COLOR) {
1348                 cmn_err(CE_PANIC, "VAC too big!");
1349         }
1350 #endif
1351 


1352         uhme_hash_pa = va_to_pa(uhme_hash);
1353         khme_hash_pa = va_to_pa(khme_hash);
1354 
1355         /*
1356          * Initialize relocation locks. kpr_suspendlock is held
1357          * at PIL_MAX to prevent an interrupt from pinning the holder
1358          * of a suspended TTE and then accessing that TTE, which would
1359          * lead to deadlock.
1360          */
1361         mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL);
1362         mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX);
1363 
1364         /*
1365          * If shared context support is disabled via /etc/system,
1366          * set shctx_on to 0 here if it was set to 1 earlier in the boot
1367          * sequence by the cpu module initialization code.
1368          */
1369         if (shctx_on && disable_shctx) {
1370                 shctx_on = 0;
1371         }


1519         for (i = 0; i < max_mmu_page_sizes; i++) {
1520                 sfmmup->sfmmu_ttecnt[i] = 0;
1521                 sfmmup->sfmmu_scdrttecnt[i] = 0;
1522                 sfmmup->sfmmu_ismttecnt[i] = 0;
1523                 sfmmup->sfmmu_scdismttecnt[i] = 0;
1524                 sfmmup->sfmmu_pgsz[i] = TTE8K;
1525         }
1526         sfmmup->sfmmu_tsb0_4minflcnt = 0;
1527         sfmmup->sfmmu_iblk = NULL;
1528         sfmmup->sfmmu_ismhat = 0;
1529         sfmmup->sfmmu_scdhat = 0;
1530         sfmmup->sfmmu_ismblkpa = (uint64_t)-1;
1531         if (sfmmup == ksfmmup) {
1532                 CPUSET_ALL(sfmmup->sfmmu_cpusran);
1533         } else {
1534                 CPUSET_ZERO(sfmmup->sfmmu_cpusran);
1535         }
1536         sfmmup->sfmmu_free = 0;
1537         sfmmup->sfmmu_rmstat = 0;
1538         sfmmup->sfmmu_clrbin = sfmmup->sfmmu_clrstart;

1539         cv_init(&sfmmup->sfmmu_tsb_cv, NULL, CV_DEFAULT, NULL);
1540         sfmmup->sfmmu_srdp = NULL;
1541         SF_RGNMAP_ZERO(sfmmup->sfmmu_region_map);
1542         bzero(sfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE);
1543         sfmmup->sfmmu_scdp = NULL;
1544         sfmmup->sfmmu_scd_link.next = NULL;
1545         sfmmup->sfmmu_scd_link.prev = NULL;
1546         return (sfmmup);
1547 }
1548 
1549 /*
1550  * Create per-MMU context domain kstats for a given MMU ctx.
1551  */
1552 static void
1553 sfmmu_mmu_kstat_create(mmu_ctx_t *mmu_ctxp)
1554 {
1555         mmu_ctx_stat_t  stat;
1556         kstat_t         *mmu_kstat;
1557 
1558         ASSERT(MUTEX_HELD(&cpu_lock));


1898                  * INVALID_CONTEXT to it.
1899                  * Compatibility Note: hw takes care of MMU_SCONTEXT1
1900                  */
1901                 sfmmu_setctx_sec(INVALID_CONTEXT);
1902                 sfmmu_clear_utsbinfo();
1903 
1904                 kpreempt_enable();
1905                 sfmmu_hat_exit(hatlockp);
1906         }
1907 }
1908 
1909 /*
1910  * Free all the translation resources for the specified address space.
1911  * Called from as_free when an address space is being destroyed.
1912  */
1913 void
1914 hat_free_start(struct hat *sfmmup)
1915 {
1916         ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
1917         ASSERT(sfmmup != ksfmmup);

1918 
1919         sfmmup->sfmmu_free = 1;
1920         if (sfmmup->sfmmu_scdp != NULL) {
1921                 sfmmu_leave_scd(sfmmup, 0);
1922         }
1923 
1924         ASSERT(sfmmup->sfmmu_scdp == NULL);
1925 }
1926 
1927 void
1928 hat_free_end(struct hat *sfmmup)
1929 {
1930         int i;
1931 

1932         ASSERT(sfmmup->sfmmu_free == 1);
1933         ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
1934         ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
1935         ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0);
1936         ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0);
1937         ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0);
1938         ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0);
1939 
1940         if (sfmmup->sfmmu_rmstat) {
1941                 hat_freestat(sfmmup->sfmmu_as, NULL);
1942         }
1943 
1944         while (sfmmup->sfmmu_tsb != NULL) {
1945                 struct tsb_info *next = sfmmup->sfmmu_tsb->tsb_next;
1946                 sfmmu_tsbinfo_free(sfmmup->sfmmu_tsb);
1947                 sfmmup->sfmmu_tsb = next;
1948         }
1949 
1950         if (sfmmup->sfmmu_srdp != NULL) {
1951                 sfmmu_leave_srd(sfmmup);


1953                 for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
1954                         if (sfmmup->sfmmu_hmeregion_links[i] != NULL) {
1955                                 kmem_free(sfmmup->sfmmu_hmeregion_links[i],
1956                                     SFMMU_L2_HMERLINKS_SIZE);
1957                                 sfmmup->sfmmu_hmeregion_links[i] = NULL;
1958                         }
1959                 }
1960         }
1961         sfmmu_free_sfmmu(sfmmup);
1962 
1963 #ifdef DEBUG
1964         for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
1965                 ASSERT(sfmmup->sfmmu_hmeregion_links[i] == NULL);
1966         }
1967 #endif
1968 
1969         kmem_cache_free(sfmmuid_cache, sfmmup);
1970 }
1971 
1972 /*
1973  * Duplicate the translations of an as into another newas
1974  */
1975 /* ARGSUSED */
1976 int
1977 hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len,
1978         uint_t flag)
1979 {
1980         sf_srd_t *srdp;
1981         sf_scd_t *scdp;
1982         int i;
1983         extern uint_t get_color_start(struct as *);
1984 

1985         ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
1986             (flag == HAT_DUP_SRD));
1987         ASSERT(hat != ksfmmup);
1988         ASSERT(newhat != ksfmmup);
1989         ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
1990 
1991         if (flag == HAT_DUP_COW) {
1992                 panic("hat_dup: HAT_DUP_COW not supported");
1993         }
1994 
1995         if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
1996                 ASSERT(srdp->srd_evp != NULL);
1997                 VN_HOLD(srdp->srd_evp);
1998                 ASSERT(srdp->srd_refcnt > 0);
1999                 newhat->sfmmu_srdp = srdp;
2000                 atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
2001         }
2002 
2003         /*
2004          * HAT_DUP_ALL flag is used after as duplication is done.


2034         if (flag == HAT_DUP_ALL && consistent_coloring == 0 &&
2035             update_proc_pgcolorbase_after_fork != 0) {
2036                 hat->sfmmu_clrbin = get_color_start(hat->sfmmu_as);
2037         }
2038         return (0);
2039 }
2040 
2041 void
2042 hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
2043         uint_t attr, uint_t flags)
2044 {
2045         hat_do_memload(hat, addr, pp, attr, flags,
2046             SFMMU_INVALID_SHMERID);
2047 }
2048 
2049 void
2050 hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp,
2051         uint_t attr, uint_t flags, hat_region_cookie_t rcookie)
2052 {
2053         uint_t rid;
2054         if (rcookie == HAT_INVALID_REGION_COOKIE) {

2055                 hat_do_memload(hat, addr, pp, attr, flags,
2056                     SFMMU_INVALID_SHMERID);
2057                 return;
2058         }
2059         rid = (uint_t)((uint64_t)rcookie);
2060         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2061         hat_do_memload(hat, addr, pp, attr, flags, rid);
2062 }
2063 
2064 /*
2065  * Set up addr to map to page pp with protection prot.
2066  * As an optimization we also load the TSB with the
2067  * corresponding tte but it is no big deal if  the tte gets kicked out.
2068  */
2069 static void
2070 hat_do_memload(struct hat *hat, caddr_t addr, struct page *pp,
2071         uint_t attr, uint_t flags, uint_t rid)
2072 {
2073         tte_t tte;
2074 
2075 
2076         ASSERT(hat != NULL);
2077         ASSERT(PAGE_LOCKED(pp));
2078         ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2079         ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2080         ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2081         SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE);
2082 
2083         if (PP_ISFREE(pp)) {
2084                 panic("hat_memload: loading a mapping to free page %p",
2085                     (void *)pp);
2086         }
2087 







2088         ASSERT((hat == ksfmmup) ||
2089             AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2090 
2091         if (flags & ~SFMMU_LOAD_ALLFLAG)
2092                 cmn_err(CE_NOTE, "hat_memload: unsupported flags %d",
2093                     flags & ~SFMMU_LOAD_ALLFLAG);
2094 
2095         if (hat->sfmmu_rmstat)
2096                 hat_resvstat(MMU_PAGESIZE, hat->sfmmu_as, addr);
2097 
2098 #if defined(SF_ERRATA_57)
2099         if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2100             (addr < errata57_limit) && (attr & PROT_EXEC) &&
2101             !(flags & HAT_LOAD_SHARE)) {
2102                 cmn_err(CE_WARN, "hat_memload: illegal attempt to make user "
2103                     " page executable");
2104                 attr &= ~PROT_EXEC;
2105         }
2106 #endif
2107 


2117 }
2118 
2119 /*
2120  * hat_devload can be called to map real memory (e.g.
2121  * /dev/kmem) and even though hat_devload will determine pf is
2122  * for memory, it will be unable to get a shared lock on the
2123  * page (because someone else has it exclusively) and will
2124  * pass dp = NULL.  If tteload doesn't get a non-NULL
2125  * page pointer it can't cache memory.
2126  */
2127 void
2128 hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
2129         uint_t attr, int flags)
2130 {
2131         tte_t tte;
2132         struct page *pp = NULL;
2133         int use_lgpg = 0;
2134 
2135         ASSERT(hat != NULL);
2136 





2137         ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2138         ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2139         ASSERT((hat == ksfmmup) ||
2140             AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2141         if (len == 0)
2142                 panic("hat_devload: zero len");
2143         if (flags & ~SFMMU_LOAD_ALLFLAG)
2144                 cmn_err(CE_NOTE, "hat_devload: unsupported flags %d",
2145                     flags & ~SFMMU_LOAD_ALLFLAG);
2146 
2147 #if defined(SF_ERRATA_57)
2148         if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2149             (addr < errata57_limit) && (attr & PROT_EXEC) &&
2150             !(flags & HAT_LOAD_SHARE)) {
2151                 cmn_err(CE_WARN, "hat_devload: illegal attempt to make user "
2152                     " page executable");
2153                 attr &= ~PROT_EXEC;
2154         }
2155 #endif
2156 


2263          */
2264         if ((flags & HAT_LOAD_SHARE) == 0) {
2265                 sfmmu_check_page_sizes(hat, 1);
2266         }
2267 }
2268 
2269 void
2270 hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
2271         struct page **pps, uint_t attr, uint_t flags)
2272 {
2273         hat_do_memload_array(hat, addr, len, pps, attr, flags,
2274             SFMMU_INVALID_SHMERID);
2275 }
2276 
2277 void
2278 hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len,
2279         struct page **pps, uint_t attr, uint_t flags,
2280         hat_region_cookie_t rcookie)
2281 {
2282         uint_t rid;
2283         if (rcookie == HAT_INVALID_REGION_COOKIE) {

2284                 hat_do_memload_array(hat, addr, len, pps, attr, flags,
2285                     SFMMU_INVALID_SHMERID);
2286                 return;
2287         }
2288         rid = (uint_t)((uint64_t)rcookie);
2289         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2290         hat_do_memload_array(hat, addr, len, pps, attr, flags, rid);
2291 }
2292 
2293 /*
2294  * Map the largest extent possible out of the page array. The array may NOT
2295  * be in order.  The largest possible mapping a page can have
2296  * is specified in the p_szc field.  The p_szc field
2297  * cannot change as long as there are any mappings (large or small)
2298  * to any of the pages that make up the large page. (ie. any
2299  * promotion/demotion of page size is not up to the hat but up to
2300  * the page free list manager).  The array
2301  * should consist of properly aligned contiguous pages that are
2302  * part of a big page for a large mapping to be created.
2303  */
2304 static void
2305 hat_do_memload_array(struct hat *hat, caddr_t addr, size_t len,
2306         struct page **pps, uint_t attr, uint_t flags, uint_t rid)
2307 {
2308         int  ttesz;
2309         size_t mapsz;
2310         pgcnt_t numpg, npgs;
2311         tte_t tte;
2312         page_t *pp;
2313         uint_t large_pages_disable;
2314 
2315         ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2316         SFMMU_VALIDATE_HMERID(hat, rid, addr, len);
2317 






2318         if (hat->sfmmu_rmstat)
2319                 hat_resvstat(len, hat->sfmmu_as, addr);
2320 
2321 #if defined(SF_ERRATA_57)
2322         if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2323             (addr < errata57_limit) && (attr & PROT_EXEC) &&
2324             !(flags & HAT_LOAD_SHARE)) {
2325                 cmn_err(CE_WARN, "hat_memload_array: illegal attempt to make "
2326                     "user page executable");
2327                 attr &= ~PROT_EXEC;
2328         }
2329 #endif
2330 
2331         /* Get number of pages */
2332         npgs = len >> MMU_PAGESHIFT;
2333 
2334         if (flags & HAT_LOAD_SHARE) {
2335                 large_pages_disable = disable_ism_large_pages;
2336         } else {
2337                 large_pages_disable = disable_large_pages;


3780                             rsz, rgnp->rgn_obj,
3781                             rgnp->rgn_objoff);
3782                 }
3783                 ttesz--;
3784         }
3785 }
3786 
3787 /*
3788  * Release one hardware address translation lock on the given address range.
3789  */
3790 void
3791 hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
3792 {
3793         struct hmehash_bucket *hmebp;
3794         hmeblk_tag hblktag;
3795         int hmeshift, hashno = 1;
3796         struct hme_blk *hmeblkp, *list = NULL;
3797         caddr_t endaddr;
3798 
3799         ASSERT(sfmmup != NULL);

3800 
3801         ASSERT((sfmmup == ksfmmup) ||
3802             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
3803         ASSERT((len & MMU_PAGEOFFSET) == 0);
3804         endaddr = addr + len;
3805         hblktag.htag_id = sfmmup;
3806         hblktag.htag_rid = SFMMU_INVALID_SHMERID;
3807 
3808         /*
3809          * Spitfire supports 4 page sizes.
3810          * Most pages are expected to be of the smallest page size (8K) and
3811          * these will not need to be rehashed. 64K pages also don't need to be
3812          * rehashed because an hmeblk spans 64K of address space. 512K pages
3813          * might need 1 rehash and 4M pages might need 2 rehashes.
3814          */
3815         while (addr < endaddr) {
3816                 hmeshift = HME_HASH_SHIFT(hashno);
3817                 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
3818                 hblktag.htag_rehash = hashno;
3819                 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);


3864 {
3865         sf_srd_t *srdp;
3866         sf_region_t *rgnp;
3867         int ttesz;
3868         uint_t rid;
3869         caddr_t eaddr;
3870         caddr_t va;
3871         int hmeshift;
3872         hmeblk_tag hblktag;
3873         struct hmehash_bucket *hmebp;
3874         struct hme_blk *hmeblkp;
3875         struct hme_blk *pr_hblk;
3876         struct hme_blk *list;
3877 
3878         if (rcookie == HAT_INVALID_REGION_COOKIE) {
3879                 hat_unlock(sfmmup, addr, len);
3880                 return;
3881         }
3882 
3883         ASSERT(sfmmup != NULL);

3884         ASSERT(sfmmup != ksfmmup);
3885 
3886         srdp = sfmmup->sfmmu_srdp;
3887         rid = (uint_t)((uint64_t)rcookie);
3888         VERIFY3U(rid, <, SFMMU_MAX_HME_REGIONS);
3889         eaddr = addr + len;
3890         va = addr;
3891         list = NULL;
3892         rgnp = srdp->srd_hmergnp[rid];
3893         SFMMU_VALIDATE_HMERID(sfmmup, rid, addr, len);
3894 
3895         ASSERT(IS_P2ALIGNED(addr, TTEBYTES(rgnp->rgn_pgszc)));
3896         ASSERT(IS_P2ALIGNED(len, TTEBYTES(rgnp->rgn_pgszc)));
3897         if (rgnp->rgn_pgszc < HBLK_MIN_TTESZ) {
3898                 ttesz = HBLK_MIN_TTESZ;
3899         } else {
3900                 ttesz = rgnp->rgn_pgszc;
3901         }
3902         while (va < eaddr) {
3903                 while (ttesz < rgnp->rgn_pgszc &&


4575                 page_unlock(pp);
4576 }
4577 
4578 /*
4579  * hat_probe returns 1 if the translation for the address 'addr' is
4580  * loaded, zero otherwise.
4581  *
4582  * hat_probe should be used only for advisory purposes because it may
4583  * occasionally return the wrong value. The implementation must guarantee that
4584  * returning the wrong value is a very rare event. hat_probe is used
4585  * to implement optimizations in the segment drivers.
4586  *
4587  */
4588 int
4589 hat_probe(struct hat *sfmmup, caddr_t addr)
4590 {
4591         pfn_t pfn;
4592         tte_t tte;
4593 
4594         ASSERT(sfmmup != NULL);

4595 
4596         ASSERT((sfmmup == ksfmmup) ||
4597             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
4598 
4599         if (sfmmup == ksfmmup) {
4600                 while ((pfn = sfmmu_vatopfn(addr, sfmmup, &tte))
4601                     == PFN_SUSPENDED) {
4602                         sfmmu_vatopfn_suspended(addr, sfmmup, &tte);
4603                 }
4604         } else {
4605                 pfn = sfmmu_uvatopfn(addr, sfmmup, NULL);
4606         }
4607 
4608         if (pfn != PFN_INVALID)
4609                 return (1);
4610         else
4611                 return (0);
4612 }
4613 
4614 ssize_t
4615 hat_getpagesize(struct hat *sfmmup, caddr_t addr)
4616 {
4617         tte_t tte;
4618 


4619         if (sfmmup == ksfmmup) {
4620                 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4621                         return (-1);
4622                 }
4623         } else {
4624                 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4625                         return (-1);
4626                 }
4627         }
4628 
4629         ASSERT(TTE_IS_VALID(&tte));
4630         return (TTEBYTES(TTE_CSZ(&tte)));
4631 }
4632 
4633 uint_t
4634 hat_getattr(struct hat *sfmmup, caddr_t addr, uint_t *attr)
4635 {
4636         tte_t tte;
4637 


4638         if (sfmmup == ksfmmup) {
4639                 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4640                         tte.ll = 0;
4641                 }
4642         } else {
4643                 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4644                         tte.ll = 0;
4645                 }
4646         }
4647         if (TTE_IS_VALID(&tte)) {
4648                 *attr = sfmmu_ptov_attr(&tte);
4649                 return (0);
4650         }
4651         *attr = 0;
4652         return ((uint_t)0xffffffff);
4653 }
4654 
4655 /*
4656  * Enables more attributes on specified address range (ie. logical OR)
4657  */
4658 void
4659 hat_setattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4660 {









4661         ASSERT(hat->sfmmu_as != NULL);



4662 
4663         sfmmu_chgattr(hat, addr, len, attr, SFMMU_SETATTR);
4664 }
4665 
4666 /*
4667  * Assigns attributes to the specified address range.  All the attributes
4668  * are specified.
4669  */
4670 void
4671 hat_chgattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4672 {









4673         ASSERT(hat->sfmmu_as != NULL);



4674 
4675         sfmmu_chgattr(hat, addr, len, attr, SFMMU_CHGATTR);
4676 }
4677 
4678 /*
4679  * Remove attributes on the specified address range (ie. logical NAND)
4680  */
4681 void
4682 hat_clrattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4683 {









4684         ASSERT(hat->sfmmu_as != NULL);



4685 
4686         sfmmu_chgattr(hat, addr, len, attr, SFMMU_CLRATTR);
4687 }
4688 
4689 /*
4690  * Change attributes on an address range to that specified by attr and mode.
4691  */
4692 static void
4693 sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
4694         int mode)
4695 {
4696         struct hmehash_bucket *hmebp;
4697         hmeblk_tag hblktag;
4698         int hmeshift, hashno = 1;
4699         struct hme_blk *hmeblkp, *list = NULL;
4700         caddr_t endaddr;
4701         cpuset_t cpuset;
4702         demap_range_t dmr;
4703 
4704         CPUSET_ZERO(cpuset);


5017  * Change the protections in the virtual address range
5018  * given to the specified virtual protection.  If vprot is ~PROT_WRITE,
5019  * then remove write permission, leaving the other
5020  * permissions unchanged.  If vprot is ~PROT_USER, remove user permissions.
5021  *
5022  */
5023 void
5024 hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot)
5025 {
5026         struct hmehash_bucket *hmebp;
5027         hmeblk_tag hblktag;
5028         int hmeshift, hashno = 1;
5029         struct hme_blk *hmeblkp, *list = NULL;
5030         caddr_t endaddr;
5031         cpuset_t cpuset;
5032         demap_range_t dmr;
5033 
5034         ASSERT((len & MMU_PAGEOFFSET) == 0);
5035         ASSERT(((uintptr_t)addr & MMU_PAGEOFFSET) == 0);
5036 









5037         ASSERT(sfmmup->sfmmu_as != NULL);



5038 
5039         CPUSET_ZERO(cpuset);
5040 
5041         if ((vprot != (uint_t)~PROT_WRITE) && (vprot & PROT_USER) &&
5042             ((addr + len) > (caddr_t)USERLIMIT)) {
5043                 panic("user addr %p vprot %x in kernel space",
5044                     (void *)addr, vprot);
5045         }
5046         endaddr = addr + len;
5047         hblktag.htag_id = sfmmup;
5048         hblktag.htag_rid = SFMMU_INVALID_SHMERID;
5049         DEMAP_RANGE_INIT(sfmmup, &dmr);
5050 
5051         while (addr < endaddr) {
5052                 hmeshift = HME_HASH_SHIFT(hashno);
5053                 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
5054                 hblktag.htag_rehash = hashno;
5055                 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
5056 
5057                 SFMMU_HASH_LOCK(hmebp);


5452 hat_unload_callback(
5453         struct hat *sfmmup,
5454         caddr_t addr,
5455         size_t len,
5456         uint_t flags,
5457         hat_callback_t *callback)
5458 {
5459         struct hmehash_bucket *hmebp;
5460         hmeblk_tag hblktag;
5461         int hmeshift, hashno, iskernel;
5462         struct hme_blk *hmeblkp, *pr_hblk, *list = NULL;
5463         caddr_t endaddr;
5464         cpuset_t cpuset;
5465         int addr_count = 0;
5466         int a;
5467         caddr_t cb_start_addr[MAX_CB_ADDR];
5468         caddr_t cb_end_addr[MAX_CB_ADDR];
5469         int issegkmap = ISSEGKMAP(sfmmup, addr);
5470         demap_range_t dmr, *dmrp;
5471 









5472         ASSERT(sfmmup->sfmmu_as != NULL);




5473 
5474         ASSERT((sfmmup == ksfmmup) || (flags & HAT_UNLOAD_OTHER) || \
5475             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
5476 
5477         ASSERT(sfmmup != NULL);
5478         ASSERT((len & MMU_PAGEOFFSET) == 0);
5479         ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
5480 
5481         /*
5482          * Probing through a large VA range (say 63 bits) will be slow, even
5483          * at 4 Meg steps between the probes. So, when the virtual address range
5484          * is very large, search the HME entries for what to unload.
5485          *
5486          *      len >> TTE_PAGE_SHIFT(TTE4M) is the # of 4Meg probes we'd need
5487          *
5488          *      UHMEHASH_SZ is the number of hash buckets to examine
5489          *
5490          */
5491         if (sfmmup != KHATID && (len >> TTE_PAGE_SHIFT(TTE4M)) > UHMEHASH_SZ) {
5492                 hat_unload_large_virtual(sfmmup, addr, len, flags, callback);

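Note: the check above is a simple cost comparison: if probing the range at 4M steps would take more probes than there are user hme hash buckets, it is cheaper to walk the hash and pick out this hat's entries instead. A back-of-the-envelope sketch of the same decision, with made-up constants:

#include <stdio.h>

#define TTE4M_SHIFT     22              /* 4M probe stride */
#define UHMEHASH_SZ     0x2000          /* illustrative bucket count */

int
main(void)
{
        unsigned long long len = 1ULL << 40;    /* unmapping 1TB of VA */
        unsigned long long probes = len >> TTE4M_SHIFT;

        if (probes > UHMEHASH_SZ)
                printf("%llu probes > %u buckets: walk the hash instead\n",
                    probes, (unsigned)UHMEHASH_SZ);
        return (0);
}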

5729                         callback->hcb_start_addr = cb_start_addr[a];
5730                         callback->hcb_end_addr = cb_end_addr[a];
5731                         callback->hcb_function(callback);
5732                 }
5733         }
5734 
5735         /*
5736          * Check TSB and TLB page sizes if the process isn't exiting.
5737          */
5738         if (!sfmmup->sfmmu_free)
5739                 sfmmu_check_page_sizes(sfmmup, 0);
5740 }
5741 
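Note: the tail loop above drains whatever unload ranges are still batched in cb_start_addr[]/cb_end_addr[]; ranges are accumulated up to MAX_CB_ADDR at a time, apparently so the callback need not fire once per page. A standalone sketch of that batching pattern, with invented names and sizes:

#include <stdio.h>

#define MAX_BATCH       4

static void
issue_callback(unsigned long start, unsigned long end)
{
        printf("callback for [%#lx, %#lx)\n", start, end);
}

int
main(void)
{
        unsigned long starts[MAX_BATCH], ends[MAX_BATCH];
        int n = 0, a;
        unsigned long r;

        /* Pretend we unloaded six discontiguous 8K ranges. */
        for (r = 0; r < 6; r++) {
                starts[n] = 0x10000UL + r * 0x4000UL;
                ends[n] = starts[n] + 0x2000UL;
                if (++n == MAX_BATCH) {         /* batch is full: drain it */
                        for (a = 0; a < n; a++)
                                issue_callback(starts[a], ends[a]);
                        n = 0;
                }
        }
        for (a = 0; a < n; a++)                 /* drain the remainder */
                issue_callback(starts[a], ends[a]);
        return (0);
}
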
5742 /*
5743  * Unload all the mappings in the range [addr..addr+len). addr and len must
5744  * be MMU_PAGESIZE aligned.
5745  */
5746 void
5747 hat_unload(struct hat *sfmmup, caddr_t addr, size_t len, uint_t flags)
5748 {




5749         hat_unload_callback(sfmmup, addr, len, flags, NULL);
5750 }
5751 
5752 
5753 /*
5754  * Find the largest mapping size for this page.
5755  */
5756 int
5757 fnd_mapping_sz(page_t *pp)
5758 {
5759         int sz;
5760         int p_index;
5761 
5762         p_index = PP_MAPINDEX(pp);
5763 
5764         sz = 0;
5765         p_index >>= 1;    /* don't care about 8K bit */
5766         for (; p_index; p_index >>= 1) {
5767                 sz++;
5768         }


6068                 va += sz;
6069         }
6070 }
6071 
6072 /*
6073  * Synchronize all the mappings in the range [addr..addr+len).
6074  * Can be called with clearflag having two states:
6075  * HAT_SYNC_DONTZERO means just return the rm stats;
6076  * HAT_SYNC_ZERORM means zero the rm bits in the tte and return the stats.
6077  */
6078 void
6079 hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
6080 {
6081         struct hmehash_bucket *hmebp;
6082         hmeblk_tag hblktag;
6083         int hmeshift, hashno = 1;
6084         struct hme_blk *hmeblkp, *list = NULL;
6085         caddr_t endaddr;
6086         cpuset_t cpuset;
6087 

6088         ASSERT((sfmmup == ksfmmup) ||
6089             AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
6090         ASSERT((len & MMU_PAGEOFFSET) == 0);
6091         ASSERT((clearflag == HAT_SYNC_DONTZERO) ||
6092             (clearflag == HAT_SYNC_ZERORM));
6093 
6094         CPUSET_ZERO(cpuset);
6095 
6096         endaddr = addr + len;
6097         hblktag.htag_id = sfmmup;
6098         hblktag.htag_rid = SFMMU_INVALID_SHMERID;
6099 
6100         /*
6101          * Spitfire supports 4 page sizes.
6102          * Most pages are expected to be of the smallest page
6103          * size (8K) and these will not need to be rehashed. 64K
6104          * pages also don't need to be rehashed because an hmeblk
6105          * spans 64K of address space. 512K pages might need 1 rehash
6106          * and 4M pages 2 rehashes.
6107          */


6874                         return;         /* non-fatal */
6875         }
6876         panic("pa_hment leaked: 0x%p", (void *)pahmep);
6877 }
6878 
6879 /*
6880  * Remove all mappings to page 'pp'.
6881  */
6882 int
6883 hat_pageunload(struct page *pp, uint_t forceflag)
6884 {
6885         struct page *origpp = pp;
6886         struct sf_hment *sfhme, *tmphme;
6887         struct hme_blk *hmeblkp;
6888         kmutex_t *pml;
6889 #ifdef VAC
6890         kmutex_t *pmtx;
6891 #endif
6892         cpuset_t cpuset, tset;
6893         int index, cons;

6894         int pa_hments;
6895 
6896         ASSERT(PAGE_EXCL(pp));
6897 

6898         tmphme = NULL;

6899         pa_hments = 0;
6900         CPUSET_ZERO(cpuset);
6901 
6902         pml = sfmmu_mlist_enter(pp);
6903 
6904 #ifdef VAC
6905         if (pp->p_kpmref)
6906                 sfmmu_kpm_pageunload(pp);
6907         ASSERT(!PP_ISMAPPED_KPM(pp));
6908 #endif
6909         /*
6910          * Clear vpm reference. Since the page is exclusively locked
6911          * vpm cannot be referencing it.
6912          */
6913         if (vpm_enable) {
6914                 pp->p_vpmref = 0;
6915         }
6916 
6917         index = PP_MAPINDEX(pp);
6918         cons = TTE8K;
6919 retry:
6920         for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
6921                 tmphme = sfhme->hme_next;
6922 
6923                 if (IS_PAHME(sfhme)) {
6924                         ASSERT(sfhme->hme_data != NULL);
6925                         pa_hments++;
6926                         continue;
6927                 }
6928 
6929                 hmeblkp = sfmmu_hmetohblk(sfhme);










6930 
6931                 /*
6932                  * If there are kernel mappings, don't unload them; they will
6933                  * be suspended.
6934                  */
6935                 if (forceflag == SFMMU_KERNEL_RELOC && hmeblkp->hblk_lckcnt &&
6936                     hmeblkp->hblk_tag.htag_id == ksfmmup)
6937                         continue;
6938 
6939                 tset = sfmmu_pageunload(pp, sfhme, cons);
6940                 CPUSET_OR(cpuset, tset);
6941         }
6942 
6943         while (index != 0) {
6944                 index = index >> 1;
6945                 if (index != 0)
6946                         cons++;
6947                 if (index & 0x1) {
6948                         /* Go to leading page */
6949                         pp = PP_GROUPLEADER(pp, cons);
6950                         ASSERT(sfmmu_mlist_held(pp));
6951                         goto retry;
6952                 }
6953         }
6954 
6955         /*
6956          * cpuset may be empty if the page was only mapped by segkpm,
6957          * in which case we won't actually cross-trap.
6958          */
6959         xt_sync(cpuset);
6960 
6961         /*
6962          * The page should have no mappings at this point, unless
6963          * we were called from hat_page_relocate() in which case we
6964          * leave the locked mappings which will be suspended later.
6965          */
6966         ASSERT(!PP_ISMAPPED(origpp) || pa_hments ||
6967             (forceflag == SFMMU_KERNEL_RELOC));
6968 
6969 #ifdef VAC
6970         if (PP_ISTNC(pp)) {
6971                 if (cons == TTE8K) {
6972                         pmtx = sfmmu_page_enter(pp);
6973                         PP_CLRTNC(pp);
6974                         sfmmu_page_exit(pmtx);
6975                 } else {
6976                         conv_tnc(pp, cons);
6977                 }
6978         }
6979 #endif  /* VAC */
6980 
6981         if (pa_hments && forceflag != SFMMU_KERNEL_RELOC) {
6982                 /*
6983                  * Unlink any pa_hments and free them, calling back
6984                  * the responsible subsystem to notify it of the error.
6985                  * This can occur in situations such as drivers leaking
6986                  * DMA handles: naughty, but common enough that we'd like
6987                  * to keep the system running rather than bringing it
6988                  * down with an obscure error like "pa_hment leaked"
6989                  * which doesn't aid the user in debugging their driver.
6990                  */
6991                 for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
6992                         tmphme = sfhme->hme_next;
6993                         if (IS_PAHME(sfhme)) {
6994                                 struct pa_hment *pahmep = sfhme->hme_data;
6995                                 sfmmu_pahment_leaked(pahmep);
6996                                 HME_SUB(sfhme, pp);
6997                                 kmem_cache_free(pa_hment_cache, pahmep);
6998                         }
6999                 }
7000 
7001                 ASSERT(!PP_ISMAPPED(origpp));
7002         }
7003 
7004         sfmmu_mlist_exit(pml);
7005 











7006         return (0);
7007 }
7008 
7009 cpuset_t
7010 sfmmu_pageunload(page_t *pp, struct sf_hment *sfhme, int cons)
7011 {
7012         struct hme_blk *hmeblkp;
7013         sfmmu_t *sfmmup;
7014         tte_t tte, ttemod;
7015 #ifdef DEBUG
7016         tte_t orig_old;
7017 #endif /* DEBUG */
7018         caddr_t addr;
7019         int ttesz;
7020         int ret;
7021         cpuset_t cpuset;
7022 
7023         ASSERT(pp != NULL);
7024         ASSERT(sfmmu_mlist_held(pp));
7025         ASSERT(!PP_ISKAS(pp));


7266 
7267         clearflag &= ~HAT_SYNC_STOPON_SHARED;
7268         pml = sfmmu_mlist_enter(pp);
7269         index = PP_MAPINDEX(pp);
7270         cons = TTE8K;
7271 retry:
7272         for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
7273                 /*
7274                  * We need to save the next hment on the list since
7275                  * it is possible for pagesync to remove an invalid hment
7276                  * from the list.
7277                  */
7278                 tmphme = sfhme->hme_next;
7279                 if (IS_PAHME(sfhme))
7280                         continue;
7281                 /*
7282                  * If we are looking for large mappings and this hme doesn't
7283                  * reach the range we are seeking, just ignore it.
7284                  */
7285                 hmeblkp = sfmmu_hmetohblk(sfhme);


7286 
7287                 if (hme_size(sfhme) < cons)
7288                         continue;
7289 
7290                 if (stop_on_sh) {
7291                         if (hmeblkp->hblk_shared) {
7292                                 sf_srd_t *srdp = hblktosrd(hmeblkp);
7293                                 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7294                                 sf_region_t *rgnp;
7295                                 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7296                                 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7297                                 ASSERT(srdp != NULL);
7298                                 rgnp = srdp->srd_hmergnp[rid];
7299                                 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
7300                                     rgnp, rid);
7301                                 shcnt += rgnp->rgn_refcnt;
7302                         } else {
7303                                 shcnt++;
7304                         }
7305                         if (shcnt > po_share) {


7425 {
7426         caddr_t addr;
7427         tte_t tte;
7428         tte_t ttemod;
7429         struct hme_blk *hmeblkp;
7430         int ret;
7431         sfmmu_t *sfmmup;
7432         cpuset_t cpuset;
7433 
7434         ASSERT(pp != NULL);
7435         ASSERT(sfmmu_mlist_held(pp));
7436 
7437         CPUSET_ZERO(cpuset);
7438         SFMMU_STAT(sf_clrwrt);
7439 
7440 retry:
7441 
7442         sfmmu_copytte(&sfhme->hme_tte, &tte);
7443         if (TTE_IS_VALID(&tte) && TTE_IS_WRITABLE(&tte)) {
7444                 hmeblkp = sfmmu_hmetohblk(sfhme);






7445                 sfmmup = hblktosfmmu(hmeblkp);
7446                 addr = tte_to_vaddr(hmeblkp, tte);
7447 
7448                 ttemod = tte;
7449                 TTE_CLR_WRT(&ttemod);
7450                 TTE_CLR_MOD(&ttemod);
7451                 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);
7452 
7453                 /*
7454                  * if the cas failed and the new value is not what
7455                  * we want, retry
7456                  */
7457                 if (ret < 0)
7458                         goto retry;
7459 
7460                 /* we win the cas */
7461                 if (ret > 0) {
7462                         if (hmeblkp->hblk_shared) {
7463                                 sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7464                                 uint_t rid = hmeblkp->hblk_tag.htag_rid;
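
The sfmmu_modifytte_try() return convention above drives a classic
compare-and-swap retry loop: a negative return means the TTE changed under us
into something we still have to deal with, so the caller rereads and jumps
back to retry; a positive return means our update won.  A zero return
presumably means the TTE changed but no further work is needed; that last
case is my reading and is not shown in this excerpt.  A small user-space
model with C11 atomics, purely illustrative:

#include <stdatomic.h>

/*
 * Model of a modify-try helper: >0 our CAS won, 0 the value changed but the
 * caller has nothing left to do, <0 the caller should reread and retry.
 * Treating bit 0 as "valid" is an assumption of this sketch.
 */
static int
modify_try(_Atomic unsigned *ttep, unsigned *oldp, unsigned newval)
{
	unsigned expect = *oldp;

	if (atomic_compare_exchange_strong(ttep, &expect, newval))
		return (1);		/* we win the cas */

	*oldp = expect;			/* hand the current value back */
	if ((expect & 0x1) == 0)	/* e.g. raced with an invalidation */
		return (0);		/* nothing left for us to do */
	return (-1);			/* still interesting: caller retries */
}

A caller loops exactly like the retry: label above: copy the tte, build the
modified value, call the helper, and go back to the copy on a negative return.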


7689  * Returns a page frame number for a given virtual address.
7690  * Returns PFN_INVALID to indicate an invalid mapping
7691  */
7692 pfn_t
7693 hat_getpfnum(struct hat *hat, caddr_t addr)
7694 {
7695         pfn_t pfn;
7696         tte_t tte;
7697 
7698         /*
7699          * We would like to
7700          * ASSERT(AS_LOCK_HELD(as, &as->a_lock));
7701          * but we can't because the iommu driver will call this
7702          * routine at interrupt time and it can't grab the as lock
7703          * or it will deadlock: A thread could have the as lock
7704          * and be waiting for io.  The io can't complete
7705          * because the interrupt thread is blocked trying to grab
7706          * the as lock.
7707          */
7708 


7709         if (hat == ksfmmup) {
7710                 if (IS_KMEM_VA_LARGEPAGE(addr)) {
7711                         ASSERT(segkmem_lpszc > 0);
7712                         pfn = sfmmu_kvaszc2pfn(addr, segkmem_lpszc);
7713                         if (pfn != PFN_INVALID) {
7714                                 sfmmu_check_kpfn(pfn);
7715                                 return (pfn);
7716                         }
7717                 } else if (segkpm && IS_KPM_ADDR(addr)) {
7718                         return (sfmmu_kpm_vatopfn(addr));
7719                 }
7720                 while ((pfn = sfmmu_vatopfn(addr, ksfmmup, &tte))
7721                     == PFN_SUSPENDED) {
7722                         sfmmu_vatopfn_suspended(addr, ksfmmup, &tte);
7723                 }
7724                 sfmmu_check_kpfn(pfn);
7725                 return (pfn);
7726         } else {
7727                 return (sfmmu_uvatopfn(addr, hat, NULL));
7728         }
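
The comment at the top of hat_getpfnum() is worth a picture: a thread that
holds the as lock can sleep waiting for I/O, and the interrupt that would
complete that I/O is the very context calling in here, so blocking on the as
lock from this path would close the cycle.  A user-space model (pthreads,
nothing kernel-specific) that shows the shape of the cycle without actually
deadlocking:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t as_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t io_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t io_cv = PTHREAD_COND_INITIALIZER;
static int io_done;

static void *
intr_handler(void *arg)
{
	/* A blocking mutex_lock here would complete the deadlock cycle. */
	if (pthread_mutex_trylock(&as_lock) != 0) {
		printf("as lock is held by a sleeping thread; "
		    "a blocking lookup here would deadlock\n");
	} else {
		pthread_mutex_unlock(&as_lock);
	}

	/* The lock-free lookup succeeded, so the I/O can now complete. */
	pthread_mutex_lock(&io_mtx);
	io_done = 1;
	pthread_cond_signal(&io_cv);
	pthread_mutex_unlock(&io_mtx);
	return (arg);
}

int
main(void)
{
	pthread_t t;

	pthread_mutex_lock(&as_lock);	/* thread A holds the as lock ... */
	pthread_create(&t, NULL, intr_handler, NULL);
	pthread_mutex_lock(&io_mtx);
	while (!io_done)		/* ... and sleeps waiting for I/O */
		pthread_cond_wait(&io_cv, &io_mtx);
	pthread_mutex_unlock(&io_mtx);
	pthread_mutex_unlock(&as_lock);
	pthread_join(t, NULL);
	return (0);
}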


7872                                 SFMMU_HASH_UNLOCK(hmebp);
7873                                 pfn = PFN_INVALID;
7874                                 return (pfn);
7875                         }
7876                 }
7877                 SFMMU_HASH_UNLOCK(hmebp);
7878                 hashno++;
7879         } while (hashno <= mmu_hashcnt);
7880         return (PFN_INVALID);
7881 }
7882 
7883 
7884 /*
7885  * For compatibility with AT&T and later optimizations
7886  */
7887 /* ARGSUSED */
7888 void
7889 hat_map(struct hat *hat, caddr_t addr, size_t len, uint_t flags)
7890 {
7891         ASSERT(hat != NULL);

7892 }
7893 
7894 /*
7895  * Return the number of mappings to a particular page.  This number is an
7896  * approximation of the number of people sharing the page.
7897  *
7898  * shared hmeblks or ism hmeblks are counted as 1 mapping here.
7899  * hat_page_checkshare() can be used to compare a threshold against a share
7900  * count that reflects the number of region sharers, albeit at higher cost.
7901  */
7902 ulong_t
7903 hat_page_getshare(page_t *pp)
7904 {
7905         page_t *spp = pp;       /* start page */
7906         kmutex_t *pml;
7907         ulong_t cnt;
7908         int index, sz = TTE64K;
7909 
7910         /*
7911          * We need to grab the mlist lock to make sure any outstanding


7964 
7965         if (vpm_enable && pp->p_vpmref) {
7966                 cnt += 1;
7967         }
7968 
7969         if (pp->p_share + cnt > sh_thresh) {
7970                 sfmmu_mlist_exit(pml);
7971                 return (1);
7972         }
7973 
7974         index = PP_MAPINDEX(pp);
7975 
7976 again:
7977         for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
7978                 tmphme = sfhme->hme_next;
7979                 if (IS_PAHME(sfhme)) {
7980                         continue;
7981                 }
7982 
7983                 hmeblkp = sfmmu_hmetohblk(sfhme);








7984                 if (hme_size(sfhme) != sz) {
7985                         continue;
7986                 }
7987 
7988                 if (hmeblkp->hblk_shared) {
7989                         sf_srd_t *srdp = hblktosrd(hmeblkp);
7990                         uint_t rid = hmeblkp->hblk_tag.htag_rid;
7991                         sf_region_t *rgnp;
7992                         ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7993                         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7994                         ASSERT(srdp != NULL);
7995                         rgnp = srdp->srd_hmergnp[rid];
7996                         SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
7997                             rgnp, rid);
7998                         cnt += rgnp->rgn_refcnt;
7999                 } else {
8000                         cnt++;
8001                 }
8002                 if (cnt > sh_thresh) {
8003                         sfmmu_mlist_exit(pml);


8078                 CPUSET_ZERO(cpuset);
8079                 sz = TTE64K;
8080                 sync = 1;
8081         }
8082 
8083         while (index) {
8084                 if (!(index & 0x1)) {
8085                         index >>= 1;
8086                         sz++;
8087                         continue;
8088                 }
8089                 ASSERT(sz <= pszc);
8090                 rootpp = PP_GROUPLEADER(pp, sz);
8091                 for (sfhme = rootpp->p_mapping; sfhme; sfhme = tmphme) {
8092                         tmphme = sfhme->hme_next;
8093                         ASSERT(!IS_PAHME(sfhme));
8094                         hmeblkp = sfmmu_hmetohblk(sfhme);
8095                         if (hme_size(sfhme) != sz) {
8096                                 continue;
8097                         }




8098                         tset = sfmmu_pageunload(rootpp, sfhme, sz);
8099                         CPUSET_OR(cpuset, tset);
8100                 }
8101                 if (index >>= 1) {
8102                         sz++;
8103                 }
8104         }
8105 
8106         ASSERT(!PP_ISMAPPED_LARGE(pp));
8107 
8108         if (sync) {
8109                 xt_sync(cpuset);
8110 #ifdef VAC
8111                 if (PP_ISTNC(pp)) {
8112                         conv_tnc(rootpp, sz);
8113                 }
8114 #endif  /* VAC */
8115         }
8116 
8117         pmtx = sfmmu_page_enter(pp);


8205  * This is currently implemented as the number of bytes that have active
8206  * hardware translations that have page structures.  Therefore, it can
8207  * underestimate the traditional resident set size, e.g., if the
8208  * physical page is present and the hardware translation is missing;
8209  * and it can overestimate the rss, e.g., if there are active
8210  * translations to a frame buffer with page structs.
8211  * Also, it does not take sharing into account.
8212  *
8213  * Note that we don't acquire locks here since this function is most often
8214  * called from the clock thread.
8215  */
8216 size_t
8217 hat_get_mapped_size(struct hat *hat)
8218 {
8219         size_t          assize = 0;
8220         int             i;
8221 
8222         if (hat == NULL)
8223                 return (0);
8224 


8225         for (i = 0; i < mmu_page_sizes; i++)
8226                 assize += ((pgcnt_t)hat->sfmmu_ttecnt[i] +
8227                     (pgcnt_t)hat->sfmmu_scdrttecnt[i]) * TTEBYTES(i);
8228 
8229         if (hat->sfmmu_iblk == NULL)
8230                 return (assize);
8231 
8232         for (i = 0; i < mmu_page_sizes; i++)
8233                 assize += ((pgcnt_t)hat->sfmmu_ismttecnt[i] +
8234                     (pgcnt_t)hat->sfmmu_scdismttecnt[i]) * TTEBYTES(i);
8235 
8236         return (assize);
8237 }
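
A quick worked example of the summation in hat_get_mapped_size(): each
page-size class contributes its TTE count times the bytes one TTE of that
size maps.  The factor-of-8 progression (8K, 64K, 512K, 4M, ...) matches
sun4u's page sizes, but treat the constant below as an assumption of this
sketch rather than the TTEBYTES() macro itself:

#include <stdio.h>

#define	NSIZES	4

int
main(void)
{
	/* hypothetical counts: 1000 8K TTEs, 16 64K, 2 512K, 1 4M */
	unsigned long ttecnt[NSIZES] = { 1000, 16, 2, 1 };
	unsigned long bytes_per_tte = 8192;	/* one TTE8K maps 8K */
	unsigned long assize = 0;
	int i;

	for (i = 0; i < NSIZES; i++) {
		assize += ttecnt[i] * bytes_per_tte;
		bytes_per_tte <<= 3;		/* each size is 8x the last */
	}
	printf("mapped size: %lu bytes\n", assize);	/* 14483456 */
	return (0);
}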
8238 
8239 int
8240 hat_stats_enable(struct hat *hat)
8241 {
8242         hatlock_t       *hatlockp;
8243 


8244         hatlockp = sfmmu_hat_enter(hat);
8245         hat->sfmmu_rmstat++;
8246         sfmmu_hat_exit(hatlockp);
8247         return (1);
8248 }
8249 
8250 void
8251 hat_stats_disable(struct hat *hat)
8252 {
8253         hatlock_t       *hatlockp;
8254 


8255         hatlockp = sfmmu_hat_enter(hat);
8256         hat->sfmmu_rmstat--;
8257         sfmmu_hat_exit(hatlockp);
8258 }
8259 
8260 /*
8261  * Routines for entering or removing ourselves from the
8262  * ism_hat's mapping list. This is used for both private and
8263  * SCD hats.
8264  */
8265 static void
8266 iment_add(struct ism_ment *iment,  struct hat *ism_hat)
8267 {
8268         ASSERT(MUTEX_HELD(&ism_mlist_lock));
8269 
8270         iment->iment_prev = NULL;
8271         iment->iment_next = ism_hat->sfmmu_iment;
8272         if (ism_hat->sfmmu_iment) {
8273                 ism_hat->sfmmu_iment->iment_prev = iment;
8274         }
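
iment_add() above is plain head insertion into a doubly-linked list; the
excerpt is cut off just before the new element becomes the head.  A minimal
model (the final store is my assumption about how the function completes):

#include <stddef.h>

struct ment {
	struct ment *next;
	struct ment *prev;
};

static void
ment_add(struct ment **headp, struct ment *m)
{
	m->prev = NULL;
	m->next = *headp;
	if (*headp != NULL)
		(*headp)->prev = m;	/* old head now points back at m */
	*headp = m;			/* m becomes the new list head */
}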


8336         sf_scd_t        *old_scdp;
8337 
8338 #ifdef DEBUG
8339         caddr_t         eaddr = addr + len;
8340 #endif /* DEBUG */
8341 
8342         ASSERT(ism_hatid != NULL && sfmmup != NULL);
8343         ASSERT(sptaddr == ISMID_STARTADDR);
8344         /*
8345          * Check the alignment.
8346          */
8347         if (!ISM_ALIGNED(ismshift, addr) || !ISM_ALIGNED(ismshift, sptaddr))
8348                 return (EINVAL);
8349 
8350         /*
8351          * Check size alignment.
8352          */
8353         if (!ISM_ALIGNED(ismshift, len))
8354                 return (EINVAL);
8355 


8356         /*
8357          * Allocate ism_ment for the ism_hat's mapping list, and an
8358          * ism map blk in case we need one.  We must do our
8359          * allocations before acquiring locks to prevent a deadlock
8360          * in the kmem allocator on the mapping list lock.
8361          */
8362         new_iblk = kmem_cache_alloc(ism_blk_cache, KM_SLEEP);
8363         ism_ment = kmem_cache_alloc(ism_ment_cache, KM_SLEEP);
8364 
8365         /*
8366          * Serialize ISM mappings with the ISM busy flag, and also the
8367          * trap handlers.
8368          */
8369         sfmmu_ismhat_enter(sfmmup, 0);
8370 
8371         /*
8372          * Allocate an ism map blk if necessary.
8373          */
8374         if (sfmmup->sfmmu_iblk == NULL) {
8375                 sfmmup->sfmmu_iblk = new_iblk;
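
The comment above about allocating before taking locks is a general pattern:
do any allocation that can block (or that can re-enter code needing the lock
you are about to take) up front, take the lock, and give back whatever you
did not end up using after the lock is dropped.  A user-space sketch of the
same shape:

#include <stdlib.h>
#include <pthread.h>

struct blk { struct blk *next; };

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static struct blk *blk_list;

static int
add_mapping(void)
{
	struct blk *newblk = malloc(sizeof (*newblk));	/* before the lock */

	if (newblk == NULL)
		return (-1);

	pthread_mutex_lock(&map_lock);
	if (blk_list == NULL) {		/* only consumed if actually needed */
		newblk->next = NULL;
		blk_list = newblk;
		newblk = NULL;
	}
	pthread_mutex_unlock(&map_lock);

	free(newblk);			/* no-op if it was consumed above */
	return (0);
}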


8547 void
8548 hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
8549 {
8550         ism_map_t       *ism_map;
8551         ism_ment_t      *free_ment = NULL;
8552         ism_blk_t       *ism_blkp;
8553         struct hat      *ism_hatid;
8554         int             found, i;
8555         hatlock_t       *hatlockp;
8556         struct tsb_info *tsbinfo;
8557         uint_t          ismshift = page_get_shift(ismszc);
8558         size_t          sh_size = ISM_SHIFT(ismshift, len);
8559         uchar_t         ism_rid;
8560         sf_scd_t        *old_scdp;
8561 
8562         ASSERT(ISM_ALIGNED(ismshift, addr));
8563         ASSERT(ISM_ALIGNED(ismshift, len));
8564         ASSERT(sfmmup != NULL);
8565         ASSERT(sfmmup != ksfmmup);
8566 









8567         ASSERT(sfmmup->sfmmu_as != NULL);



8568 
8569         /*
8570          * Make sure that during the entire time ISM mappings are removed,
8571          * the trap handlers serialize behind us, and that no one else
8572          * can be mucking with ISM mappings.  This also lets us get away
8573          * with not doing expensive cross calls to flush the TLB -- we
8574          * just discard the context, flush the entire TSB, and call it
8575          * a day.
8576          */
8577         sfmmu_ismhat_enter(sfmmup, 0);
8578 
8579         /*
8580          * Remove the mapping.
8581          *
8582          * We can't have any holes in the ism map.
8583          * The tsb miss code while searching the ism map will
8584          * stop on an empty map slot.  So we must move
8585          * everyone past the hole up 1 if any.
8586          *
8587          * Also empty ism map blks are not freed until the


9001                  * Always convert all mappings to TNC.
9002                  */
9003                 sz = fnd_mapping_sz(pp);
9004                 pp = PP_GROUPLEADER(pp, sz);
9005                 SFMMU_STAT_ADD(sf_uncache_conflict, TTEPAGES(sz));
9006                 sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH,
9007                     TTEPAGES(sz));
9008 
9009                 return;
9010         }
9011 
9012         /*
9013          * check if any mapping is in the same as or is locked,
9014          * since in that case we need to uncache.
9015          */
9016         for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9017                 tmphme = sfhmep->hme_next;
9018                 if (IS_PAHME(sfhmep))
9019                         continue;
9020                 hmeblkp = sfmmu_hmetohblk(sfhmep);


9021                 tmphat = hblktosfmmu(hmeblkp);
9022                 sfmmu_copytte(&sfhmep->hme_tte, &tte);
9023                 ASSERT(TTE_IS_VALID(&tte));
9024                 if (hmeblkp->hblk_shared || tmphat == hat ||
9025                     hmeblkp->hblk_lckcnt) {
9026                         /*
9027                          * We have an uncache conflict
9028                          */
9029                         SFMMU_STAT(sf_uncache_conflict);
9030                         sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
9031                         return;
9032                 }
9033         }
9034 
9035         /*
9036          * We have an unload conflict
9037          * We have already checked for LARGE mappings, therefore
9038          * the remaining mapping(s) must be TTE8K.
9039          */
9040         SFMMU_STAT(sf_unload_conflict);
9041 
9042         for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9043                 tmphme = sfhmep->hme_next;
9044                 if (IS_PAHME(sfhmep))
9045                         continue;
9046                 hmeblkp = sfmmu_hmetohblk(sfhmep);


9047                 ASSERT(!hmeblkp->hblk_shared);
9048                 (void) sfmmu_pageunload(pp, sfhmep, TTE8K);
9049         }
9050 
9051         if (PP_ISMAPPED_KPM(pp))
9052                 sfmmu_kpm_vac_unload(pp, addr);
9053 
9054         /*
9055          * Unloads only do TLB flushes so we need to flush the
9056          * cache here.
9057          */
9058         sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
9059         PP_SET_VCOLOR(pp, vcolor);
9060 }
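
To summarize the conflict handling above: if any remaining mapping is shared,
locked, or belongs to the same address space as the new conflicting mapping,
the mappings cannot simply be unloaded, so the page is made (temporarily)
non-cacheable; otherwise every remaining 8K mapping is unloaded, the cache is
flushed, and the page just takes the new color.  A condensed model with
made-up types:

struct mapping {
	struct mapping *next;
	int shared;
	int locked;
	void *owner_as;
};

enum vac_action { VAC_UNCACHE, VAC_RECOLOR };

static enum vac_action
vac_conflict_action(struct mapping *list, void *new_as)
{
	struct mapping *m;

	for (m = list; m != NULL; m = m->next) {
		if (m->shared || m->locked || m->owner_as == new_as)
			return (VAC_UNCACHE);	/* uncache conflict */
	}
	return (VAC_RECOLOR);		/* unload conflict: unload and recolor */
}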
9061 
9062 /*
9063  * Whenever a mapping is unloaded and the page is in TNC state,
9064  * we see if the page can be made cacheable again. 'pp' is
9065          * the page that we just unloaded a mapping from; the size
9066          * of the mapping that was unloaded is 'ottesz'.


9174 
9175                 if (PP_ISPNC(pp)) {
9176                         return (0);
9177                 }
9178 
9179                 clr_valid = 0;
9180                 if (PP_ISMAPPED_KPM(pp)) {
9181                         caddr_t kpmvaddr;
9182 
9183                         ASSERT(kpm_enable);
9184                         kpmvaddr = hat_kpm_page2va(pp, 1);
9185                         ASSERT(!(npages > 1 && IS_KPM_ALIAS_RANGE(kpmvaddr)));
9186                         color1 = addr_to_vcolor(kpmvaddr);
9187                         clr_valid = 1;
9188                 }
9189 
9190                 for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9191                         if (IS_PAHME(sfhme))
9192                                 continue;
9193                         hmeblkp = sfmmu_hmetohblk(sfhme);


9194 
9195                         sfmmu_copytte(&sfhme->hme_tte, &tte);
9196                         ASSERT(TTE_IS_VALID(&tte));
9197 
9198                         vaddr = tte_to_vaddr(hmeblkp, tte);
9199                         color = addr_to_vcolor(vaddr);
9200 
9201                         if (npages > 1) {
9202                                 /*
9203                                  * If there is a big mapping, make sure
9204                                  * 8K mapping is consistent with the big
9205                                  * mapping.
9206                                  */
9207                                 bcolor = i % ncolors;
9208                                 if (color != bcolor) {
9209                                         return (0);
9210                                 }
9211                         }
9212                         if (!clr_valid) {
9213                                 clr_valid = 1;


9321 static void
9322 sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor)
9323 {
9324         struct  sf_hment *sfhme;
9325         struct  hme_blk *hmeblkp;
9326         sfmmu_t *sfmmup;
9327         tte_t   tte, ttemod;
9328         caddr_t vaddr;
9329         int     ret, color;
9330         pfn_t   pfn;
9331 
9332         color = bcolor;
9333         pfn = pp->p_pagenum;
9334 
9335         for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9336 
9337                 if (IS_PAHME(sfhme))
9338                         continue;
9339                 hmeblkp = sfmmu_hmetohblk(sfhme);
9340 



9341                 sfmmu_copytte(&sfhme->hme_tte, &tte);
9342                 ASSERT(TTE_IS_VALID(&tte));
9343                 vaddr = tte_to_vaddr(hmeblkp, tte);
9344                 color = addr_to_vcolor(vaddr);
9345 
9346 #ifdef DEBUG
9347                 if ((flags & HAT_CACHE) && bcolor != NO_VCOLOR) {
9348                         ASSERT(color == bcolor);
9349                 }
9350 #endif
9351 
9352                 ASSERT(flags != HAT_TMPNC || color == PP_GET_VCOLOR(pp));
9353 
9354                 ttemod = tte;
9355                 if (flags & (HAT_UNCACHE | HAT_TMPNC)) {
9356                         TTE_CLR_VCACHEABLE(&ttemod);
9357                 } else {        /* flags & HAT_CACHE */
9358                         TTE_SET_VCACHEABLE(&ttemod);
9359                 }
9360                 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);


9656                         curcnum = sfmmu_getctx_sec();
9657                         if (curcnum == cnum)
9658                                 sfmmu_load_mmustate(sfmmup);
9659                         sfmmu_enable_intrs(pstate_save);
9660                         ASSERT(curcnum == cnum || curcnum == INVALID_CONTEXT);
9661                 }
9662         } else {
9663                 /*
9664                  * multi-thread
9665                  * or when sfmmup is not the same as the curproc.
9666                  */
9667                 sfmmu_invalidate_ctx(sfmmup);
9668         }
9669 
9670         kpreempt_enable();
9671 }
9672 
9673 
9674 /*
9675  * Replace the specified TSB with a new TSB.  This function gets called when
9676  * we grow or shrink a TSB.  When swapping in a TSB (TSB_SWAPIN), the
9677  * TSB_FORCEALLOC flag may be used to force allocation of a minimum-sized TSB
9678  * (8K).
9679  *
9680  * Caller must hold the HAT lock, but should assume any tsb_info
9681  * pointers it has are no longer valid after calling this function.
9682  *
9683  * Return values:
9684  *      TSB_ALLOCFAIL   Failed to allocate a TSB, due to memory constraints
9685  *      TSB_LOSTRACE    HAT is busy, i.e. another thread is already doing
9686  *                      something to this tsbinfo/TSB
9687  *      TSB_SUCCESS     Operation succeeded
9688  */
9689 static tsb_replace_rc_t
9690 sfmmu_replace_tsb(sfmmu_t *sfmmup, struct tsb_info *old_tsbinfo, uint_t szc,
9691     hatlock_t *hatlockp, uint_t flags)
9692 {
9693         struct tsb_info *new_tsbinfo = NULL;
9694         struct tsb_info *curtsb, *prevtsb;
9695         uint_t tte_sz_mask;
9696         int i;


13068         for (; i <= (size - hme1blk_sz); i += hme1blk_sz, k++) {
13069                 hmeblkp = (struct hme_blk *)addr;
13070                 addr += hme1blk_sz;
13071                 hmeblkp->hblk_nuc_bit = 1;
13072                 hmeblkp->hblk_nextpa = cached_va_to_pa((caddr_t)hmeblkp);
13073         }
13074         ASSERT(k >= nhblk1);
13075         nucleus_hblk1.len = k;
13076         SFMMU_STAT_ADD(sf_hblk1_ncreate, k);
13077 }
13078 
13079 /*
13080  * This function is currently not supported on this platform. For what
13081  * it's supposed to do, see hat.c and hat_srmmu.c
13082  */
13083 /* ARGSUSED */
13084 faultcode_t
13085 hat_softlock(struct hat *hat, caddr_t addr, size_t *lenp, page_t **ppp,
13086     uint_t flags)
13087 {

13088         return (FC_NOSUPPORT);
13089 }
13090 
13091 /*
13092  * Searches the mapping list of the page for a mapping of the same size. If not
13093  * found the corresponding bit is cleared in the p_index field. When large
13094  * pages are more prevalent in the system, we can maintain the mapping list
13095  * in order and we don't have to traverse the list each time. Just check the
13096  * next and prev entries, and if both are of different size, we clear the bit.
13097  */
13098 static void
13099 sfmmu_rm_large_mappings(page_t *pp, int ttesz)
13100 {
13101         struct sf_hment *sfhmep;
13102         struct hme_blk *hmeblkp;
13103         int     index;
13104         pgcnt_t npgs;
13105 
13106         ASSERT(ttesz > TTE8K);
13107 
13108         ASSERT(sfmmu_mlist_held(pp));
13109 
13110         ASSERT(PP_ISMAPPED_LARGE(pp));
13111 
13112         /*
13113          * Traverse the mapping list looking for another mapping of the same size,
13114          * since we only want to clear the index field if all mappings of
13115          * that size are gone.
13116          */
13117 
13118         for (sfhmep = pp->p_mapping; sfhmep; sfhmep = sfhmep->hme_next) {
13119                 if (IS_PAHME(sfhmep))
13120                         continue;
13121                 hmeblkp = sfmmu_hmetohblk(sfhmep);


13122                 if (hme_size(sfhmep) == ttesz) {
13123                         /*
13124                          * another mapping of the same size; don't clear the index.
13125                          */
13126                         return;
13127                 }
13128         }
13129 
13130         /*
13131          * Clear the p_index bit for large page.
13132          */
13133         index = PAGESZ_TO_INDEX(ttesz);
13134         npgs = TTEPAGES(ttesz);
13135         while (npgs-- > 0) {
13136                 ASSERT(pp->p_index & index);
13137                 pp->p_index &= ~index;
13138                 pp = PP_PAGENEXT(pp);
13139         }
13140 }
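
sfmmu_rm_large_mappings() above maintains the page's p_index bitmap: every
constituent page of a large mapping carries a bit for that mapping size, and
the bit is only cleared (on all constituent pages) once the last mapping of
that size is gone.  A toy model; the bit encoding and the 8-pages-per-size-step
constant are assumptions for illustration, not the kernel's macros:

#include <stdio.h>

#define	PAGES_PER(sz)	(1u << (3 * (sz)))	/* 8x more pages per step */
#define	SZ_TO_INDEX(sz)	(1u << ((sz) - 1))	/* one bit per large size */

struct page { unsigned p_index; };

static void
rm_large_mapping(struct page *pages, int first, int sz, int other_same_sz)
{
	unsigned index = SZ_TO_INDEX(sz);
	unsigned npgs = PAGES_PER(sz);
	unsigned i;

	if (other_same_sz)		/* another mapping of this size remains */
		return;
	for (i = 0; i < npgs; i++)	/* clear the bit on each sub-page */
		pages[first + i].p_index &= ~index;
}

int
main(void)
{
	struct page pages[8];
	int i;

	for (i = 0; i < 8; i++)		/* one 64K mapping over 8 pages */
		pages[i].p_index = SZ_TO_INDEX(1);
	rm_large_mapping(pages, 0, 1, 0);
	printf("p_index after unload: %u\n", pages[0].p_index);	/* 0 */
	return (0);
}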
13141 


13686         uint_t rhash;
13687         uint_t rid;
13688         hatlock_t *hatlockp;
13689         sf_region_t *rgnp;
13690         sf_region_t *new_rgnp = NULL;
13691         int i;
13692         uint16_t *nextidp;
13693         sf_region_t **freelistp;
13694         int maxids;
13695         sf_region_t **rarrp;
13696         uint16_t *busyrgnsp;
13697         ulong_t rttecnt;
13698         uchar_t tteflag;
13699         uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
13700         int text = (r_type == HAT_REGION_TEXT);
13701 
13702         if (srdp == NULL || r_size == 0) {
13703                 return (HAT_INVALID_REGION_COOKIE);
13704         }
13705 

13706         ASSERT(sfmmup != ksfmmup);
13707         ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
13708         ASSERT(srdp->srd_refcnt > 0);
13709         ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
13710         ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
13711         ASSERT(r_pgszc < mmu_page_sizes);
13712         if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) ||
13713             !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) {
13714                 panic("hat_join_region: region addr or size is not aligned\n");
13715         }
13716 
13717 
13718         r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
13719             SFMMU_REGION_HME;
13720         /*
13721          * Currently only support shared hmes for the read only main text
13722          * region.
13723          */
13724         if (r_type == SFMMU_REGION_HME && ((r_obj != srdp->srd_evp) ||
13725             (r_perm & PROT_WRITE))) {


13991         ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
13992         ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL);
13993 
13994         r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
13995             SFMMU_REGION_HME;
13996 
13997         if (r_type == SFMMU_REGION_ISM) {
13998                 ASSERT(SFMMU_IS_ISMRID_VALID(rid));
13999                 ASSERT(rid < SFMMU_MAX_ISM_REGIONS);
14000                 rgnp = srdp->srd_ismrgnp[rid];
14001         } else {
14002                 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14003                 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14004                 rgnp = srdp->srd_hmergnp[rid];
14005         }
14006         ASSERT(rgnp != NULL);
14007         ASSERT(rgnp->rgn_id == rid);
14008         ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14009         ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14010         ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));






14011 
14012         if (sfmmup->sfmmu_free) {
14013                 ulong_t rttecnt;
14014                 r_pgszc = rgnp->rgn_pgszc;
14015                 r_size = rgnp->rgn_size;
14016 
14017                 ASSERT(sfmmup->sfmmu_scdp == NULL);
14018                 if (r_type == SFMMU_REGION_ISM) {
14019                         SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
14020                 } else {
14021                         /* update shme rgns ttecnt in sfmmu_ttecnt */
14022                         rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14023                         ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14024 
14025                         atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc],
14026                             -rttecnt);
14027 
14028                         SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid);
14029                 }
14030         } else if (r_type == SFMMU_REGION_ISM) {