2134 sf_scd_t *scdp;
2135 int i;
2136 extern uint_t get_color_start(struct as *);
2137
2138 ASSERT(hat->sfmmu_xhat_provider == NULL);
2139 ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
2140 (flag == HAT_DUP_SRD));
2141 ASSERT(hat != ksfmmup);
2142 ASSERT(newhat != ksfmmup);
2143 ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
2144
2145 if (flag == HAT_DUP_COW) {
2146 panic("hat_dup: HAT_DUP_COW not supported");
2147 }
2148
2149 if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
2150 ASSERT(srdp->srd_evp != NULL);
2151 VN_HOLD(srdp->srd_evp);
2152 ASSERT(srdp->srd_refcnt > 0);
2153 newhat->sfmmu_srdp = srdp;
2154 atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1);
2155 }
2156
2157 /*
2158 	 * The HAT_DUP_ALL flag is used after the address space (as) duplication is done.
2159 */
2160 if (flag == HAT_DUP_ALL && ((srdp = newhat->sfmmu_srdp) != NULL)) {
2161 ASSERT(newhat->sfmmu_srdp->srd_refcnt >= 2);
2162 newhat->sfmmu_rtteflags = hat->sfmmu_rtteflags;
2163 if (hat->sfmmu_flags & HAT_4MTEXT_FLAG) {
2164 newhat->sfmmu_flags |= HAT_4MTEXT_FLAG;
2165 }
2166
2167 		/* check if we need to join the scd */
2168 if ((scdp = hat->sfmmu_scdp) != NULL &&
2169 newhat->sfmmu_scdp != scdp) {
2170 int ret;
2171 SF_RGNMAP_IS_SUBSET(&newhat->sfmmu_region_map,
2172 &scdp->scd_region_map, ret);
2173 ASSERT(ret);
2174 sfmmu_join_scd(scdp, newhat);
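For reference, SF_RGNMAP_IS_SUBSET above answers whether every region bit set in the first map is also set in the second. A minimal sketch of that word-at-a-time subset test, assuming a fixed-size word array; the type name, field name, and word count here are illustrative, not the kernel's actual definitions:

    #include <sys/types.h>

    #define RGNMAP_WORDS    4       /* illustrative; not the kernel's sizing */

    typedef struct rgn_map {
            ulong_t bits[RGNMAP_WORDS];
    } rgn_map_t;

    /* Return 1 if every bit set in map1 is also set in map2, else 0. */
    static int
    rgnmap_is_subset(rgn_map_t *map1, rgn_map_t *map2)
    {
            int i;

            for (i = 0; i < RGNMAP_WORDS; i++) {
                    if ((map1->bits[i] & ~map2->bits[i]) != 0)
                            return (0);
            }
            return (1);
    }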
3210 * sfmmu_pagearray_setup failed so return
3211 */
3212 sfmmu_mlist_exit(pml);
3213 return (1);
3214 }
3215 }
3216
3217 /*
3218 * Make sure hment is not on a mapping list.
3219 */
3220 ASSERT(remap || (sfhme->hme_page == NULL));
3221
3222 	/* if it is not a remap then hme_next had better be NULL */
3223 ASSERT((!remap) ? sfhme->hme_next == NULL : 1);
3224
3225 if (flags & HAT_LOAD_LOCK) {
3226 if ((hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) {
3227 panic("too high lckcnt-hmeblk %p",
3228 (void *)hmeblkp);
3229 }
3230 atomic_add_32(&hmeblkp->hblk_lckcnt, 1);
3231
3232 HBLK_STACK_TRACE(hmeblkp, HBLK_LOCK);
3233 }
3234
3235 #ifdef VAC
3236 if (pp && PP_ISNC(pp)) {
3237 /*
3238 * If the physical page is marked to be uncacheable, like
3239 * by a vac conflict, make sure the new mapping is also
3240 * uncacheable.
3241 */
3242 TTE_CLR_VCACHEABLE(ttep);
3243 ASSERT(PP_GET_VCOLOR(pp) == NO_VCOLOR);
3244 }
3245 #endif
3246 ttep->tte_hmenum = hmenum;
3247
3248 #ifdef DEBUG
3249 orig_old = tteold;
3250 #endif /* DEBUG */
3251
3252 while (sfmmu_modifytte_try(&tteold, ttep, &sfhme->hme_tte) < 0) {
3253 if ((sfmmup == KHATID) &&
3254 (flags & (HAT_LOAD_LOCK | HAT_LOAD_REMAP))) {
3255 sfmmu_copytte(&sfhme->hme_tte, &tteold);
3256 }
3257 #ifdef DEBUG
3258 chk_tte(&orig_old, &tteold, ttep, hmeblkp);
3259 #endif /* DEBUG */
3260 }
3261 ASSERT(TTE_IS_VALID(&sfhme->hme_tte));
3262
3263 if (!TTE_IS_VALID(&tteold)) {
3264
3265 atomic_add_16(&hmeblkp->hblk_vcnt, 1);
3266 if (rid == SFMMU_INVALID_SHMERID) {
3267 atomic_add_long(&sfmmup->sfmmu_ttecnt[size], 1);
3268 } else {
3269 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
3270 sf_region_t *rgnp = srdp->srd_hmergnp[rid];
3271 /*
3272 * We already accounted for region ttecnt's in sfmmu
3273 * during hat_join_region() processing. Here we
3274 			 * only update ttecnt's in the region structure.
3275 */
3276 atomic_add_long(&rgnp->rgn_ttecnt[size], 1);
3277 }
3278 }
3279
3280 myflt = (astosfmmu(curthread->t_procp->p_as) == sfmmup);
3281 if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 &&
3282 sfmmup != ksfmmup) {
3283 uchar_t tteflag = 1 << size;
3284 if (rid == SFMMU_INVALID_SHMERID) {
3285 if (!(sfmmup->sfmmu_tteflags & tteflag)) {
3286 hatlockp = sfmmu_hat_enter(sfmmup);
3287 sfmmup->sfmmu_tteflags |= tteflag;
3288 sfmmu_hat_exit(hatlockp);
3289 }
3290 } else if (!(sfmmup->sfmmu_rtteflags & tteflag)) {
3291 hatlockp = sfmmu_hat_enter(sfmmup);
3292 sfmmup->sfmmu_rtteflags |= tteflag;
3293 sfmmu_hat_exit(hatlockp);
3294 }
3295 /*
3296 * Update the current CPU tsbmiss area, so the current thread
3364 */
3365 if (size == TTE8K || size == TTE4M) {
3366 sf_scd_t *scdp;
3367 hatlockp = sfmmu_hat_enter(sfmmup);
3368 /*
3369 * Don't preload private TSB if the mapping is used
3370 * by the shctx in the SCD.
3371 */
3372 scdp = sfmmup->sfmmu_scdp;
3373 if (rid == SFMMU_INVALID_SHMERID || scdp == NULL ||
3374 !SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
3375 sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte,
3376 size);
3377 }
3378 sfmmu_hat_exit(hatlockp);
3379 }
3380 }
3381 if (pp) {
3382 if (!remap) {
3383 HME_ADD(sfhme, pp);
3384 atomic_add_16(&hmeblkp->hblk_hmecnt, 1);
3385 ASSERT(hmeblkp->hblk_hmecnt > 0);
3386
3387 /*
3388 * Cannot ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
3389 * see pageunload() for comment.
3390 */
3391 }
3392 sfmmu_mlist_exit(pml);
3393 }
3394
3395 return (0);
3396 }
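The sfmmu_modifytte_try() loop in the function above follows the standard compare-and-swap retry idiom: snapshot the old value, build the desired new value, and retry if another thread changed the target in between. A minimal sketch of the same idiom using the real atomic_cas_64(3C) primitive; the tte64_t type and the compute callback are stand-ins for the actual TTE machinery:

    #include <atomic.h>
    #include <sys/types.h>

    typedef uint64_t tte64_t;       /* stand-in for the real TTE layout */

    static void
    tte_update(volatile uint64_t *ttep, tte64_t (*compute)(tte64_t))
    {
            tte64_t old, new;

            do {
                    old = *ttep;            /* snapshot the current TTE */
                    new = compute(old);     /* derive the replacement */
                    /* atomic_cas_64() returns the value it actually found */
            } while (atomic_cas_64(ttep, old, new) != old);
    }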
3397 /*
3398 * Function unlocks hash bucket.
3399 */
3400 static void
3401 sfmmu_tteload_release_hashbucket(struct hmehash_bucket *hmebp)
3402 {
3403 ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
3404 SFMMU_HASH_UNLOCK(hmebp);
4137 readtte:
4138 sfmmu_copytte(&sfhme->hme_tte, &tteold);
4139 if (TTE_IS_VALID(&tteold)) {
4140
4141 ttemod = tteold;
4142
4143 ret = sfmmu_modifytte_try(&tteold, &ttemod,
4144 &sfhme->hme_tte);
4145
4146 if (ret < 0)
4147 goto readtte;
4148
4149 if (hmeblkp->hblk_lckcnt == 0)
4150 panic("zero hblk lckcnt");
4151
4152 if (((uintptr_t)addr + TTEBYTES(ttesz)) >
4153 (uintptr_t)endaddr)
4154 panic("can't unlock large tte");
4155
4156 ASSERT(hmeblkp->hblk_lckcnt > 0);
4157 atomic_add_32(&hmeblkp->hblk_lckcnt, -1);
4158 HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
4159 } else {
4160 panic("sfmmu_hblk_unlock: invalid tte");
4161 }
4162 addr += TTEBYTES(ttesz);
4163 sfhme++;
4164 }
4165 return (addr);
4166 }
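The unlock loop above steps through the block one mapping at a time, advancing by TTEBYTES(ttesz). On sun4 hardware each TTE size class maps eight times as much as the previous one (8K, 64K, 512K, 4M, ...), so the byte count reduces to a shift; a sketch under that assumption rather than a quote of the kernel macro:

    #include <sys/types.h>

    #define MMU_PAGESHIFT   13      /* 8K base page on sun4 */

    /* Bytes mapped by one TTE of size class ttesz (0 = 8K, 1 = 64K, ...). */
    static size_t
    tte_bytes(int ttesz)
    {
            return ((size_t)1 << (MMU_PAGESHIFT + 3 * ttesz));
    }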
4167
4168 /*
4169 * Physical Address Mapping Framework
4170 *
4171 * General rules:
4172 *
4173 * (1) Applies only to seg_kmem memory pages. To make things easier,
4174 * seg_kpm addresses are also accepted by the routines, but nothing
4175 * is done with them since by definition their PA mappings are static.
4176 * (2) hat_add_callback() may only be called while holding the page lock
4177 * SE_SHARED or SE_EXCL of the underlying page (e.g., as_pagelock()),
6116 if (pp != NULL) {
6117 panic("sfmmu_hblk_unload: pp = 0x%p "
6118 "tte became invalid under mlist"
6119 " lock = 0x%p", (void *)pp,
6120 (void *)pml);
6121 }
6122 continue;
6123 }
6124
6125 if (!(flags & HAT_UNLOAD_NOSYNC)) {
6126 sfmmu_ttesync(sfmmup, addr, &tte, pp);
6127 }
6128
6129 /*
6130 			 * Ok - we invalidated the tte. Do the rest of the job.
6131 */
6132 ttecnt++;
6133
6134 if (flags & HAT_UNLOAD_UNLOCK) {
6135 ASSERT(hmeblkp->hblk_lckcnt > 0);
6136 atomic_add_32(&hmeblkp->hblk_lckcnt, -1);
6137 HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
6138 }
6139
6140 /*
6141 * Normally we would need to flush the page
6142 * from the virtual cache at this point in
6143 * order to prevent a potential cache alias
6144 * inconsistency.
6145 * The particular scenario we need to worry
6146 * about is:
6147 			 * Given: va1 and va2 are two virtual addresses
6148 * that alias and map the same physical
6149 * address.
6150 * 1. mapping exists from va1 to pa and data
6151 * has been read into the cache.
6152 * 2. unload va1.
6153 * 3. load va2 and modify data using va2.
6154 			 * 4. unload va2.
6155 * 5. load va1 and reference data. Unless we
6156 * flush the data cache when we unload we will
6170 DEMAP_RANGE_MARKPG(dmrp, addr);
6171 } else {
6172 ASSERT(sfmmup != NULL);
6173 ASSERT(!hmeblkp->hblk_shared);
6174 sfmmu_tlb_demap(addr, sfmmup, hmeblkp,
6175 sfmmup->sfmmu_free, 0);
6176 }
6177
6178 if (pp) {
6179 /*
6180 * Remove the hment from the mapping list
6181 */
6182 ASSERT(hmeblkp->hblk_hmecnt > 0);
6183
6184 /*
6185 * Again, we cannot
6186 * ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS);
6187 */
6188 HME_SUB(sfhmep, pp);
6189 membar_stst();
6190 atomic_add_16(&hmeblkp->hblk_hmecnt, -1);
6191 }
6192
6193 ASSERT(hmeblkp->hblk_vcnt > 0);
6194 atomic_add_16(&hmeblkp->hblk_vcnt, -1);
6195
6196 ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
6197 !hmeblkp->hblk_lckcnt);
6198
6199 #ifdef VAC
6200 if (pp && (pp->p_nrm & (P_KPMC | P_KPMS | P_TNC))) {
6201 if (PP_ISTNC(pp)) {
6202 /*
6203 				 * If the page was temporarily
6204 * uncached, try to recache
6205 * it. Note that HME_SUB() was
6206 				 * called above, so p_index and
6207 				 * the mlist have been updated.
6208 */
6209 conv_tnc(pp, ttesz);
6210 } else if (pp->p_mapping == NULL) {
6211 ASSERT(kpm_enable);
6212 /*
6213 * Page is marked to be in VAC conflict
6214 * to an existing kpm mapping and/or is
7332 }
7333
7334 if (ret == 0) {
7335 panic("pageunload: cas failed?");
7336 }
7337
7338 addr = tte_to_vaddr(hmeblkp, tte);
7339
7340 if (hmeblkp->hblk_shared) {
7341 sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7342 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7343 sf_region_t *rgnp;
7344 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7345 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7346 ASSERT(srdp != NULL);
7347 rgnp = srdp->srd_hmergnp[rid];
7348 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
7349 cpuset = sfmmu_rgntlb_demap(addr, rgnp, hmeblkp, 1);
7350 sfmmu_ttesync(NULL, addr, &tte, pp);
7351 ASSERT(rgnp->rgn_ttecnt[ttesz] > 0);
7352 atomic_add_long(&rgnp->rgn_ttecnt[ttesz], -1);
7353 } else {
7354 sfmmu_ttesync(sfmmup, addr, &tte, pp);
7355 atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -1);
7356
7357 /*
7358 * We need to flush the page from the virtual cache
7359 * in order to prevent a virtual cache alias
7360 * inconsistency. The particular scenario we need
7361 * to worry about is:
7362 		 * Given: va1 and va2 are two virtual addresses that
7363 * alias and will map the same physical address.
7364 * 1. mapping exists from va1 to pa and data has
7365 * been read into the cache.
7366 * 2. unload va1.
7367 * 3. load va2 and modify data using va2.
7368 		 * 4. unload va2.
7369 * 5. load va1 and reference data. Unless we flush
7370 * the data cache when we unload we will get
7371 * stale data.
7372 * This scenario is taken care of by using virtual
7373 * page coloring.
7374 */
7375 if (sfmmup->sfmmu_ismhat) {
7396 /*
7397 * Hme_sub has to run after ttesync() and a_rss update.
7398 * See hblk_unload().
7399 */
7400 HME_SUB(sfhme, pp);
7401 membar_stst();
7402
7403 /*
7404 	 * We cannot make ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
7405 	 * since pteload may have done a HME_ADD() right after
7406 	 * we did the HME_SUB() above. Hmecnt is now maintained
7407 	 * by cas only; no lock guarantees its value. The only
7408 	 * guarantee we have is that hmecnt will not drop below
7409 	 * what it should be, so the hblk will not be taken away.
7410 	 * It's also important that we decrement the hmecnt only after
7411 	 * we are done with hmeblkp, so that this hmeblk won't be
7412 * stolen.
7413 */
7414 ASSERT(hmeblkp->hblk_hmecnt > 0);
7415 ASSERT(hmeblkp->hblk_vcnt > 0);
7416 atomic_add_16(&hmeblkp->hblk_vcnt, -1);
7417 atomic_add_16(&hmeblkp->hblk_hmecnt, -1);
7418 /*
7419 * This is bug 4063182.
7420 * XXX: fixme
7421 * ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
7422 * !hmeblkp->hblk_lckcnt);
7423 */
7424 } else {
7425 panic("invalid tte? pp %p &tte %p",
7426 (void *)pp, (void *)&tte);
7427 }
7428
7429 return (cpuset);
7430 }
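The non-shared path above relies on virtual page coloring to avoid the flush: an alias inconsistency can only arise in a virtually indexed cache when two mappings of the same physical page land on different cache colors, and the HAT assigns every mapping of a page the same color. A hedged sketch of the color arithmetic, with an illustrative color count (the kernel derives it from the VAC size):

    #include <sys/types.h>

    #define MMU_PAGESHIFT   13      /* 8K base page */
    #define VAC_COLORS      4       /* illustrative: vac size / page size */

    /* Virtual color: the low bits of the virtual page number. */
    static uint_t
    addr_to_vcolor(uintptr_t vaddr)
    {
            return ((uint_t)(vaddr >> MMU_PAGESHIFT) & (VAC_COLORS - 1));
    }

    /*
     * Two mappings of the same physical page risk a cache alias
     * inconsistency only when their colors differ.
     */
    static int
    vac_alias_conflict(uintptr_t va1, uintptr_t va2)
    {
            return (addr_to_vcolor(va1) != addr_to_vcolor(va2));
    }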
7431
7432 /*
7433 * While relocating a kernel page, this function will move the mappings
7434 * from tpp to dpp and modify any associated data with these mappings.
7435 * It also unsuspends the suspended kernel mapping.
7436 */
7437 static void
13796 uint_t hash = SRD_HASH_FUNCTION(evp);
13797 sf_srd_t *srdp;
13798 sf_srd_t *newsrdp;
13799
13800 ASSERT(sfmmup != ksfmmup);
13801 ASSERT(sfmmup->sfmmu_srdp == NULL);
13802
13803 if (!shctx_on) {
13804 return;
13805 }
13806
13807 VN_HOLD(evp);
13808
13809 if (srd_buckets[hash].srdb_srdp != NULL) {
13810 mutex_enter(&srd_buckets[hash].srdb_lock);
13811 for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13812 srdp = srdp->srd_hash) {
13813 if (srdp->srd_evp == evp) {
13814 ASSERT(srdp->srd_refcnt >= 0);
13815 sfmmup->sfmmu_srdp = srdp;
13816 atomic_add_32(
13817 (volatile uint_t *)&srdp->srd_refcnt, 1);
13818 mutex_exit(&srd_buckets[hash].srdb_lock);
13819 return;
13820 }
13821 }
13822 mutex_exit(&srd_buckets[hash].srdb_lock);
13823 }
13824 newsrdp = kmem_cache_alloc(srd_cache, KM_SLEEP);
13825 ASSERT(newsrdp->srd_next_ismrid == 0 && newsrdp->srd_next_hmerid == 0);
13826
13827 newsrdp->srd_evp = evp;
13828 newsrdp->srd_refcnt = 1;
13829 newsrdp->srd_hmergnfree = NULL;
13830 newsrdp->srd_ismrgnfree = NULL;
13831
13832 mutex_enter(&srd_buckets[hash].srdb_lock);
13833 for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13834 srdp = srdp->srd_hash) {
13835 if (srdp->srd_evp == evp) {
13836 ASSERT(srdp->srd_refcnt >= 0);
13837 sfmmup->sfmmu_srdp = srdp;
13838 atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1);
13839 mutex_exit(&srd_buckets[hash].srdb_lock);
13840 kmem_cache_free(srd_cache, newsrdp);
13841 return;
13842 }
13843 }
13844 newsrdp->srd_hash = srd_buckets[hash].srdb_srdp;
13845 srd_buckets[hash].srdb_srdp = newsrdp;
13846 sfmmup->sfmmu_srdp = newsrdp;
13847
13848 mutex_exit(&srd_buckets[hash].srdb_lock);
13849
13850 }
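The function above is the classic find-or-create shape for a mutex-protected hash chain: scan the bucket, allocate a candidate outside the lock (kmem_cache_alloc with KM_SLEEP may block), then rescan under the lock and discard the candidate if another thread inserted first. A condensed userland sketch of the same control flow, with illustrative types and a pthread mutex standing in for srdb_lock:

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct node {
            struct node     *next;
            void            *key;
            unsigned        refcnt;
    } node_t;

    static pthread_mutex_t  bucket_lock = PTHREAD_MUTEX_INITIALIZER;
    static node_t           *bucket_head;

    /* Find the node for key, or insert a new one; returned held. */
    static node_t *
    find_or_create(void *key)
    {
            node_t *np, *newnp;

            /* allocate the candidate before taking the lock */
            newnp = calloc(1, sizeof (*newnp));
            if (newnp == NULL)
                    return (NULL);
            newnp->key = key;
            newnp->refcnt = 1;

            pthread_mutex_lock(&bucket_lock);
            for (np = bucket_head; np != NULL; np = np->next) {
                    if (np->key == key) {
                            np->refcnt++;   /* lost the race: take a hold */
                            pthread_mutex_unlock(&bucket_lock);
                            free(newnp);    /* discard our candidate */
                            return (np);
                    }
            }
            newnp->next = bucket_head;      /* won the race: publish */
            bucket_head = newnp;
            pthread_mutex_unlock(&bucket_lock);
            return (newnp);
    }

The kernel version additionally scans once before allocating when the bucket is non-empty, trading a possible second walk for skipping the allocation in the common hit case.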
13851
13852 static void
13853 sfmmu_leave_srd(sfmmu_t *sfmmup)
13854 {
13855 vnode_t *evp;
13856 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
13857 uint_t hash;
13858 sf_srd_t **prev_srdpp;
13859 sf_region_t *rgnp;
13860 sf_region_t *nrgnp;
13861 #ifdef DEBUG
13862 int rgns = 0;
13863 #endif
13864 int i;
13865
13866 ASSERT(sfmmup != ksfmmup);
13867 ASSERT(srdp != NULL);
13868 ASSERT(srdp->srd_refcnt > 0);
13869 ASSERT(sfmmup->sfmmu_scdp == NULL);
13870 ASSERT(sfmmup->sfmmu_free == 1);
13871
13872 sfmmup->sfmmu_srdp = NULL;
13873 evp = srdp->srd_evp;
13874 ASSERT(evp != NULL);
13875 if (atomic_add_32_nv(
13876 (volatile uint_t *)&srdp->srd_refcnt, -1)) {
13877 VN_RELE(evp);
13878 return;
13879 }
13880
13881 hash = SRD_HASH_FUNCTION(evp);
13882 mutex_enter(&srd_buckets[hash].srdb_lock);
13883 for (prev_srdpp = &srd_buckets[hash].srdb_srdp;
13884 (srdp = *prev_srdpp) != NULL; prev_srdpp = &srdp->srd_hash) {
13885 if (srdp->srd_evp == evp) {
13886 break;
13887 }
13888 }
13889 if (srdp == NULL || srdp->srd_refcnt) {
13890 mutex_exit(&srd_buckets[hash].srdb_lock);
13891 VN_RELE(evp);
13892 return;
13893 }
13894 *prev_srdpp = srdp->srd_hash;
13895 mutex_exit(&srd_buckets[hash].srdb_lock);
13896
14073 mutex_enter(&srdp->srd_mutex);
14074
14075 for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
14076 rgnp = rgnp->rgn_hash) {
14077 if (rgnp->rgn_saddr == r_saddr && rgnp->rgn_size == r_size &&
14078 rgnp->rgn_obj == r_obj && rgnp->rgn_objoff == r_objoff &&
14079 rgnp->rgn_perm == r_perm && rgnp->rgn_pgszc == r_pgszc) {
14080 break;
14081 }
14082 }
14083
14084 rfound:
14085 if (rgnp != NULL) {
14086 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14087 ASSERT(rgnp->rgn_cb_function == r_cb_function);
14088 ASSERT(rgnp->rgn_refcnt >= 0);
14089 rid = rgnp->rgn_id;
14090 ASSERT(rid < maxids);
14091 ASSERT(rarrp[rid] == rgnp);
14092 ASSERT(rid < *nextidp);
14093 atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1);
14094 mutex_exit(&srdp->srd_mutex);
14095 if (new_rgnp != NULL) {
14096 kmem_cache_free(region_cache, new_rgnp);
14097 }
14098 if (r_type == SFMMU_REGION_HME) {
14099 int myjoin =
14100 (sfmmup == astosfmmu(curthread->t_procp->p_as));
14101
14102 sfmmu_link_to_hmeregion(sfmmup, rgnp);
14103 /*
14104 			 * The bitmap should be updated after linking the sfmmu
14105 			 * onto the region list so that pageunload() doesn't skip
14106 			 * the TSB/TLB flush. As soon as the bitmap is updated,
14107 			 * another thread in this process can start accessing
14108 			 * this region.
14109 */
14110 /*
14111 * Normally ttecnt accounting is done as part of
14112 * pagefault handling. But a process may not take any
14113 * pagefaults on shared hmeblks created by some other
14423
14424 /* update shme rgns ttecnt in sfmmu_ttecnt */
14425 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14426 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14427 atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], -rttecnt);
14428
14429 sfmmu_hat_exit(hatlockp);
14430 if (scdp != NULL && sfmmup->sfmmu_scdp == NULL) {
14431 /* sfmmup left the scd, grow private tsb */
14432 sfmmu_check_page_sizes(sfmmup, 1);
14433 } else {
14434 sfmmu_check_page_sizes(sfmmup, 0);
14435 }
14436 }
14437
14438 if (r_type == SFMMU_REGION_HME) {
14439 sfmmu_unlink_from_hmeregion(sfmmup, rgnp);
14440 }
14441
14442 r_obj = rgnp->rgn_obj;
14443 if (atomic_add_32_nv((volatile uint_t *)&rgnp->rgn_refcnt, -1)) {
14444 return;
14445 }
14446
14447 /*
14448 	 * Looks like nobody uses this region anymore. Free it.
14449 */
14450 rhash = RGN_HASH_FUNCTION(r_obj);
14451 mutex_enter(&srdp->srd_mutex);
14452 for (prev_rgnpp = &srdp->srd_rgnhash[rhash];
14453 (cur_rgnp = *prev_rgnpp) != NULL;
14454 prev_rgnpp = &cur_rgnp->rgn_hash) {
14455 if (cur_rgnp == rgnp && cur_rgnp->rgn_refcnt == 0) {
14456 break;
14457 }
14458 }
14459
14460 if (cur_rgnp == NULL) {
14461 mutex_exit(&srdp->srd_mutex);
14462 return;
14463 }
14508 uint_t rid = (uint_t)((uint64_t)rcookie);
14509 sf_region_t *rgnp;
14510 sf_rgn_link_t *rlink;
14511 sf_rgn_link_t *hrlink;
14512 ulong_t rttecnt;
14513
14514 ASSERT(sfmmup != ksfmmup);
14515 ASSERT(srdp != NULL);
14516 ASSERT(srdp->srd_refcnt > 0);
14517
14518 ASSERT(rid < srdp->srd_next_hmerid);
14519 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14520 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14521
14522 rgnp = srdp->srd_hmergnp[rid];
14523 ASSERT(rgnp->rgn_refcnt > 0);
14524 ASSERT(rgnp->rgn_id == rid);
14525 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == SFMMU_REGION_HME);
14526 ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14527
14528 atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1);
14529
14530 /* LINTED: constant in conditional context */
14531 SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 0);
14532 ASSERT(rlink != NULL);
14533 mutex_enter(&rgnp->rgn_mutex);
14534 ASSERT(rgnp->rgn_sfmmu_head != NULL);
14535 /* LINTED: constant in conditional context */
14536 SFMMU_HMERID2RLINKP(rgnp->rgn_sfmmu_head, rid, hrlink, 0, 0);
14537 ASSERT(hrlink != NULL);
14538 ASSERT(hrlink->prev == NULL);
14539 rlink->next = rgnp->rgn_sfmmu_head;
14540 rlink->prev = NULL;
14541 hrlink->prev = sfmmup;
14542 /*
14543 * make sure rlink's next field is correct
14544 * before making this link visible.
14545 */
14546 membar_stst();
14547 rgnp->rgn_sfmmu_head = sfmmup;
14548 mutex_exit(&rgnp->rgn_mutex);
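The membar_stst() above is a store-store barrier: it orders the initializing stores to rlink before the store that publishes sfmmup as rgn_sfmmu_head, so a lock-free reader walking the list never sees a half-built link. A minimal userland sketch of the publish side, using membar_producer(3C) as the store-store barrier:

    #include <atomic.h>

    typedef struct elem {
            struct elem     *next;
            int             data;
    } elem_t;

    static elem_t * volatile list_head;

    static void
    publish(elem_t *ep, int data)
    {
            ep->data = data;
            ep->next = list_head;
            /*
             * Order the stores above before the head update below, so a
             * reader that sees the new head sees a fully built element.
             */
            membar_producer();
            list_head = ep;
    }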
15238 static void
15239 sfmmu_find_scd(sfmmu_t *sfmmup)
15240 {
15241 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15242 sf_scd_t *scdp, *new_scdp;
15243 int ret;
15244
15245 ASSERT(srdp != NULL);
15246 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
15247
15248 mutex_enter(&srdp->srd_scd_mutex);
15249 for (scdp = srdp->srd_scdp; scdp != NULL;
15250 scdp = scdp->scd_next) {
15251 SF_RGNMAP_EQUAL(&scdp->scd_region_map,
15252 &sfmmup->sfmmu_region_map, ret);
15253 if (ret == 1) {
15254 SF_SCD_INCR_REF(scdp);
15255 mutex_exit(&srdp->srd_scd_mutex);
15256 sfmmu_join_scd(scdp, sfmmup);
15257 ASSERT(scdp->scd_refcnt >= 2);
15258 atomic_add_32((volatile uint32_t *)
15259 &scdp->scd_refcnt, -1);
15260 return;
15261 } else {
15262 /*
15263 * If the sfmmu region map is a subset of the scd
15264 * region map, then the assumption is that this process
15265 * will continue attaching to ISM segments until the
15266 * region maps are equal.
15267 */
15268 SF_RGNMAP_IS_SUBSET(&scdp->scd_region_map,
15269 &sfmmup->sfmmu_region_map, ret);
15270 if (ret == 1) {
15271 mutex_exit(&srdp->srd_scd_mutex);
15272 return;
15273 }
15274 }
15275 }
15276
15277 ASSERT(scdp == NULL);
15278 /*
15279 * No matching SCD has been found, create a new one.
15284 return;
15285 }
15286
15287 /*
15288 * sfmmu_alloc_scd() returns with a ref count of 1 on the scd.
15289 */
15290
15291 /* Set scd_rttecnt for shme rgns in SCD */
15292 sfmmu_set_scd_rttecnt(srdp, new_scdp);
15293
15294 /*
15295 * Link scd onto srd_scdp list and scd sfmmu onto region/iment lists.
15296 */
15297 sfmmu_link_scd_to_regions(srdp, new_scdp);
15298 sfmmu_add_scd(&srdp->srd_scdp, new_scdp);
15299 SFMMU_STAT_ADD(sf_create_scd, 1);
15300
15301 mutex_exit(&srdp->srd_scd_mutex);
15302 sfmmu_join_scd(new_scdp, sfmmup);
15303 ASSERT(new_scdp->scd_refcnt >= 2);
15304 atomic_add_32((volatile uint32_t *)&new_scdp->scd_refcnt, -1);
15305 }
15306
15307 /*
15308 * This routine is called by a process to remove itself from an SCD. It is
15309 	 * called either when the process has detached from a segment or from
15310 * hat_free_start() as a result of calling exit.
15311 */
15312 static void
15313 sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type)
15314 {
15315 sf_scd_t *scdp = sfmmup->sfmmu_scdp;
15316 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15317 hatlock_t *hatlockp = TSB_HASH(sfmmup);
15318 int i;
15319
15320 ASSERT(scdp != NULL);
15321 ASSERT(srdp != NULL);
15322
15323 if (sfmmup->sfmmu_free) {
15324 /*
|
2134 sf_scd_t *scdp;
2135 int i;
2136 extern uint_t get_color_start(struct as *);
2137
2138 ASSERT(hat->sfmmu_xhat_provider == NULL);
2139 ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
2140 (flag == HAT_DUP_SRD));
2141 ASSERT(hat != ksfmmup);
2142 ASSERT(newhat != ksfmmup);
2143 ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
2144
2145 if (flag == HAT_DUP_COW) {
2146 panic("hat_dup: HAT_DUP_COW not supported");
2147 }
2148
2149 if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
2150 ASSERT(srdp->srd_evp != NULL);
2151 VN_HOLD(srdp->srd_evp);
2152 ASSERT(srdp->srd_refcnt > 0);
2153 newhat->sfmmu_srdp = srdp;
2154 atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
2155 }
2156
2157 /*
2158 	 * The HAT_DUP_ALL flag is used after the address space (as) duplication is done.
2159 */
2160 if (flag == HAT_DUP_ALL && ((srdp = newhat->sfmmu_srdp) != NULL)) {
2161 ASSERT(newhat->sfmmu_srdp->srd_refcnt >= 2);
2162 newhat->sfmmu_rtteflags = hat->sfmmu_rtteflags;
2163 if (hat->sfmmu_flags & HAT_4MTEXT_FLAG) {
2164 newhat->sfmmu_flags |= HAT_4MTEXT_FLAG;
2165 }
2166
2167 		/* check if we need to join the scd */
2168 if ((scdp = hat->sfmmu_scdp) != NULL &&
2169 newhat->sfmmu_scdp != scdp) {
2170 int ret;
2171 SF_RGNMAP_IS_SUBSET(&newhat->sfmmu_region_map,
2172 &scdp->scd_region_map, ret);
2173 ASSERT(ret);
2174 sfmmu_join_scd(scdp, newhat);
3210 * sfmmu_pagearray_setup failed so return
3211 */
3212 sfmmu_mlist_exit(pml);
3213 return (1);
3214 }
3215 }
3216
3217 /*
3218 * Make sure hment is not on a mapping list.
3219 */
3220 ASSERT(remap || (sfhme->hme_page == NULL));
3221
3222 	/* if it is not a remap then hme_next had better be NULL */
3223 ASSERT((!remap) ? sfhme->hme_next == NULL : 1);
3224
3225 if (flags & HAT_LOAD_LOCK) {
3226 if ((hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) {
3227 panic("too high lckcnt-hmeblk %p",
3228 (void *)hmeblkp);
3229 }
3230 atomic_inc_32(&hmeblkp->hblk_lckcnt);
3231
3232 HBLK_STACK_TRACE(hmeblkp, HBLK_LOCK);
3233 }
3234
3235 #ifdef VAC
3236 if (pp && PP_ISNC(pp)) {
3237 /*
3238 * If the physical page is marked to be uncacheable, like
3239 * by a vac conflict, make sure the new mapping is also
3240 * uncacheable.
3241 */
3242 TTE_CLR_VCACHEABLE(ttep);
3243 ASSERT(PP_GET_VCOLOR(pp) == NO_VCOLOR);
3244 }
3245 #endif
3246 ttep->tte_hmenum = hmenum;
3247
3248 #ifdef DEBUG
3249 orig_old = tteold;
3250 #endif /* DEBUG */
3251
3252 while (sfmmu_modifytte_try(&tteold, ttep, &sfhme->hme_tte) < 0) {
3253 if ((sfmmup == KHATID) &&
3254 (flags & (HAT_LOAD_LOCK | HAT_LOAD_REMAP))) {
3255 sfmmu_copytte(&sfhme->hme_tte, &tteold);
3256 }
3257 #ifdef DEBUG
3258 chk_tte(&orig_old, &tteold, ttep, hmeblkp);
3259 #endif /* DEBUG */
3260 }
3261 ASSERT(TTE_IS_VALID(&sfhme->hme_tte));
3262
3263 if (!TTE_IS_VALID(&tteold)) {
3264
3265 atomic_inc_16(&hmeblkp->hblk_vcnt);
3266 if (rid == SFMMU_INVALID_SHMERID) {
3267 atomic_inc_ulong(&sfmmup->sfmmu_ttecnt[size]);
3268 } else {
3269 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
3270 sf_region_t *rgnp = srdp->srd_hmergnp[rid];
3271 /*
3272 * We already accounted for region ttecnt's in sfmmu
3273 * during hat_join_region() processing. Here we
3274 			 * only update ttecnt's in the region structure.
3275 */
3276 atomic_inc_ulong(&rgnp->rgn_ttecnt[size]);
3277 }
3278 }
3279
3280 myflt = (astosfmmu(curthread->t_procp->p_as) == sfmmup);
3281 if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 &&
3282 sfmmup != ksfmmup) {
3283 uchar_t tteflag = 1 << size;
3284 if (rid == SFMMU_INVALID_SHMERID) {
3285 if (!(sfmmup->sfmmu_tteflags & tteflag)) {
3286 hatlockp = sfmmu_hat_enter(sfmmup);
3287 sfmmup->sfmmu_tteflags |= tteflag;
3288 sfmmu_hat_exit(hatlockp);
3289 }
3290 } else if (!(sfmmup->sfmmu_rtteflags & tteflag)) {
3291 hatlockp = sfmmu_hat_enter(sfmmup);
3292 sfmmup->sfmmu_rtteflags |= tteflag;
3293 sfmmu_hat_exit(hatlockp);
3294 }
3295 /*
3296 * Update the current CPU tsbmiss area, so the current thread
3364 */
3365 if (size == TTE8K || size == TTE4M) {
3366 sf_scd_t *scdp;
3367 hatlockp = sfmmu_hat_enter(sfmmup);
3368 /*
3369 * Don't preload private TSB if the mapping is used
3370 * by the shctx in the SCD.
3371 */
3372 scdp = sfmmup->sfmmu_scdp;
3373 if (rid == SFMMU_INVALID_SHMERID || scdp == NULL ||
3374 !SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
3375 sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte,
3376 size);
3377 }
3378 sfmmu_hat_exit(hatlockp);
3379 }
3380 }
3381 if (pp) {
3382 if (!remap) {
3383 HME_ADD(sfhme, pp);
3384 atomic_inc_16(&hmeblkp->hblk_hmecnt);
3385 ASSERT(hmeblkp->hblk_hmecnt > 0);
3386
3387 /*
3388 * Cannot ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
3389 * see pageunload() for comment.
3390 */
3391 }
3392 sfmmu_mlist_exit(pml);
3393 }
3394
3395 return (0);
3396 }
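This second copy of the code (following the `|` separator) differs from the first in replacing the open-coded add-by-one and add-by-minus-one calls with the dedicated increment and decrement entry points from atomic_ops(3C). The two spellings are semantically identical; a small sketch with the real <atomic.h> interfaces:

    #include <atomic.h>
    #include <sys/types.h>

    static void
    equivalent_forms(volatile uint32_t *cntp)
    {
            /* old style: generic add with an explicit delta */
            atomic_add_32(cntp, 1);
            atomic_add_32(cntp, -1);

            /* new style: same effect, intent is explicit */
            atomic_inc_32(cntp);
            atomic_dec_32(cntp);
    }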
3397 /*
3398 * Function unlocks hash bucket.
3399 */
3400 static void
3401 sfmmu_tteload_release_hashbucket(struct hmehash_bucket *hmebp)
3402 {
3403 ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
3404 SFMMU_HASH_UNLOCK(hmebp);
4137 readtte:
4138 sfmmu_copytte(&sfhme->hme_tte, &tteold);
4139 if (TTE_IS_VALID(&tteold)) {
4140
4141 ttemod = tteold;
4142
4143 ret = sfmmu_modifytte_try(&tteold, &ttemod,
4144 &sfhme->hme_tte);
4145
4146 if (ret < 0)
4147 goto readtte;
4148
4149 if (hmeblkp->hblk_lckcnt == 0)
4150 panic("zero hblk lckcnt");
4151
4152 if (((uintptr_t)addr + TTEBYTES(ttesz)) >
4153 (uintptr_t)endaddr)
4154 panic("can't unlock large tte");
4155
4156 ASSERT(hmeblkp->hblk_lckcnt > 0);
4157 atomic_dec_32(&hmeblkp->hblk_lckcnt);
4158 HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
4159 } else {
4160 panic("sfmmu_hblk_unlock: invalid tte");
4161 }
4162 addr += TTEBYTES(ttesz);
4163 sfhme++;
4164 }
4165 return (addr);
4166 }
4167
4168 /*
4169 * Physical Address Mapping Framework
4170 *
4171 * General rules:
4172 *
4173 * (1) Applies only to seg_kmem memory pages. To make things easier,
4174 * seg_kpm addresses are also accepted by the routines, but nothing
4175 * is done with them since by definition their PA mappings are static.
4176 * (2) hat_add_callback() may only be called while holding the page lock
4177 * SE_SHARED or SE_EXCL of the underlying page (e.g., as_pagelock()),
6116 if (pp != NULL) {
6117 panic("sfmmu_hblk_unload: pp = 0x%p "
6118 "tte became invalid under mlist"
6119 " lock = 0x%p", (void *)pp,
6120 (void *)pml);
6121 }
6122 continue;
6123 }
6124
6125 if (!(flags & HAT_UNLOAD_NOSYNC)) {
6126 sfmmu_ttesync(sfmmup, addr, &tte, pp);
6127 }
6128
6129 /*
6130 			 * Ok - we invalidated the tte. Do the rest of the job.
6131 */
6132 ttecnt++;
6133
6134 if (flags & HAT_UNLOAD_UNLOCK) {
6135 ASSERT(hmeblkp->hblk_lckcnt > 0);
6136 atomic_dec_32(&hmeblkp->hblk_lckcnt);
6137 HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
6138 }
6139
6140 /*
6141 * Normally we would need to flush the page
6142 * from the virtual cache at this point in
6143 * order to prevent a potential cache alias
6144 * inconsistency.
6145 * The particular scenario we need to worry
6146 * about is:
6147 			 * Given: va1 and va2 are two virtual addresses
6148 * that alias and map the same physical
6149 * address.
6150 * 1. mapping exists from va1 to pa and data
6151 * has been read into the cache.
6152 * 2. unload va1.
6153 * 3. load va2 and modify data using va2.
6154 			 * 4. unload va2.
6155 * 5. load va1 and reference data. Unless we
6156 * flush the data cache when we unload we will
6170 DEMAP_RANGE_MARKPG(dmrp, addr);
6171 } else {
6172 ASSERT(sfmmup != NULL);
6173 ASSERT(!hmeblkp->hblk_shared);
6174 sfmmu_tlb_demap(addr, sfmmup, hmeblkp,
6175 sfmmup->sfmmu_free, 0);
6176 }
6177
6178 if (pp) {
6179 /*
6180 * Remove the hment from the mapping list
6181 */
6182 ASSERT(hmeblkp->hblk_hmecnt > 0);
6183
6184 /*
6185 * Again, we cannot
6186 * ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS);
6187 */
6188 HME_SUB(sfhmep, pp);
6189 membar_stst();
6190 atomic_dec_16(&hmeblkp->hblk_hmecnt);
6191 }
6192
6193 ASSERT(hmeblkp->hblk_vcnt > 0);
6194 atomic_dec_16(&hmeblkp->hblk_vcnt);
6195
6196 ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
6197 !hmeblkp->hblk_lckcnt);
6198
6199 #ifdef VAC
6200 if (pp && (pp->p_nrm & (P_KPMC | P_KPMS | P_TNC))) {
6201 if (PP_ISTNC(pp)) {
6202 /*
6203 				 * If the page was temporarily
6204 * uncached, try to recache
6205 * it. Note that HME_SUB() was
6206 				 * called above, so p_index and
6207 				 * the mlist have been updated.
6208 */
6209 conv_tnc(pp, ttesz);
6210 } else if (pp->p_mapping == NULL) {
6211 ASSERT(kpm_enable);
6212 /*
6213 * Page is marked to be in VAC conflict
6214 * to an existing kpm mapping and/or is
7332 }
7333
7334 if (ret == 0) {
7335 panic("pageunload: cas failed?");
7336 }
7337
7338 addr = tte_to_vaddr(hmeblkp, tte);
7339
7340 if (hmeblkp->hblk_shared) {
7341 sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7342 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7343 sf_region_t *rgnp;
7344 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7345 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7346 ASSERT(srdp != NULL);
7347 rgnp = srdp->srd_hmergnp[rid];
7348 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
7349 cpuset = sfmmu_rgntlb_demap(addr, rgnp, hmeblkp, 1);
7350 sfmmu_ttesync(NULL, addr, &tte, pp);
7351 ASSERT(rgnp->rgn_ttecnt[ttesz] > 0);
7352 atomic_dec_ulong(&rgnp->rgn_ttecnt[ttesz]);
7353 } else {
7354 sfmmu_ttesync(sfmmup, addr, &tte, pp);
7355 atomic_dec_ulong(&sfmmup->sfmmu_ttecnt[ttesz]);
7356
7357 /*
7358 * We need to flush the page from the virtual cache
7359 * in order to prevent a virtual cache alias
7360 * inconsistency. The particular scenario we need
7361 * to worry about is:
7362 		 * Given: va1 and va2 are two virtual addresses that
7363 * alias and will map the same physical address.
7364 * 1. mapping exists from va1 to pa and data has
7365 * been read into the cache.
7366 * 2. unload va1.
7367 * 3. load va2 and modify data using va2.
7368 		 * 4. unload va2.
7369 * 5. load va1 and reference data. Unless we flush
7370 * the data cache when we unload we will get
7371 * stale data.
7372 * This scenario is taken care of by using virtual
7373 * page coloring.
7374 */
7375 if (sfmmup->sfmmu_ismhat) {
7396 /*
7397 * Hme_sub has to run after ttesync() and a_rss update.
7398 * See hblk_unload().
7399 */
7400 HME_SUB(sfhme, pp);
7401 membar_stst();
7402
7403 /*
7404 	 * We cannot make ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
7405 	 * since pteload may have done a HME_ADD() right after
7406 	 * we did the HME_SUB() above. Hmecnt is now maintained
7407 	 * by cas only; no lock guarantees its value. The only
7408 	 * guarantee we have is that hmecnt will not drop below
7409 	 * what it should be, so the hblk will not be taken away.
7410 	 * It's also important that we decrement the hmecnt only after
7411 	 * we are done with hmeblkp, so that this hmeblk won't be
7412 * stolen.
7413 */
7414 ASSERT(hmeblkp->hblk_hmecnt > 0);
7415 ASSERT(hmeblkp->hblk_vcnt > 0);
7416 atomic_dec_16(&hmeblkp->hblk_vcnt);
7417 atomic_dec_16(&hmeblkp->hblk_hmecnt);
7418 /*
7419 * This is bug 4063182.
7420 * XXX: fixme
7421 * ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
7422 * !hmeblkp->hblk_lckcnt);
7423 */
7424 } else {
7425 panic("invalid tte? pp %p &tte %p",
7426 (void *)pp, (void *)&tte);
7427 }
7428
7429 return (cpuset);
7430 }
7431
7432 /*
7433 * While relocating a kernel page, this function will move the mappings
7434 * from tpp to dpp and modify any associated data with these mappings.
7435 * It also unsuspends the suspended kernel mapping.
7436 */
7437 static void
13796 uint_t hash = SRD_HASH_FUNCTION(evp);
13797 sf_srd_t *srdp;
13798 sf_srd_t *newsrdp;
13799
13800 ASSERT(sfmmup != ksfmmup);
13801 ASSERT(sfmmup->sfmmu_srdp == NULL);
13802
13803 if (!shctx_on) {
13804 return;
13805 }
13806
13807 VN_HOLD(evp);
13808
13809 if (srd_buckets[hash].srdb_srdp != NULL) {
13810 mutex_enter(&srd_buckets[hash].srdb_lock);
13811 for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13812 srdp = srdp->srd_hash) {
13813 if (srdp->srd_evp == evp) {
13814 ASSERT(srdp->srd_refcnt >= 0);
13815 sfmmup->sfmmu_srdp = srdp;
13816 atomic_inc_32(
13817 (volatile uint_t *)&srdp->srd_refcnt);
13818 mutex_exit(&srd_buckets[hash].srdb_lock);
13819 return;
13820 }
13821 }
13822 mutex_exit(&srd_buckets[hash].srdb_lock);
13823 }
13824 newsrdp = kmem_cache_alloc(srd_cache, KM_SLEEP);
13825 ASSERT(newsrdp->srd_next_ismrid == 0 && newsrdp->srd_next_hmerid == 0);
13826
13827 newsrdp->srd_evp = evp;
13828 newsrdp->srd_refcnt = 1;
13829 newsrdp->srd_hmergnfree = NULL;
13830 newsrdp->srd_ismrgnfree = NULL;
13831
13832 mutex_enter(&srd_buckets[hash].srdb_lock);
13833 for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13834 srdp = srdp->srd_hash) {
13835 if (srdp->srd_evp == evp) {
13836 ASSERT(srdp->srd_refcnt >= 0);
13837 sfmmup->sfmmu_srdp = srdp;
13838 atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
13839 mutex_exit(&srd_buckets[hash].srdb_lock);
13840 kmem_cache_free(srd_cache, newsrdp);
13841 return;
13842 }
13843 }
13844 newsrdp->srd_hash = srd_buckets[hash].srdb_srdp;
13845 srd_buckets[hash].srdb_srdp = newsrdp;
13846 sfmmup->sfmmu_srdp = newsrdp;
13847
13848 mutex_exit(&srd_buckets[hash].srdb_lock);
13849
13850 }
13851
13852 static void
13853 sfmmu_leave_srd(sfmmu_t *sfmmup)
13854 {
13855 vnode_t *evp;
13856 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
13857 uint_t hash;
13858 sf_srd_t **prev_srdpp;
13859 sf_region_t *rgnp;
13860 sf_region_t *nrgnp;
13861 #ifdef DEBUG
13862 int rgns = 0;
13863 #endif
13864 int i;
13865
13866 ASSERT(sfmmup != ksfmmup);
13867 ASSERT(srdp != NULL);
13868 ASSERT(srdp->srd_refcnt > 0);
13869 ASSERT(sfmmup->sfmmu_scdp == NULL);
13870 ASSERT(sfmmup->sfmmu_free == 1);
13871
13872 sfmmup->sfmmu_srdp = NULL;
13873 evp = srdp->srd_evp;
13874 ASSERT(evp != NULL);
13875 if (atomic_dec_32_nv((volatile uint_t *)&srdp->srd_refcnt)) {
13876 VN_RELE(evp);
13877 return;
13878 }
13879
13880 hash = SRD_HASH_FUNCTION(evp);
13881 mutex_enter(&srd_buckets[hash].srdb_lock);
13882 for (prev_srdpp = &srd_buckets[hash].srdb_srdp;
13883 (srdp = *prev_srdpp) != NULL; prev_srdpp = &srdp->srd_hash) {
13884 if (srdp->srd_evp == evp) {
13885 break;
13886 }
13887 }
13888 if (srdp == NULL || srdp->srd_refcnt) {
13889 mutex_exit(&srd_buckets[hash].srdb_lock);
13890 VN_RELE(evp);
13891 return;
13892 }
13893 *prev_srdpp = srdp->srd_hash;
13894 mutex_exit(&srd_buckets[hash].srdb_lock);
13895
14072 mutex_enter(&srdp->srd_mutex);
14073
14074 for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
14075 rgnp = rgnp->rgn_hash) {
14076 if (rgnp->rgn_saddr == r_saddr && rgnp->rgn_size == r_size &&
14077 rgnp->rgn_obj == r_obj && rgnp->rgn_objoff == r_objoff &&
14078 rgnp->rgn_perm == r_perm && rgnp->rgn_pgszc == r_pgszc) {
14079 break;
14080 }
14081 }
14082
14083 rfound:
14084 if (rgnp != NULL) {
14085 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14086 ASSERT(rgnp->rgn_cb_function == r_cb_function);
14087 ASSERT(rgnp->rgn_refcnt >= 0);
14088 rid = rgnp->rgn_id;
14089 ASSERT(rid < maxids);
14090 ASSERT(rarrp[rid] == rgnp);
14091 ASSERT(rid < *nextidp);
14092 atomic_inc_32((volatile uint_t *)&rgnp->rgn_refcnt);
14093 mutex_exit(&srdp->srd_mutex);
14094 if (new_rgnp != NULL) {
14095 kmem_cache_free(region_cache, new_rgnp);
14096 }
14097 if (r_type == SFMMU_REGION_HME) {
14098 int myjoin =
14099 (sfmmup == astosfmmu(curthread->t_procp->p_as));
14100
14101 sfmmu_link_to_hmeregion(sfmmup, rgnp);
14102 /*
14103 			 * The bitmap should be updated after linking the sfmmu
14104 			 * onto the region list so that pageunload() doesn't skip
14105 			 * the TSB/TLB flush. As soon as the bitmap is updated,
14106 			 * another thread in this process can start accessing
14107 			 * this region.
14108 */
14109 /*
14110 * Normally ttecnt accounting is done as part of
14111 * pagefault handling. But a process may not take any
14112 * pagefaults on shared hmeblks created by some other
14422
14423 /* update shme rgns ttecnt in sfmmu_ttecnt */
14424 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14425 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14426 atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], -rttecnt);
14427
14428 sfmmu_hat_exit(hatlockp);
14429 if (scdp != NULL && sfmmup->sfmmu_scdp == NULL) {
14430 /* sfmmup left the scd, grow private tsb */
14431 sfmmu_check_page_sizes(sfmmup, 1);
14432 } else {
14433 sfmmu_check_page_sizes(sfmmup, 0);
14434 }
14435 }
14436
14437 if (r_type == SFMMU_REGION_HME) {
14438 sfmmu_unlink_from_hmeregion(sfmmup, rgnp);
14439 }
14440
14441 r_obj = rgnp->rgn_obj;
14442 if (atomic_dec_32_nv((volatile uint_t *)&rgnp->rgn_refcnt)) {
14443 return;
14444 }
14445
14446 /*
14447 	 * Looks like nobody uses this region anymore. Free it.
14448 */
14449 rhash = RGN_HASH_FUNCTION(r_obj);
14450 mutex_enter(&srdp->srd_mutex);
14451 for (prev_rgnpp = &srdp->srd_rgnhash[rhash];
14452 (cur_rgnp = *prev_rgnpp) != NULL;
14453 prev_rgnpp = &cur_rgnp->rgn_hash) {
14454 if (cur_rgnp == rgnp && cur_rgnp->rgn_refcnt == 0) {
14455 break;
14456 }
14457 }
14458
14459 if (cur_rgnp == NULL) {
14460 mutex_exit(&srdp->srd_mutex);
14461 return;
14462 }
14507 uint_t rid = (uint_t)((uint64_t)rcookie);
14508 sf_region_t *rgnp;
14509 sf_rgn_link_t *rlink;
14510 sf_rgn_link_t *hrlink;
14511 ulong_t rttecnt;
14512
14513 ASSERT(sfmmup != ksfmmup);
14514 ASSERT(srdp != NULL);
14515 ASSERT(srdp->srd_refcnt > 0);
14516
14517 ASSERT(rid < srdp->srd_next_hmerid);
14518 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14519 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14520
14521 rgnp = srdp->srd_hmergnp[rid];
14522 ASSERT(rgnp->rgn_refcnt > 0);
14523 ASSERT(rgnp->rgn_id == rid);
14524 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == SFMMU_REGION_HME);
14525 ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14526
14527 atomic_inc_32((volatile uint_t *)&rgnp->rgn_refcnt);
14528
14529 /* LINTED: constant in conditional context */
14530 SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 0);
14531 ASSERT(rlink != NULL);
14532 mutex_enter(&rgnp->rgn_mutex);
14533 ASSERT(rgnp->rgn_sfmmu_head != NULL);
14534 /* LINTED: constant in conditional context */
14535 SFMMU_HMERID2RLINKP(rgnp->rgn_sfmmu_head, rid, hrlink, 0, 0);
14536 ASSERT(hrlink != NULL);
14537 ASSERT(hrlink->prev == NULL);
14538 rlink->next = rgnp->rgn_sfmmu_head;
14539 rlink->prev = NULL;
14540 hrlink->prev = sfmmup;
14541 /*
14542 * make sure rlink's next field is correct
14543 * before making this link visible.
14544 */
14545 membar_stst();
14546 rgnp->rgn_sfmmu_head = sfmmup;
14547 mutex_exit(&rgnp->rgn_mutex);
15237 static void
15238 sfmmu_find_scd(sfmmu_t *sfmmup)
15239 {
15240 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15241 sf_scd_t *scdp, *new_scdp;
15242 int ret;
15243
15244 ASSERT(srdp != NULL);
15245 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
15246
15247 mutex_enter(&srdp->srd_scd_mutex);
15248 for (scdp = srdp->srd_scdp; scdp != NULL;
15249 scdp = scdp->scd_next) {
15250 SF_RGNMAP_EQUAL(&scdp->scd_region_map,
15251 &sfmmup->sfmmu_region_map, ret);
15252 if (ret == 1) {
15253 SF_SCD_INCR_REF(scdp);
15254 mutex_exit(&srdp->srd_scd_mutex);
15255 sfmmu_join_scd(scdp, sfmmup);
15256 ASSERT(scdp->scd_refcnt >= 2);
15257 atomic_dec_32((volatile uint32_t *)&scdp->scd_refcnt);
15258 return;
15259 } else {
15260 /*
15261 * If the sfmmu region map is a subset of the scd
15262 * region map, then the assumption is that this process
15263 * will continue attaching to ISM segments until the
15264 * region maps are equal.
15265 */
15266 SF_RGNMAP_IS_SUBSET(&scdp->scd_region_map,
15267 &sfmmup->sfmmu_region_map, ret);
15268 if (ret == 1) {
15269 mutex_exit(&srdp->srd_scd_mutex);
15270 return;
15271 }
15272 }
15273 }
15274
15275 ASSERT(scdp == NULL);
15276 /*
15277 * No matching SCD has been found, create a new one.
15282 return;
15283 }
15284
15285 /*
15286 * sfmmu_alloc_scd() returns with a ref count of 1 on the scd.
15287 */
15288
15289 /* Set scd_rttecnt for shme rgns in SCD */
15290 sfmmu_set_scd_rttecnt(srdp, new_scdp);
15291
15292 /*
15293 * Link scd onto srd_scdp list and scd sfmmu onto region/iment lists.
15294 */
15295 sfmmu_link_scd_to_regions(srdp, new_scdp);
15296 sfmmu_add_scd(&srdp->srd_scdp, new_scdp);
15297 SFMMU_STAT_ADD(sf_create_scd, 1);
15298
15299 mutex_exit(&srdp->srd_scd_mutex);
15300 sfmmu_join_scd(new_scdp, sfmmup);
15301 ASSERT(new_scdp->scd_refcnt >= 2);
15302 atomic_dec_32((volatile uint32_t *)&new_scdp->scd_refcnt);
15303 }
15304
15305 /*
15306 * This routine is called by a process to remove itself from an SCD. It is
15307 	 * called either when the process has detached from a segment or from
15308 * hat_free_start() as a result of calling exit.
15309 */
15310 static void
15311 sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type)
15312 {
15313 sf_scd_t *scdp = sfmmup->sfmmu_scdp;
15314 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15315 hatlock_t *hatlockp = TSB_HASH(sfmmup);
15316 int i;
15317
15318 ASSERT(scdp != NULL);
15319 ASSERT(srdp != NULL);
15320
15321 if (sfmmup->sfmmu_free) {
15322 /*
|