5045 use atomic_{inc,dec}_* instead of atomic_add_*
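
The substitution being reviewed is mechanical: wherever atomic_add_*() is called with a constant delta of +1 or -1, it becomes the matching atomic_inc_*() or atomic_dec_*() from <sys/atomic.h>; calls with a variable delta (such as the -rttecnt adjustments further down) keep atomic_add_long(). A minimal sketch of the mapping, using hypothetical counters in place of the real srd_refcnt, hblk_vcnt and sfmmu_ttecnt[] fields:

#include <sys/types.h>
#include <sys/atomic.h>

/* Hypothetical counters standing in for srd_refcnt, hblk_vcnt and sfmmu_ttecnt[]. */
static volatile uint_t refcnt;
static volatile uint16_t vcnt;
static volatile ulong_t ttecnt;

static void
atomic_rename_sketch(void)
{
	atomic_add_32(&refcnt, 1);		/* old form */
	atomic_inc_32(&refcnt);			/* new form */

	atomic_add_16(&vcnt, -1);		/* old form */
	atomic_dec_16(&vcnt);			/* new form */

	atomic_add_long(&ttecnt, 1);		/* old form */
	atomic_inc_ulong(&ttecnt);		/* new form */

	/* the _nv variants return the new value, so refcount release tests keep shape */
	if (atomic_dec_32_nv(&refcnt) == 0) {
		/* last reference dropped; safe to tear down */
	}
}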


2134         sf_scd_t *scdp;
2135         int i;
2136         extern uint_t get_color_start(struct as *);
2137 
2138         ASSERT(hat->sfmmu_xhat_provider == NULL);
2139         ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
2140             (flag == HAT_DUP_SRD));
2141         ASSERT(hat != ksfmmup);
2142         ASSERT(newhat != ksfmmup);
2143         ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
2144 
2145         if (flag == HAT_DUP_COW) {
2146                 panic("hat_dup: HAT_DUP_COW not supported");
2147         }
2148 
2149         if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
2150                 ASSERT(srdp->srd_evp != NULL);
2151                 VN_HOLD(srdp->srd_evp);
2152                 ASSERT(srdp->srd_refcnt > 0);
2153                 newhat->sfmmu_srdp = srdp;
2154                 atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1);
2155         }
2156 
2157         /*
2158          * HAT_DUP_ALL flag is used after as duplication is done.
2159          */
2160         if (flag == HAT_DUP_ALL && ((srdp = newhat->sfmmu_srdp) != NULL)) {
2161                 ASSERT(newhat->sfmmu_srdp->srd_refcnt >= 2);
2162                 newhat->sfmmu_rtteflags = hat->sfmmu_rtteflags;
2163                 if (hat->sfmmu_flags & HAT_4MTEXT_FLAG) {
2164                         newhat->sfmmu_flags |= HAT_4MTEXT_FLAG;
2165                 }
2166 
2167                 /* check if need to join scd */
2168                 if ((scdp = hat->sfmmu_scdp) != NULL &&
2169                     newhat->sfmmu_scdp != scdp) {
2170                         int ret;
2171                         SF_RGNMAP_IS_SUBSET(&newhat->sfmmu_region_map,
2172                             &scdp->scd_region_map, ret);
2173                         ASSERT(ret);
2174                         sfmmu_join_scd(scdp, newhat);


3210                          * sfmmu_pagearray_setup failed so return
3211                          */
3212                         sfmmu_mlist_exit(pml);
3213                         return (1);
3214                 }
3215         }
3216 
3217         /*
3218          * Make sure hment is not on a mapping list.
3219          */
3220         ASSERT(remap || (sfhme->hme_page == NULL));
3221 
3222         /* if it is not a remap then hme->next better be NULL */
3223         ASSERT((!remap) ? sfhme->hme_next == NULL : 1);
3224 
3225         if (flags & HAT_LOAD_LOCK) {
3226                 if ((hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) {
3227                         panic("too high lckcnt-hmeblk %p",
3228                             (void *)hmeblkp);
3229                 }
3230                 atomic_add_32(&hmeblkp->hblk_lckcnt, 1);
3231 
3232                 HBLK_STACK_TRACE(hmeblkp, HBLK_LOCK);
3233         }
3234 
3235 #ifdef VAC
3236         if (pp && PP_ISNC(pp)) {
3237                 /*
3238                  * If the physical page is marked to be uncacheable, like
3239                  * by a vac conflict, make sure the new mapping is also
3240                  * uncacheable.
3241                  */
3242                 TTE_CLR_VCACHEABLE(ttep);
3243                 ASSERT(PP_GET_VCOLOR(pp) == NO_VCOLOR);
3244         }
3245 #endif
3246         ttep->tte_hmenum = hmenum;
3247 
3248 #ifdef DEBUG
3249         orig_old = tteold;
3250 #endif /* DEBUG */
3251 
3252         while (sfmmu_modifytte_try(&tteold, ttep, &sfhme->hme_tte) < 0) {
3253                 if ((sfmmup == KHATID) &&
3254                     (flags & (HAT_LOAD_LOCK | HAT_LOAD_REMAP))) {
3255                         sfmmu_copytte(&sfhme->hme_tte, &tteold);
3256                 }
3257 #ifdef DEBUG
3258                 chk_tte(&orig_old, &tteold, ttep, hmeblkp);
3259 #endif /* DEBUG */
3260         }
3261         ASSERT(TTE_IS_VALID(&sfhme->hme_tte));
3262 
3263         if (!TTE_IS_VALID(&tteold)) {
3264 
3265                 atomic_add_16(&hmeblkp->hblk_vcnt, 1);
3266                 if (rid == SFMMU_INVALID_SHMERID) {
3267                         atomic_add_long(&sfmmup->sfmmu_ttecnt[size], 1);
3268                 } else {
3269                         sf_srd_t *srdp = sfmmup->sfmmu_srdp;
3270                         sf_region_t *rgnp = srdp->srd_hmergnp[rid];
3271                         /*
3272                          * We already accounted for region ttecnt's in sfmmu
3273                          * during hat_join_region() processing. Here we
3274                          * only update ttecnt's in region structure.
3275                          */
3276                         atomic_add_long(&rgnp->rgn_ttecnt[size], 1);
3277                 }
3278         }
3279 
3280         myflt = (astosfmmu(curthread->t_procp->p_as) == sfmmup);
3281         if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 &&
3282             sfmmup != ksfmmup) {
3283                 uchar_t tteflag = 1 << size;
3284                 if (rid == SFMMU_INVALID_SHMERID) {
3285                         if (!(sfmmup->sfmmu_tteflags & tteflag)) {
3286                                 hatlockp = sfmmu_hat_enter(sfmmup);
3287                                 sfmmup->sfmmu_tteflags |= tteflag;
3288                                 sfmmu_hat_exit(hatlockp);
3289                         }
3290                 } else if (!(sfmmup->sfmmu_rtteflags & tteflag)) {
3291                         hatlockp = sfmmu_hat_enter(sfmmup);
3292                         sfmmup->sfmmu_rtteflags |= tteflag;
3293                         sfmmu_hat_exit(hatlockp);
3294                 }
3295                 /*
3296                  * Update the current CPU tsbmiss area, so the current thread

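For context on the tteflag logic above: size is the TTE page-size code, so the flag is just a per-page-size bit (with the usual sfmmu codes, TTE8K is 0, TTE64K is 1, TTE512K is 2 and TTE4M is 3, so a 4M mapping sets bit 3). A toy sketch of the update that sfmmu_tteflags/sfmmu_rtteflags receive under the hat lock; the names here are hypothetical:

#include <sys/types.h>

/* Hypothetical page-size codes mirroring the TTE8K..TTE4M values assumed above. */
#define	MY_TTE8K	0
#define	MY_TTE4M	3

/* Record that a mapping of the given size code exists; caller holds the hat lock. */
static void
note_pagesize_used(uchar_t *flagsp, int size)
{
	uchar_t tteflag = (uchar_t)(1 << size);

	if (!(*flagsp & tteflag))
		*flagsp |= tteflag;
}
/* e.g. note_pagesize_used(&flags, MY_TTE4M) sets bit 3 (0x08). */
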

3364                  */
3365                 if (size == TTE8K || size == TTE4M) {
3366                         sf_scd_t *scdp;
3367                         hatlockp = sfmmu_hat_enter(sfmmup);
3368                         /*
3369                          * Don't preload private TSB if the mapping is used
3370                          * by the shctx in the SCD.
3371                          */
3372                         scdp = sfmmup->sfmmu_scdp;
3373                         if (rid == SFMMU_INVALID_SHMERID || scdp == NULL ||
3374                             !SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
3375                                 sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte,
3376                                     size);
3377                         }
3378                         sfmmu_hat_exit(hatlockp);
3379                 }
3380         }
3381         if (pp) {
3382                 if (!remap) {
3383                         HME_ADD(sfhme, pp);
3384                         atomic_add_16(&hmeblkp->hblk_hmecnt, 1);
3385                         ASSERT(hmeblkp->hblk_hmecnt > 0);
3386 
3387                         /*
3388                          * Cannot ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
3389                          * see pageunload() for comment.
3390                          */
3391                 }
3392                 sfmmu_mlist_exit(pml);
3393         }
3394 
3395         return (0);
3396 }
3397 /*
3398  * Function unlocks hash bucket.
3399  */
3400 static void
3401 sfmmu_tteload_release_hashbucket(struct hmehash_bucket *hmebp)
3402 {
3403         ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
3404         SFMMU_HASH_UNLOCK(hmebp);


4137 readtte:
4138                 sfmmu_copytte(&sfhme->hme_tte, &tteold);
4139                 if (TTE_IS_VALID(&tteold)) {
4140 
4141                         ttemod = tteold;
4142 
4143                         ret = sfmmu_modifytte_try(&tteold, &ttemod,
4144                             &sfhme->hme_tte);
4145 
4146                         if (ret < 0)
4147                                 goto readtte;
4148 
4149                         if (hmeblkp->hblk_lckcnt == 0)
4150                                 panic("zero hblk lckcnt");
4151 
4152                         if (((uintptr_t)addr + TTEBYTES(ttesz)) >
4153                             (uintptr_t)endaddr)
4154                                 panic("can't unlock large tte");
4155 
4156                         ASSERT(hmeblkp->hblk_lckcnt > 0);
4157                         atomic_add_32(&hmeblkp->hblk_lckcnt, -1);
4158                         HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
4159                 } else {
4160                         panic("sfmmu_hblk_unlock: invalid tte");
4161                 }
4162                 addr += TTEBYTES(ttesz);
4163                 sfhme++;
4164         }
4165         return (addr);
4166 }
4167 
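
The readtte: loop in sfmmu_hblk_unlock() above is a standard compare-and-swap retry: sfmmu_modifytte_try() reports failure (a negative return) when the TTE changed underneath it, and the caller simply re-reads and tries again. A generic sketch of the same shape using atomic_cas_ulong() on a plain word rather than the real TTE layout:

#include <sys/types.h>
#include <sys/atomic.h>

/*
 * Clear the low bit of *wordp, retrying if another CPU updates the word
 * between our read and our cas -- the same read/modify/retry structure
 * as the readtte loop above.
 */
static ulong_t
clear_low_bit(volatile ulong_t *wordp)
{
	ulong_t old, new;

	do {
		old = *wordp;		/* snapshot ("readtte") */
		new = old & ~1UL;	/* build the modified value */
	} while (atomic_cas_ulong(wordp, old, new) != old);

	return (old);
}
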
4168 /*
4169  * Physical Address Mapping Framework
4170  *
4171  * General rules:
4172  *
4173  * (1) Applies only to seg_kmem memory pages. To make things easier,
4174  *     seg_kpm addresses are also accepted by the routines, but nothing
4175  *     is done with them since by definition their PA mappings are static.
4176  * (2) hat_add_callback() may only be called while holding the page lock
4177  *     SE_SHARED or SE_EXCL of the underlying page (e.g., as_pagelock()),


6116                                 if (pp != NULL) {
6117                                         panic("sfmmu_hblk_unload: pp = 0x%p "
6118                                             "tte became invalid under mlist"
6119                                             " lock = 0x%p", (void *)pp,
6120                                             (void *)pml);
6121                                 }
6122                                 continue;
6123                         }
6124 
6125                         if (!(flags & HAT_UNLOAD_NOSYNC)) {
6126                                 sfmmu_ttesync(sfmmup, addr, &tte, pp);
6127                         }
6128 
6129                         /*
6130                          * Ok- we invalidated the tte. Do the rest of the job.
6131                          */
6132                         ttecnt++;
6133 
6134                         if (flags & HAT_UNLOAD_UNLOCK) {
6135                                 ASSERT(hmeblkp->hblk_lckcnt > 0);
6136                                 atomic_add_32(&hmeblkp->hblk_lckcnt, -1);
6137                                 HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
6138                         }
6139 
6140                         /*
6141                          * Normally we would need to flush the page
6142                          * from the virtual cache at this point in
6143                          * order to prevent a potential cache alias
6144                          * inconsistency.
6145                          * The particular scenario we need to worry
6146                          * about is:
6147                          * Given:  va1 and va2 are two virtual address
6148                          * that alias and map the same physical
6149                          * address.
6150                          * 1.   mapping exists from va1 to pa and data
6151                          * has been read into the cache.
6152                          * 2.   unload va1.
6153                          * 3.   load va2 and modify data using va2.
6154          * 4.   unload va2.
6155                          * 5.   load va1 and reference data.  Unless we
6156                          * flush the data cache when we unload we will

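The scenario spelled out above is the classic virtual-cache alias problem. It is avoided because the VM system colors pages: every virtual mapping of a physical page gets the same cache color, so va1 and va2 index the same cache lines and can never hold stale copies of each other. A rough sketch of the color arithmetic, with a hypothetical page shift and color count (the real code uses the platform's actual color count and its own helpers):

#include <sys/types.h>

#define	MY_PAGESHIFT	13		/* assumed 8K base page */
#define	MY_VAC_COLORS	4		/* hypothetical number of cache colors */

/* Cache color of a virtual address: the page-index bits below the cache size. */
static uint_t
va_color(uintptr_t va)
{
	return ((va >> MY_PAGESHIFT) & (MY_VAC_COLORS - 1));
}

/*
 * Two virtual mappings of one physical page can leave stale aliases in a
 * virtually indexed cache only if their colors differ; page coloring keeps
 * them equal, which is why the unload path above can skip the flush.
 */
static int
may_alias(uintptr_t va1, uintptr_t va2)
{
	return (va_color(va1) != va_color(va2));
}
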

6170                                 DEMAP_RANGE_MARKPG(dmrp, addr);
6171                         } else {
6172                                 ASSERT(sfmmup != NULL);
6173                                 ASSERT(!hmeblkp->hblk_shared);
6174                                 sfmmu_tlb_demap(addr, sfmmup, hmeblkp,
6175                                     sfmmup->sfmmu_free, 0);
6176                         }
6177 
6178                         if (pp) {
6179                                 /*
6180                                  * Remove the hment from the mapping list
6181                                  */
6182                                 ASSERT(hmeblkp->hblk_hmecnt > 0);
6183 
6184                                 /*
6185                                  * Again, we cannot
6186                                  * ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS);
6187                                  */
6188                                 HME_SUB(sfhmep, pp);
6189                                 membar_stst();
6190                                 atomic_add_16(&hmeblkp->hblk_hmecnt, -1);
6191                         }
6192 
6193                         ASSERT(hmeblkp->hblk_vcnt > 0);
6194                         atomic_add_16(&hmeblkp->hblk_vcnt, -1);
6195 
6196                         ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
6197                             !hmeblkp->hblk_lckcnt);
6198 
6199 #ifdef VAC
6200                         if (pp && (pp->p_nrm & (P_KPMC | P_KPMS | P_TNC))) {
6201                                 if (PP_ISTNC(pp)) {
6202                                         /*
6203                                          * If page was temporarily
6204                                          * uncached, try to recache
6205                                          * it. Note that HME_SUB() was
6206                                          * called above so p_index and
6207                                          * mlist had been updated.
6208                                          */
6209                                         conv_tnc(pp, ttesz);
6210                                 } else if (pp->p_mapping == NULL) {
6211                                         ASSERT(kpm_enable);
6212                                         /*
6213                                          * Page is marked to be in VAC conflict
6214                                          * to an existing kpm mapping and/or is


7332                 }
7333 
7334                 if (ret == 0) {
7335                         panic("pageunload: cas failed?");
7336                 }
7337 
7338                 addr = tte_to_vaddr(hmeblkp, tte);
7339 
7340                 if (hmeblkp->hblk_shared) {
7341                         sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7342                         uint_t rid = hmeblkp->hblk_tag.htag_rid;
7343                         sf_region_t *rgnp;
7344                         ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7345                         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7346                         ASSERT(srdp != NULL);
7347                         rgnp = srdp->srd_hmergnp[rid];
7348                         SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
7349                         cpuset = sfmmu_rgntlb_demap(addr, rgnp, hmeblkp, 1);
7350                         sfmmu_ttesync(NULL, addr, &tte, pp);
7351                         ASSERT(rgnp->rgn_ttecnt[ttesz] > 0);
7352                         atomic_add_long(&rgnp->rgn_ttecnt[ttesz], -1);
7353                 } else {
7354                         sfmmu_ttesync(sfmmup, addr, &tte, pp);
7355                         atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -1);
7356 
7357                         /*
7358                          * We need to flush the page from the virtual cache
7359                          * in order to prevent a virtual cache alias
7360                          * inconsistency. The particular scenario we need
7361                          * to worry about is:
7362                          * Given:  va1 and va2 are two virtual address that
7363                          * alias and will map the same physical address.
7364                          * 1.   mapping exists from va1 to pa and data has
7365                          *      been read into the cache.
7366                          * 2.   unload va1.
7367                          * 3.   load va2 and modify data using va2.
7368          * 4.   unload va2.
7369                          * 5.   load va1 and reference data.  Unless we flush
7370                          *      the data cache when we unload we will get
7371                          *      stale data.
7372                          * This scenario is taken care of by using virtual
7373                          * page coloring.
7374                          */
7375                         if (sfmmup->sfmmu_ismhat) {


7396                 /*
7397                  * Hme_sub has to run after ttesync() and a_rss update.
7398                  * See hblk_unload().
7399                  */
7400                 HME_SUB(sfhme, pp);
7401                 membar_stst();
7402 
7403                 /*
7404                  * We can not make ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
7405                  * since pteload may have done a HME_ADD() right after
7406                  * we did the HME_SUB() above. Hmecnt is now maintained
7407                  * by cas only. No lock guarantees its value. The only
7408                  * guarantee we have is that the hmecnt should not be less than
7409                  * what it should be so the hblk will not be taken away.
7410                  * It's also important that we decrement the hmecnt only after
7411                  * we are done with hmeblkp so that this hmeblk won't be
7412                  * stolen.
7413                  */
7414                 ASSERT(hmeblkp->hblk_hmecnt > 0);
7415                 ASSERT(hmeblkp->hblk_vcnt > 0);
7416                 atomic_add_16(&hmeblkp->hblk_vcnt, -1);
7417                 atomic_add_16(&hmeblkp->hblk_hmecnt, -1);
7418                 /*
7419                  * This is bug 4063182.
7420                  * XXX: fixme
7421                  * ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
7422                  *      !hmeblkp->hblk_lckcnt);
7423                  */
7424         } else {
7425                 panic("invalid tte? pp %p &tte %p",
7426                     (void *)pp, (void *)&tte);
7427         }
7428 
7429         return (cpuset);
7430 }
7431 
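
The ordering rule in the comment above (finish every use of hmeblkp, update the mapping list, issue the store barrier, and only then drop the counts that pin the block) is the general use-then-release pattern for objects whose lifetime is guarded by a lock-free count. A stripped-down sketch with a hypothetical block type:

#include <sys/types.h>
#include <sys/atomic.h>

typedef struct myblk {
	volatile uint16_t	mb_cnt;		/* plays the hblk_hmecnt/hblk_vcnt role */
	void			*mb_data;
} myblk_t;

static void
release_mapping(myblk_t *mbp)
{
	/* 1. finish every access to the block while the count still pins it */
	mbp->mb_data = NULL;

	/* 2. make the stores above visible before the count can drop */
	membar_producer();

	/* 3. only now decrement; a reaper that sees zero may steal the block */
	atomic_dec_16(&mbp->mb_cnt);
}
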
7432 /*
7433  * While relocating a kernel page, this function will move the mappings
7434  * from tpp to dpp and modify any associated data with these mappings.
7435  * It also unsuspends the suspended kernel mapping.
7436  */
7437 static void


13796         uint_t hash = SRD_HASH_FUNCTION(evp);
13797         sf_srd_t *srdp;
13798         sf_srd_t *newsrdp;
13799 
13800         ASSERT(sfmmup != ksfmmup);
13801         ASSERT(sfmmup->sfmmu_srdp == NULL);
13802 
13803         if (!shctx_on) {
13804                 return;
13805         }
13806 
13807         VN_HOLD(evp);
13808 
13809         if (srd_buckets[hash].srdb_srdp != NULL) {
13810                 mutex_enter(&srd_buckets[hash].srdb_lock);
13811                 for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13812                     srdp = srdp->srd_hash) {
13813                         if (srdp->srd_evp == evp) {
13814                                 ASSERT(srdp->srd_refcnt >= 0);
13815                                 sfmmup->sfmmu_srdp = srdp;
13816                                 atomic_add_32(
13817                                     (volatile uint_t *)&srdp->srd_refcnt, 1);
13818                                 mutex_exit(&srd_buckets[hash].srdb_lock);
13819                                 return;
13820                         }
13821                 }
13822                 mutex_exit(&srd_buckets[hash].srdb_lock);
13823         }
13824         newsrdp = kmem_cache_alloc(srd_cache, KM_SLEEP);
13825         ASSERT(newsrdp->srd_next_ismrid == 0 && newsrdp->srd_next_hmerid == 0);
13826 
13827         newsrdp->srd_evp = evp;
13828         newsrdp->srd_refcnt = 1;
13829         newsrdp->srd_hmergnfree = NULL;
13830         newsrdp->srd_ismrgnfree = NULL;
13831 
13832         mutex_enter(&srd_buckets[hash].srdb_lock);
13833         for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13834             srdp = srdp->srd_hash) {
13835                 if (srdp->srd_evp == evp) {
13836                         ASSERT(srdp->srd_refcnt >= 0);
13837                         sfmmup->sfmmu_srdp = srdp;
13838                         atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1);
13839                         mutex_exit(&srd_buckets[hash].srdb_lock);
13840                         kmem_cache_free(srd_cache, newsrdp);
13841                         return;
13842                 }
13843         }
13844         newsrdp->srd_hash = srd_buckets[hash].srdb_srdp;
13845         srd_buckets[hash].srdb_srdp = newsrdp;
13846         sfmmup->sfmmu_srdp = newsrdp;
13847 
13848         mutex_exit(&srd_buckets[hash].srdb_lock);
13849 
13850 }
13851 
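sfmmu_find_srd() above follows the usual optimistic pattern for inserting into a hash under a per-bucket lock: scan, allocate the new SRD with KM_SLEEP while no lock is held, then rescan under srdb_lock and either hand the allocation back to the cache (a racing thread won) or link it in. A condensed sketch of the same flow over a hypothetical node type:

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/atomic.h>

typedef struct node {
	struct node	*n_hash;
	void		*n_key;
	volatile uint_t	n_refcnt;
} node_t;

/*
 * Hypothetical bucket; srd_buckets[hash] plays this role above.
 * Returns a held node for key, creating one if needed.
 */
static node_t *
find_or_create(kmutex_t *lockp, node_t **headp, void *key)
{
	node_t *np, *newnp;

	mutex_enter(lockp);
	for (np = *headp; np != NULL; np = np->n_hash) {
		if (np->n_key == key) {
			atomic_inc_32(&np->n_refcnt);
			mutex_exit(lockp);
			return (np);
		}
	}
	mutex_exit(lockp);

	newnp = kmem_zalloc(sizeof (*newnp), KM_SLEEP);	/* may sleep: no lock held */
	newnp->n_key = key;
	newnp->n_refcnt = 1;

	mutex_enter(lockp);
	for (np = *headp; np != NULL; np = np->n_hash) {
		if (np->n_key == key) {			/* lost the race */
			atomic_inc_32(&np->n_refcnt);
			mutex_exit(lockp);
			kmem_free(newnp, sizeof (*newnp));
			return (np);
		}
	}
	newnp->n_hash = *headp;				/* won the race: link it in */
	*headp = newnp;
	mutex_exit(lockp);
	return (newnp);
}
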
13852 static void
13853 sfmmu_leave_srd(sfmmu_t *sfmmup)
13854 {
13855         vnode_t *evp;
13856         sf_srd_t *srdp = sfmmup->sfmmu_srdp;
13857         uint_t hash;
13858         sf_srd_t **prev_srdpp;
13859         sf_region_t *rgnp;
13860         sf_region_t *nrgnp;
13861 #ifdef DEBUG
13862         int rgns = 0;
13863 #endif
13864         int i;
13865 
13866         ASSERT(sfmmup != ksfmmup);
13867         ASSERT(srdp != NULL);
13868         ASSERT(srdp->srd_refcnt > 0);
13869         ASSERT(sfmmup->sfmmu_scdp == NULL);
13870         ASSERT(sfmmup->sfmmu_free == 1);
13871 
13872         sfmmup->sfmmu_srdp = NULL;
13873         evp = srdp->srd_evp;
13874         ASSERT(evp != NULL);
13875         if (atomic_add_32_nv(
13876             (volatile uint_t *)&srdp->srd_refcnt, -1)) {
13877                 VN_RELE(evp);
13878                 return;
13879         }
13880 
13881         hash = SRD_HASH_FUNCTION(evp);
13882         mutex_enter(&srd_buckets[hash].srdb_lock);
13883         for (prev_srdpp = &srd_buckets[hash].srdb_srdp;
13884             (srdp = *prev_srdpp) != NULL; prev_srdpp = &srdp->srd_hash) {
13885                 if (srdp->srd_evp == evp) {
13886                         break;
13887                 }
13888         }
13889         if (srdp == NULL || srdp->srd_refcnt) {
13890                 mutex_exit(&srd_buckets[hash].srdb_lock);
13891                 VN_RELE(evp);
13892                 return;
13893         }
13894         *prev_srdpp = srdp->srd_hash;
13895         mutex_exit(&srd_buckets[hash].srdb_lock);
13896 


14073         mutex_enter(&srdp->srd_mutex);
14074 
14075         for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
14076             rgnp = rgnp->rgn_hash) {
14077                 if (rgnp->rgn_saddr == r_saddr && rgnp->rgn_size == r_size &&
14078                     rgnp->rgn_obj == r_obj && rgnp->rgn_objoff == r_objoff &&
14079                     rgnp->rgn_perm == r_perm && rgnp->rgn_pgszc == r_pgszc) {
14080                         break;
14081                 }
14082         }
14083 
14084 rfound:
14085         if (rgnp != NULL) {
14086                 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14087                 ASSERT(rgnp->rgn_cb_function == r_cb_function);
14088                 ASSERT(rgnp->rgn_refcnt >= 0);
14089                 rid = rgnp->rgn_id;
14090                 ASSERT(rid < maxids);
14091                 ASSERT(rarrp[rid] == rgnp);
14092                 ASSERT(rid < *nextidp);
14093                 atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1);
14094                 mutex_exit(&srdp->srd_mutex);
14095                 if (new_rgnp != NULL) {
14096                         kmem_cache_free(region_cache, new_rgnp);
14097                 }
14098                 if (r_type == SFMMU_REGION_HME) {
14099                         int myjoin =
14100                             (sfmmup == astosfmmu(curthread->t_procp->p_as));
14101 
14102                         sfmmu_link_to_hmeregion(sfmmup, rgnp);
14103                         /*
14104                          * bitmap should be updated after linking sfmmu on
14105                          * region list so that pageunload() doesn't skip
14106                          * TSB/TLB flush. As soon as bitmap is updated another
14107                          * thread in this process can already start accessing
14108                          * this region.
14109                          */
14110                         /*
14111                          * Normally ttecnt accounting is done as part of
14112                          * pagefault handling. But a process may not take any
14113                          * pagefaults on shared hmeblks created by some other


14423 
14424                 /* update shme rgns ttecnt in sfmmu_ttecnt */
14425                 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14426                 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14427                 atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], -rttecnt);
14428 
14429                 sfmmu_hat_exit(hatlockp);
14430                 if (scdp != NULL && sfmmup->sfmmu_scdp == NULL) {
14431                         /* sfmmup left the scd, grow private tsb */
14432                         sfmmu_check_page_sizes(sfmmup, 1);
14433                 } else {
14434                         sfmmu_check_page_sizes(sfmmup, 0);
14435                 }
14436         }
14437 
14438         if (r_type == SFMMU_REGION_HME) {
14439                 sfmmu_unlink_from_hmeregion(sfmmup, rgnp);
14440         }
14441 
14442         r_obj = rgnp->rgn_obj;
14443         if (atomic_add_32_nv((volatile uint_t *)&rgnp->rgn_refcnt, -1)) {
14444                 return;
14445         }
14446 
14447         /*
14448          * looks like nobody uses this region anymore. Free it.
14449          */
14450         rhash = RGN_HASH_FUNCTION(r_obj);
14451         mutex_enter(&srdp->srd_mutex);
14452         for (prev_rgnpp = &srdp->srd_rgnhash[rhash];
14453             (cur_rgnp = *prev_rgnpp) != NULL;
14454             prev_rgnpp = &cur_rgnp->rgn_hash) {
14455                 if (cur_rgnp == rgnp && cur_rgnp->rgn_refcnt == 0) {
14456                         break;
14457                 }
14458         }
14459 
14460         if (cur_rgnp == NULL) {
14461                 mutex_exit(&srdp->srd_mutex);
14462                 return;
14463         }

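The rttecnt adjustment above is one of the atomic_add_long() calls the rename leaves alone, since the delta is a whole region's worth of translations rather than 1. The count itself is just the region size shifted down by the region's page-size shift; for example, assuming TTE_PAGE_SHIFT() yields 22 for a 4M page size, a 256M region contributes 256M >> 22 = 64 ttes. A trivial helper, hypothetical and not in the source, to make the arithmetic concrete:

#include <sys/types.h>

/* Translation count for a region: its size divided by its page size. */
static ulong_t
region_ttecnt(size_t r_size, uint_t pgshift)
{
	return ((ulong_t)(r_size >> pgshift));
}
/* region_ttecnt(256UL << 20, 22) == 64: a 256M region backed by 4M pages. */
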

14508         uint_t rid = (uint_t)((uint64_t)rcookie);
14509         sf_region_t *rgnp;
14510         sf_rgn_link_t *rlink;
14511         sf_rgn_link_t *hrlink;
14512         ulong_t rttecnt;
14513 
14514         ASSERT(sfmmup != ksfmmup);
14515         ASSERT(srdp != NULL);
14516         ASSERT(srdp->srd_refcnt > 0);
14517 
14518         ASSERT(rid < srdp->srd_next_hmerid);
14519         ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14520         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14521 
14522         rgnp = srdp->srd_hmergnp[rid];
14523         ASSERT(rgnp->rgn_refcnt > 0);
14524         ASSERT(rgnp->rgn_id == rid);
14525         ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == SFMMU_REGION_HME);
14526         ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14527 
14528         atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1);
14529 
14530         /* LINTED: constant in conditional context */
14531         SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 0);
14532         ASSERT(rlink != NULL);
14533         mutex_enter(&rgnp->rgn_mutex);
14534         ASSERT(rgnp->rgn_sfmmu_head != NULL);
14535         /* LINTED: constant in conditional context */
14536         SFMMU_HMERID2RLINKP(rgnp->rgn_sfmmu_head, rid, hrlink, 0, 0);
14537         ASSERT(hrlink != NULL);
14538         ASSERT(hrlink->prev == NULL);
14539         rlink->next = rgnp->rgn_sfmmu_head;
14540         rlink->prev = NULL;
14541         hrlink->prev = sfmmup;
14542         /*
14543          * make sure rlink's next field is correct
14544          * before making this link visible.
14545          */
14546         membar_stst();
14547         rgnp->rgn_sfmmu_head = sfmmup;
14548         mutex_exit(&rgnp->rgn_mutex);

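The membar_stst() above provides exactly the ordering the comment asks for: the new link's next pointer must be visible before rgn_sfmmu_head is switched to the new sfmmu, so a reader walking the region list from the head never sees an uninitialized link. A generic head-publish sketch using membar_producer(), the machine-independent store-store barrier (membar_stst() is the SPARC-specific form used here):

#include <sys/atomic.h>

typedef struct elem {
	struct elem	*e_next;
} elem_t;

/* Writers are assumed to serialize among themselves (the rgn_mutex role above). */
static void
publish_at_head(elem_t *volatile *headp, elem_t *ep)
{
	ep->e_next = *headp;	/* fill in the new element first */
	membar_producer();	/* order the store above before the publish below */
	*headp = ep;		/* readers can now find ep with a valid e_next */
}
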

15238 static void
15239 sfmmu_find_scd(sfmmu_t *sfmmup)
15240 {
15241         sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15242         sf_scd_t *scdp, *new_scdp;
15243         int ret;
15244 
15245         ASSERT(srdp != NULL);
15246         ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
15247 
15248         mutex_enter(&srdp->srd_scd_mutex);
15249         for (scdp = srdp->srd_scdp; scdp != NULL;
15250             scdp = scdp->scd_next) {
15251                 SF_RGNMAP_EQUAL(&scdp->scd_region_map,
15252                     &sfmmup->sfmmu_region_map, ret);
15253                 if (ret == 1) {
15254                         SF_SCD_INCR_REF(scdp);
15255                         mutex_exit(&srdp->srd_scd_mutex);
15256                         sfmmu_join_scd(scdp, sfmmup);
15257                         ASSERT(scdp->scd_refcnt >= 2);
15258                         atomic_add_32((volatile uint32_t *)
15259                             &scdp->scd_refcnt, -1);
15260                         return;
15261                 } else {
15262                         /*
15263                          * If the sfmmu region map is a subset of the scd
15264                          * region map, then the assumption is that this process
15265                          * will continue attaching to ISM segments until the
15266                          * region maps are equal.
15267                          */
15268                         SF_RGNMAP_IS_SUBSET(&scdp->scd_region_map,
15269                             &sfmmup->sfmmu_region_map, ret);
15270                         if (ret == 1) {
15271                                 mutex_exit(&srdp->srd_scd_mutex);
15272                                 return;
15273                         }
15274                 }
15275         }
15276 
15277         ASSERT(scdp == NULL);
15278         /*
15279          * No matching SCD has been found, create a new one.

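SF_RGNMAP_EQUAL() and SF_RGNMAP_IS_SUBSET() above are word-by-word comparisons of the region bitmaps, writing the boolean result into ret; one map is a subset of another when every bit set in it is also set in the other. A sketch of both tests over a hypothetical fixed-size map:

#include <sys/types.h>

#define	MY_RGNMAP_WORDS	4	/* hypothetical map size in ulong_t words */

typedef struct my_rgnmap {
	ulong_t	bits[MY_RGNMAP_WORDS];
} my_rgnmap_t;

static int
rgnmap_equal(const my_rgnmap_t *a, const my_rgnmap_t *b)
{
	int i;

	for (i = 0; i < MY_RGNMAP_WORDS; i++)
		if (a->bits[i] != b->bits[i])
			return (0);
	return (1);
}

static int
rgnmap_is_subset(const my_rgnmap_t *a, const my_rgnmap_t *b)
{
	int i;

	/* every bit of a must also be set in b */
	for (i = 0; i < MY_RGNMAP_WORDS; i++)
		if ((a->bits[i] & ~b->bits[i]) != 0)
			return (0);
	return (1);
}
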

15284                 return;
15285         }
15286 
15287         /*
15288          * sfmmu_alloc_scd() returns with a ref count of 1 on the scd.
15289          */
15290 
15291         /* Set scd_rttecnt for shme rgns in SCD */
15292         sfmmu_set_scd_rttecnt(srdp, new_scdp);
15293 
15294         /*
15295          * Link scd onto srd_scdp list and scd sfmmu onto region/iment lists.
15296          */
15297         sfmmu_link_scd_to_regions(srdp, new_scdp);
15298         sfmmu_add_scd(&srdp->srd_scdp, new_scdp);
15299         SFMMU_STAT_ADD(sf_create_scd, 1);
15300 
15301         mutex_exit(&srdp->srd_scd_mutex);
15302         sfmmu_join_scd(new_scdp, sfmmup);
15303         ASSERT(new_scdp->scd_refcnt >= 2);
15304         atomic_add_32((volatile uint32_t *)&new_scdp->scd_refcnt, -1);
15305 }
15306 
15307 /*
15308  * This routine is called by a process to remove itself from an SCD. It is
15309  * either called when the process has detached from a segment or from
15310  * hat_free_start() as a result of calling exit.
15311  */
15312 static void
15313 sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type)
15314 {
15315         sf_scd_t *scdp = sfmmup->sfmmu_scdp;
15316         sf_srd_t *srdp =  sfmmup->sfmmu_srdp;
15317         hatlock_t *hatlockp = TSB_HASH(sfmmup);
15318         int i;
15319 
15320         ASSERT(scdp != NULL);
15321         ASSERT(srdp != NULL);
15322 
15323         if (sfmmup->sfmmu_free) {
15324                 /*

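The SF_SCD_INCR_REF() and matching decrement in sfmmu_find_scd() above form a temporary hold: the extra reference taken while srd_scd_mutex is held keeps the SCD alive across the lock drop, and it is released once sfmmu_join_scd() has installed the process's own reference, which is why the refcount is asserted to be at least 2 first. A small sketch of that shape with a hypothetical object and attach routine:

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/mutex.h>
#include <sys/debug.h>

typedef struct obj {
	volatile uint32_t	o_refcnt;
} obj_t;

extern void attach_to(obj_t *);		/* hypothetical; takes its own reference */

static void
use_across_lock_drop(kmutex_t *listlockp, obj_t *op)
{
	/* caller found op on a list protected by listlockp, which it holds */
	atomic_inc_32(&op->o_refcnt);	/* temporary hold */
	mutex_exit(listlockp);

	attach_to(op);			/* may block; op cannot disappear meanwhile */
	ASSERT(op->o_refcnt >= 2);	/* our hold plus the one attach_to() took */

	atomic_dec_32(&op->o_refcnt);	/* drop the temporary hold */
}
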



2134         sf_scd_t *scdp;
2135         int i;
2136         extern uint_t get_color_start(struct as *);
2137 
2138         ASSERT(hat->sfmmu_xhat_provider == NULL);
2139         ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
2140             (flag == HAT_DUP_SRD));
2141         ASSERT(hat != ksfmmup);
2142         ASSERT(newhat != ksfmmup);
2143         ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
2144 
2145         if (flag == HAT_DUP_COW) {
2146                 panic("hat_dup: HAT_DUP_COW not supported");
2147         }
2148 
2149         if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
2150                 ASSERT(srdp->srd_evp != NULL);
2151                 VN_HOLD(srdp->srd_evp);
2152                 ASSERT(srdp->srd_refcnt > 0);
2153                 newhat->sfmmu_srdp = srdp;
2154                 atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
2155         }
2156 
2157         /*
2158          * HAT_DUP_ALL flag is used after as duplication is done.
2159          */
2160         if (flag == HAT_DUP_ALL && ((srdp = newhat->sfmmu_srdp) != NULL)) {
2161                 ASSERT(newhat->sfmmu_srdp->srd_refcnt >= 2);
2162                 newhat->sfmmu_rtteflags = hat->sfmmu_rtteflags;
2163                 if (hat->sfmmu_flags & HAT_4MTEXT_FLAG) {
2164                         newhat->sfmmu_flags |= HAT_4MTEXT_FLAG;
2165                 }
2166 
2167                 /* check if need to join scd */
2168                 if ((scdp = hat->sfmmu_scdp) != NULL &&
2169                     newhat->sfmmu_scdp != scdp) {
2170                         int ret;
2171                         SF_RGNMAP_IS_SUBSET(&newhat->sfmmu_region_map,
2172                             &scdp->scd_region_map, ret);
2173                         ASSERT(ret);
2174                         sfmmu_join_scd(scdp, newhat);


3210                          * sfmmu_pagearray_setup failed so return
3211                          */
3212                         sfmmu_mlist_exit(pml);
3213                         return (1);
3214                 }
3215         }
3216 
3217         /*
3218          * Make sure hment is not on a mapping list.
3219          */
3220         ASSERT(remap || (sfhme->hme_page == NULL));
3221 
3222         /* if it is not a remap then hme->next better be NULL */
3223         ASSERT((!remap) ? sfhme->hme_next == NULL : 1);
3224 
3225         if (flags & HAT_LOAD_LOCK) {
3226                 if ((hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) {
3227                         panic("too high lckcnt-hmeblk %p",
3228                             (void *)hmeblkp);
3229                 }
3230                 atomic_inc_32(&hmeblkp->hblk_lckcnt);
3231 
3232                 HBLK_STACK_TRACE(hmeblkp, HBLK_LOCK);
3233         }
3234 
3235 #ifdef VAC
3236         if (pp && PP_ISNC(pp)) {
3237                 /*
3238                  * If the physical page is marked to be uncacheable, like
3239                  * by a vac conflict, make sure the new mapping is also
3240                  * uncacheable.
3241                  */
3242                 TTE_CLR_VCACHEABLE(ttep);
3243                 ASSERT(PP_GET_VCOLOR(pp) == NO_VCOLOR);
3244         }
3245 #endif
3246         ttep->tte_hmenum = hmenum;
3247 
3248 #ifdef DEBUG
3249         orig_old = tteold;
3250 #endif /* DEBUG */
3251 
3252         while (sfmmu_modifytte_try(&tteold, ttep, &sfhme->hme_tte) < 0) {
3253                 if ((sfmmup == KHATID) &&
3254                     (flags & (HAT_LOAD_LOCK | HAT_LOAD_REMAP))) {
3255                         sfmmu_copytte(&sfhme->hme_tte, &tteold);
3256                 }
3257 #ifdef DEBUG
3258                 chk_tte(&orig_old, &tteold, ttep, hmeblkp);
3259 #endif /* DEBUG */
3260         }
3261         ASSERT(TTE_IS_VALID(&sfhme->hme_tte));
3262 
3263         if (!TTE_IS_VALID(&tteold)) {
3264 
3265                 atomic_inc_16(&hmeblkp->hblk_vcnt);
3266                 if (rid == SFMMU_INVALID_SHMERID) {
3267                         atomic_inc_ulong(&sfmmup->sfmmu_ttecnt[size]);
3268                 } else {
3269                         sf_srd_t *srdp = sfmmup->sfmmu_srdp;
3270                         sf_region_t *rgnp = srdp->srd_hmergnp[rid];
3271                         /*
3272                          * We already accounted for region ttecnt's in sfmmu
3273                          * during hat_join_region() processing. Here we
3274                          * only update ttecnt's in region structure.
3275                          */
3276                         atomic_inc_ulong(&rgnp->rgn_ttecnt[size]);
3277                 }
3278         }
3279 
3280         myflt = (astosfmmu(curthread->t_procp->p_as) == sfmmup);
3281         if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 &&
3282             sfmmup != ksfmmup) {
3283                 uchar_t tteflag = 1 << size;
3284                 if (rid == SFMMU_INVALID_SHMERID) {
3285                         if (!(sfmmup->sfmmu_tteflags & tteflag)) {
3286                                 hatlockp = sfmmu_hat_enter(sfmmup);
3287                                 sfmmup->sfmmu_tteflags |= tteflag;
3288                                 sfmmu_hat_exit(hatlockp);
3289                         }
3290                 } else if (!(sfmmup->sfmmu_rtteflags & tteflag)) {
3291                         hatlockp = sfmmu_hat_enter(sfmmup);
3292                         sfmmup->sfmmu_rtteflags |= tteflag;
3293                         sfmmu_hat_exit(hatlockp);
3294                 }
3295                 /*
3296                  * Update the current CPU tsbmiss area, so the current thread


3364                  */
3365                 if (size == TTE8K || size == TTE4M) {
3366                         sf_scd_t *scdp;
3367                         hatlockp = sfmmu_hat_enter(sfmmup);
3368                         /*
3369                          * Don't preload private TSB if the mapping is used
3370                          * by the shctx in the SCD.
3371                          */
3372                         scdp = sfmmup->sfmmu_scdp;
3373                         if (rid == SFMMU_INVALID_SHMERID || scdp == NULL ||
3374                             !SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
3375                                 sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte,
3376                                     size);
3377                         }
3378                         sfmmu_hat_exit(hatlockp);
3379                 }
3380         }
3381         if (pp) {
3382                 if (!remap) {
3383                         HME_ADD(sfhme, pp);
3384                         atomic_inc_16(&hmeblkp->hblk_hmecnt);
3385                         ASSERT(hmeblkp->hblk_hmecnt > 0);
3386 
3387                         /*
3388                          * Cannot ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
3389                          * see pageunload() for comment.
3390                          */
3391                 }
3392                 sfmmu_mlist_exit(pml);
3393         }
3394 
3395         return (0);
3396 }
3397 /*
3398  * Function unlocks hash bucket.
3399  */
3400 static void
3401 sfmmu_tteload_release_hashbucket(struct hmehash_bucket *hmebp)
3402 {
3403         ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
3404         SFMMU_HASH_UNLOCK(hmebp);


4137 readtte:
4138                 sfmmu_copytte(&sfhme->hme_tte, &tteold);
4139                 if (TTE_IS_VALID(&tteold)) {
4140 
4141                         ttemod = tteold;
4142 
4143                         ret = sfmmu_modifytte_try(&tteold, &ttemod,
4144                             &sfhme->hme_tte);
4145 
4146                         if (ret < 0)
4147                                 goto readtte;
4148 
4149                         if (hmeblkp->hblk_lckcnt == 0)
4150                                 panic("zero hblk lckcnt");
4151 
4152                         if (((uintptr_t)addr + TTEBYTES(ttesz)) >
4153                             (uintptr_t)endaddr)
4154                                 panic("can't unlock large tte");
4155 
4156                         ASSERT(hmeblkp->hblk_lckcnt > 0);
4157                         atomic_dec_32(&hmeblkp->hblk_lckcnt);
4158                         HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
4159                 } else {
4160                         panic("sfmmu_hblk_unlock: invalid tte");
4161                 }
4162                 addr += TTEBYTES(ttesz);
4163                 sfhme++;
4164         }
4165         return (addr);
4166 }
4167 
4168 /*
4169  * Physical Address Mapping Framework
4170  *
4171  * General rules:
4172  *
4173  * (1) Applies only to seg_kmem memory pages. To make things easier,
4174  *     seg_kpm addresses are also accepted by the routines, but nothing
4175  *     is done with them since by definition their PA mappings are static.
4176  * (2) hat_add_callback() may only be called while holding the page lock
4177  *     SE_SHARED or SE_EXCL of the underlying page (e.g., as_pagelock()),


6116                                 if (pp != NULL) {
6117                                         panic("sfmmu_hblk_unload: pp = 0x%p "
6118                                             "tte became invalid under mlist"
6119                                             " lock = 0x%p", (void *)pp,
6120                                             (void *)pml);
6121                                 }
6122                                 continue;
6123                         }
6124 
6125                         if (!(flags & HAT_UNLOAD_NOSYNC)) {
6126                                 sfmmu_ttesync(sfmmup, addr, &tte, pp);
6127                         }
6128 
6129                         /*
6130                          * Ok- we invalidated the tte. Do the rest of the job.
6131                          */
6132                         ttecnt++;
6133 
6134                         if (flags & HAT_UNLOAD_UNLOCK) {
6135                                 ASSERT(hmeblkp->hblk_lckcnt > 0);
6136                                 atomic_dec_32(&hmeblkp->hblk_lckcnt);
6137                                 HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
6138                         }
6139 
6140                         /*
6141                          * Normally we would need to flush the page
6142                          * from the virtual cache at this point in
6143                          * order to prevent a potential cache alias
6144                          * inconsistency.
6145                          * The particular scenario we need to worry
6146                          * about is:
6147                          * Given:  va1 and va2 are two virtual address
6148                          * that alias and map the same physical
6149                          * address.
6150                          * 1.   mapping exists from va1 to pa and data
6151                          * has been read into the cache.
6152                          * 2.   unload va1.
6153                          * 3.   load va2 and modify data using va2.
6154          * 4.   unload va2.
6155                          * 5.   load va1 and reference data.  Unless we
6156                          * flush the data cache when we unload we will


6170                                 DEMAP_RANGE_MARKPG(dmrp, addr);
6171                         } else {
6172                                 ASSERT(sfmmup != NULL);
6173                                 ASSERT(!hmeblkp->hblk_shared);
6174                                 sfmmu_tlb_demap(addr, sfmmup, hmeblkp,
6175                                     sfmmup->sfmmu_free, 0);
6176                         }
6177 
6178                         if (pp) {
6179                                 /*
6180                                  * Remove the hment from the mapping list
6181                                  */
6182                                 ASSERT(hmeblkp->hblk_hmecnt > 0);
6183 
6184                                 /*
6185                                  * Again, we cannot
6186                                  * ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS);
6187                                  */
6188                                 HME_SUB(sfhmep, pp);
6189                                 membar_stst();
6190                                 atomic_dec_16(&hmeblkp->hblk_hmecnt);
6191                         }
6192 
6193                         ASSERT(hmeblkp->hblk_vcnt > 0);
6194                         atomic_dec_16(&hmeblkp->hblk_vcnt);
6195 
6196                         ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
6197                             !hmeblkp->hblk_lckcnt);
6198 
6199 #ifdef VAC
6200                         if (pp && (pp->p_nrm & (P_KPMC | P_KPMS | P_TNC))) {
6201                                 if (PP_ISTNC(pp)) {
6202                                         /*
6203                                          * If page was temporarily
6204                                          * uncached, try to recache
6205                                          * it. Note that HME_SUB() was
6206                                          * called above so p_index and
6207                                          * mlist had been updated.
6208                                          */
6209                                         conv_tnc(pp, ttesz);
6210                                 } else if (pp->p_mapping == NULL) {
6211                                         ASSERT(kpm_enable);
6212                                         /*
6213                                          * Page is marked to be in VAC conflict
6214                                          * to an existing kpm mapping and/or is


7332                 }
7333 
7334                 if (ret == 0) {
7335                         panic("pageunload: cas failed?");
7336                 }
7337 
7338                 addr = tte_to_vaddr(hmeblkp, tte);
7339 
7340                 if (hmeblkp->hblk_shared) {
7341                         sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7342                         uint_t rid = hmeblkp->hblk_tag.htag_rid;
7343                         sf_region_t *rgnp;
7344                         ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7345                         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7346                         ASSERT(srdp != NULL);
7347                         rgnp = srdp->srd_hmergnp[rid];
7348                         SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
7349                         cpuset = sfmmu_rgntlb_demap(addr, rgnp, hmeblkp, 1);
7350                         sfmmu_ttesync(NULL, addr, &tte, pp);
7351                         ASSERT(rgnp->rgn_ttecnt[ttesz] > 0);
7352                         atomic_dec_ulong(&rgnp->rgn_ttecnt[ttesz]);
7353                 } else {
7354                         sfmmu_ttesync(sfmmup, addr, &tte, pp);
7355                         atomic_dec_ulong(&sfmmup->sfmmu_ttecnt[ttesz]);
7356 
7357                         /*
7358                          * We need to flush the page from the virtual cache
7359                          * in order to prevent a virtual cache alias
7360                          * inconsistency. The particular scenario we need
7361                          * to worry about is:
7362                          * Given:  va1 and va2 are two virtual address that
7363                          * alias and will map the same physical address.
7364                          * 1.   mapping exists from va1 to pa and data has
7365                          *      been read into the cache.
7366                          * 2.   unload va1.
7367                          * 3.   load va2 and modify data using va2.
7368          * 4.   unload va2.
7369                          * 5.   load va1 and reference data.  Unless we flush
7370                          *      the data cache when we unload we will get
7371                          *      stale data.
7372                          * This scenario is taken care of by using virtual
7373                          * page coloring.
7374                          */
7375                         if (sfmmup->sfmmu_ismhat) {


7396                 /*
7397                  * Hme_sub has to run after ttesync() and a_rss update.
7398                  * See hblk_unload().
7399                  */
7400                 HME_SUB(sfhme, pp);
7401                 membar_stst();
7402 
7403                 /*
7404                  * We can not make ASSERT(hmeblkp->hblk_hmecnt <= NHMENTS)
7405                  * since pteload may have done a HME_ADD() right after
7406                  * we did the HME_SUB() above. Hmecnt is now maintained
7407                  * by cas only. No lock guarantees its value. The only
7408                  * guarantee we have is that the hmecnt should not be less than
7409                  * what it should be so the hblk will not be taken away.
7410                  * It's also important that we decrement the hmecnt only after
7411                  * we are done with hmeblkp so that this hmeblk won't be
7412                  * stolen.
7413                  */
7414                 ASSERT(hmeblkp->hblk_hmecnt > 0);
7415                 ASSERT(hmeblkp->hblk_vcnt > 0);
7416                 atomic_dec_16(&hmeblkp->hblk_vcnt);
7417                 atomic_dec_16(&hmeblkp->hblk_hmecnt);
7418                 /*
7419                  * This is bug 4063182.
7420                  * XXX: fixme
7421                  * ASSERT(hmeblkp->hblk_hmecnt || hmeblkp->hblk_vcnt ||
7422                  *      !hmeblkp->hblk_lckcnt);
7423                  */
7424         } else {
7425                 panic("invalid tte? pp %p &tte %p",
7426                     (void *)pp, (void *)&tte);
7427         }
7428 
7429         return (cpuset);
7430 }
7431 
7432 /*
7433  * While relocating a kernel page, this function will move the mappings
7434  * from tpp to dpp and modify any associated data with these mappings.
7435  * It also unsuspends the suspended kernel mapping.
7436  */
7437 static void


13796         uint_t hash = SRD_HASH_FUNCTION(evp);
13797         sf_srd_t *srdp;
13798         sf_srd_t *newsrdp;
13799 
13800         ASSERT(sfmmup != ksfmmup);
13801         ASSERT(sfmmup->sfmmu_srdp == NULL);
13802 
13803         if (!shctx_on) {
13804                 return;
13805         }
13806 
13807         VN_HOLD(evp);
13808 
13809         if (srd_buckets[hash].srdb_srdp != NULL) {
13810                 mutex_enter(&srd_buckets[hash].srdb_lock);
13811                 for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13812                     srdp = srdp->srd_hash) {
13813                         if (srdp->srd_evp == evp) {
13814                                 ASSERT(srdp->srd_refcnt >= 0);
13815                                 sfmmup->sfmmu_srdp = srdp;
13816                                 atomic_inc_32(
13817                                     (volatile uint_t *)&srdp->srd_refcnt);
13818                                 mutex_exit(&srd_buckets[hash].srdb_lock);
13819                                 return;
13820                         }
13821                 }
13822                 mutex_exit(&srd_buckets[hash].srdb_lock);
13823         }
13824         newsrdp = kmem_cache_alloc(srd_cache, KM_SLEEP);
13825         ASSERT(newsrdp->srd_next_ismrid == 0 && newsrdp->srd_next_hmerid == 0);
13826 
13827         newsrdp->srd_evp = evp;
13828         newsrdp->srd_refcnt = 1;
13829         newsrdp->srd_hmergnfree = NULL;
13830         newsrdp->srd_ismrgnfree = NULL;
13831 
13832         mutex_enter(&srd_buckets[hash].srdb_lock);
13833         for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
13834             srdp = srdp->srd_hash) {
13835                 if (srdp->srd_evp == evp) {
13836                         ASSERT(srdp->srd_refcnt >= 0);
13837                         sfmmup->sfmmu_srdp = srdp;
13838                         atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
13839                         mutex_exit(&srd_buckets[hash].srdb_lock);
13840                         kmem_cache_free(srd_cache, newsrdp);
13841                         return;
13842                 }
13843         }
13844         newsrdp->srd_hash = srd_buckets[hash].srdb_srdp;
13845         srd_buckets[hash].srdb_srdp = newsrdp;
13846         sfmmup->sfmmu_srdp = newsrdp;
13847 
13848         mutex_exit(&srd_buckets[hash].srdb_lock);
13849 
13850 }
13851 
13852 static void
13853 sfmmu_leave_srd(sfmmu_t *sfmmup)
13854 {
13855         vnode_t *evp;
13856         sf_srd_t *srdp = sfmmup->sfmmu_srdp;
13857         uint_t hash;
13858         sf_srd_t **prev_srdpp;
13859         sf_region_t *rgnp;
13860         sf_region_t *nrgnp;
13861 #ifdef DEBUG
13862         int rgns = 0;
13863 #endif
13864         int i;
13865 
13866         ASSERT(sfmmup != ksfmmup);
13867         ASSERT(srdp != NULL);
13868         ASSERT(srdp->srd_refcnt > 0);
13869         ASSERT(sfmmup->sfmmu_scdp == NULL);
13870         ASSERT(sfmmup->sfmmu_free == 1);
13871 
13872         sfmmup->sfmmu_srdp = NULL;
13873         evp = srdp->srd_evp;
13874         ASSERT(evp != NULL);
13875         if (atomic_dec_32_nv((volatile uint_t *)&srdp->srd_refcnt)) {
13876                 VN_RELE(evp);
13877                 return;
13878         }
13879 
13880         hash = SRD_HASH_FUNCTION(evp);
13881         mutex_enter(&srd_buckets[hash].srdb_lock);
13882         for (prev_srdpp = &srd_buckets[hash].srdb_srdp;
13883             (srdp = *prev_srdpp) != NULL; prev_srdpp = &srdp->srd_hash) {
13884                 if (srdp->srd_evp == evp) {
13885                         break;
13886                 }
13887         }
13888         if (srdp == NULL || srdp->srd_refcnt) {
13889                 mutex_exit(&srd_buckets[hash].srdb_lock);
13890                 VN_RELE(evp);
13891                 return;
13892         }
13893         *prev_srdpp = srdp->srd_hash;
13894         mutex_exit(&srd_buckets[hash].srdb_lock);
13895 


14072         mutex_enter(&srdp->srd_mutex);
14073 
14074         for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
14075             rgnp = rgnp->rgn_hash) {
14076                 if (rgnp->rgn_saddr == r_saddr && rgnp->rgn_size == r_size &&
14077                     rgnp->rgn_obj == r_obj && rgnp->rgn_objoff == r_objoff &&
14078                     rgnp->rgn_perm == r_perm && rgnp->rgn_pgszc == r_pgszc) {
14079                         break;
14080                 }
14081         }
14082 
14083 rfound:
14084         if (rgnp != NULL) {
14085                 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14086                 ASSERT(rgnp->rgn_cb_function == r_cb_function);
14087                 ASSERT(rgnp->rgn_refcnt >= 0);
14088                 rid = rgnp->rgn_id;
14089                 ASSERT(rid < maxids);
14090                 ASSERT(rarrp[rid] == rgnp);
14091                 ASSERT(rid < *nextidp);
14092                 atomic_inc_32((volatile uint_t *)&rgnp->rgn_refcnt);
14093                 mutex_exit(&srdp->srd_mutex);
14094                 if (new_rgnp != NULL) {
14095                         kmem_cache_free(region_cache, new_rgnp);
14096                 }
14097                 if (r_type == SFMMU_REGION_HME) {
14098                         int myjoin =
14099                             (sfmmup == astosfmmu(curthread->t_procp->p_as));
14100 
14101                         sfmmu_link_to_hmeregion(sfmmup, rgnp);
14102                         /*
14103                          * bitmap should be updated after linking sfmmu on
14104                          * region list so that pageunload() doesn't skip
14105                          * TSB/TLB flush. As soon as bitmap is updated another
14106                          * thread in this process can already start accessing
14107                          * this region.
14108                          */
14109                         /*
14110                          * Normally ttecnt accounting is done as part of
14111                          * pagefault handling. But a process may not take any
14112                          * pagefaults on shared hmeblks created by some other


14422 
14423                 /* update shme rgns ttecnt in sfmmu_ttecnt */
14424                 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14425                 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14426                 atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], -rttecnt);
14427 
14428                 sfmmu_hat_exit(hatlockp);
14429                 if (scdp != NULL && sfmmup->sfmmu_scdp == NULL) {
14430                         /* sfmmup left the scd, grow private tsb */
14431                         sfmmu_check_page_sizes(sfmmup, 1);
14432                 } else {
14433                         sfmmu_check_page_sizes(sfmmup, 0);
14434                 }
14435         }
14436 
14437         if (r_type == SFMMU_REGION_HME) {
14438                 sfmmu_unlink_from_hmeregion(sfmmup, rgnp);
14439         }
14440 
14441         r_obj = rgnp->rgn_obj;
14442         if (atomic_dec_32_nv((volatile uint_t *)&rgnp->rgn_refcnt)) {
14443                 return;
14444         }
14445 
14446         /*
14447          * looks like nobody uses this region anymore. Free it.
14448          */
14449         rhash = RGN_HASH_FUNCTION(r_obj);
14450         mutex_enter(&srdp->srd_mutex);
14451         for (prev_rgnpp = &srdp->srd_rgnhash[rhash];
14452             (cur_rgnp = *prev_rgnpp) != NULL;
14453             prev_rgnpp = &cur_rgnp->rgn_hash) {
14454                 if (cur_rgnp == rgnp && cur_rgnp->rgn_refcnt == 0) {
14455                         break;
14456                 }
14457         }
14458 
14459         if (cur_rgnp == NULL) {
14460                 mutex_exit(&srdp->srd_mutex);
14461                 return;
14462         }


14507         uint_t rid = (uint_t)((uint64_t)rcookie);
14508         sf_region_t *rgnp;
14509         sf_rgn_link_t *rlink;
14510         sf_rgn_link_t *hrlink;
14511         ulong_t rttecnt;
14512 
14513         ASSERT(sfmmup != ksfmmup);
14514         ASSERT(srdp != NULL);
14515         ASSERT(srdp->srd_refcnt > 0);
14516 
14517         ASSERT(rid < srdp->srd_next_hmerid);
14518         ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14519         ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14520 
14521         rgnp = srdp->srd_hmergnp[rid];
14522         ASSERT(rgnp->rgn_refcnt > 0);
14523         ASSERT(rgnp->rgn_id == rid);
14524         ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == SFMMU_REGION_HME);
14525         ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14526 
14527         atomic_inc_32((volatile uint_t *)&rgnp->rgn_refcnt);
14528 
14529         /* LINTED: constant in conditional context */
14530         SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 0);
14531         ASSERT(rlink != NULL);
14532         mutex_enter(&rgnp->rgn_mutex);
14533         ASSERT(rgnp->rgn_sfmmu_head != NULL);
14534         /* LINTED: constant in conditional context */
14535         SFMMU_HMERID2RLINKP(rgnp->rgn_sfmmu_head, rid, hrlink, 0, 0);
14536         ASSERT(hrlink != NULL);
14537         ASSERT(hrlink->prev == NULL);
14538         rlink->next = rgnp->rgn_sfmmu_head;
14539         rlink->prev = NULL;
14540         hrlink->prev = sfmmup;
14541         /*
14542          * make sure rlink's next field is correct
14543          * before making this link visible.
14544          */
14545         membar_stst();
14546         rgnp->rgn_sfmmu_head = sfmmup;
14547         mutex_exit(&rgnp->rgn_mutex);


15237 static void
15238 sfmmu_find_scd(sfmmu_t *sfmmup)
15239 {
15240         sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15241         sf_scd_t *scdp, *new_scdp;
15242         int ret;
15243 
15244         ASSERT(srdp != NULL);
15245         ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
15246 
15247         mutex_enter(&srdp->srd_scd_mutex);
15248         for (scdp = srdp->srd_scdp; scdp != NULL;
15249             scdp = scdp->scd_next) {
15250                 SF_RGNMAP_EQUAL(&scdp->scd_region_map,
15251                     &sfmmup->sfmmu_region_map, ret);
15252                 if (ret == 1) {
15253                         SF_SCD_INCR_REF(scdp);
15254                         mutex_exit(&srdp->srd_scd_mutex);
15255                         sfmmu_join_scd(scdp, sfmmup);
15256                         ASSERT(scdp->scd_refcnt >= 2);
15257                         atomic_dec_32((volatile uint32_t *)&scdp->scd_refcnt);
15258                         return;
15259                 } else {
15260                         /*
15261                          * If the sfmmu region map is a subset of the scd
15262                          * region map, then the assumption is that this process
15263                          * will continue attaching to ISM segments until the
15264                          * region maps are equal.
15265                          */
15266                         SF_RGNMAP_IS_SUBSET(&scdp->scd_region_map,
15267                             &sfmmup->sfmmu_region_map, ret);
15268                         if (ret == 1) {
15269                                 mutex_exit(&srdp->srd_scd_mutex);
15270                                 return;
15271                         }
15272                 }
15273         }
15274 
15275         ASSERT(scdp == NULL);
15276         /*
15277          * No matching SCD has been found, create a new one.


15282                 return;
15283         }
15284 
15285         /*
15286          * sfmmu_alloc_scd() returns with a ref count of 1 on the scd.
15287          */
15288 
15289         /* Set scd_rttecnt for shme rgns in SCD */
15290         sfmmu_set_scd_rttecnt(srdp, new_scdp);
15291 
15292         /*
15293          * Link scd onto srd_scdp list and scd sfmmu onto region/iment lists.
15294          */
15295         sfmmu_link_scd_to_regions(srdp, new_scdp);
15296         sfmmu_add_scd(&srdp->srd_scdp, new_scdp);
15297         SFMMU_STAT_ADD(sf_create_scd, 1);
15298 
15299         mutex_exit(&srdp->srd_scd_mutex);
15300         sfmmu_join_scd(new_scdp, sfmmup);
15301         ASSERT(new_scdp->scd_refcnt >= 2);
15302         atomic_dec_32((volatile uint32_t *)&new_scdp->scd_refcnt);
15303 }
15304 
15305 /*
15306  * This routine is called by a process to remove itself from an SCD. It is
15307  * either called when the process has detached from a segment or from
15308  * hat_free_start() as a result of calling exit.
15309  */
15310 static void
15311 sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type)
15312 {
15313         sf_scd_t *scdp = sfmmup->sfmmu_scdp;
15314         sf_srd_t *srdp =  sfmmup->sfmmu_srdp;
15315         hatlock_t *hatlockp = TSB_HASH(sfmmup);
15316         int i;
15317 
15318         ASSERT(scdp != NULL);
15319         ASSERT(srdp != NULL);
15320 
15321         if (sfmmup->sfmmu_free) {
15322                 /*