patch as-lock-macro-simplification
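
The hunks below are from the ISM/DISM shared-page-table driver (seg_spt.c). A minimal
sketch of the change this patch applies at every call site, assuming the illumos
AS_LOCK wrapper macros: the rwlock argument was always the address space's own
a_lock, so the simplified macros can derive it internally.

	/* before: callers name the rwlock explicitly */
	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
	AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
	AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);

	/* after: the macros take only the as pointer */
	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
	AS_LOCK_ENTER(sptseg->s_as, RW_READER);
	AS_LOCK_EXIT(sptseg->s_as);

The first listing below is the pre-patch code; the second repeats the same hunks
with the simplified macros applied.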


 224 sptdestroy(struct as *as, struct anon_map *amp)
 225 {
 226 
 227 #ifdef DEBUG
 228         TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
 229 #endif
 230         (void) as_unmap(as, SEGSPTADDR, amp->size);
 231         as_free(as);
 232 }
 233 
 234 /*
 235  * called from seg_free().
 236  * free (i.e., unlock, unmap, return to free list)
 237  *  all the pages in the given seg.
 238  */
 239 void
 240 segspt_free(struct seg  *seg)
 241 {
 242         struct spt_data *sptd = (struct spt_data *)seg->s_data;
 243 
 244         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 245 
 246         if (sptd != NULL) {
 247                 if (sptd->spt_realsize)
 248                         segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
 249 
 250                 if (sptd->spt_ppa_lckcnt)
 251                         kmem_free(sptd->spt_ppa_lckcnt,
 252                             sizeof (*sptd->spt_ppa_lckcnt)
 253                             * btopr(sptd->spt_amp->size));
 254                 kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
 255                 cv_destroy(&sptd->spt_cv);
 256                 mutex_destroy(&sptd->spt_lock);
 257                 kmem_free(sptd, sizeof (*sptd));
 258         }
 259 }
 260 
 261 /*ARGSUSED*/
 262 static int
 263 segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
 264         uint_t flags)
 265 {
 266         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 267 
 268         return (0);
 269 }
 270 
 271 /*ARGSUSED*/
 272 static size_t
 273 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
 274 {
 275         caddr_t eo_seg;
 276         pgcnt_t npages;
 277         struct shm_data *shmd = (struct shm_data *)seg->s_data;
 278         struct seg      *sptseg;
 279         struct spt_data *sptd;
 280 
 281         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 282 #ifdef lint
 283         seg = seg;
 284 #endif
 285         sptseg = shmd->shm_sptseg;
 286         sptd = sptseg->s_data;
 287 
 288         if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
 289                 eo_seg = addr + len;
 290                 while (addr < eo_seg) {
 291                         /* page exists, and it's locked. */
 292                         *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
 293                             SEG_PAGE_ANON;
 294                         addr += PAGESIZE;
 295                 }
 296                 return (len);
 297         } else {
 298                 struct  anon_map *amp = shmd->shm_amp;
 299                 struct  anon    *ap;
 300                 page_t          *pp;
 301                 pgcnt_t         anon_index;


 325                                         page_unlock(pp);
 326                                 }
 327                         } else {
 328                                 anon_array_exit(&cookie);
 329                         }
 330                         if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
 331                                 ret |= SEG_PAGE_LOCKED;
 332                         }
 333                         *vec++ = (char)ret;
 334                 }
 335                 ANON_LOCK_EXIT(&amp->a_rwlock);
 336                 return (len);
 337         }
 338 }
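
A sketch of what segspt_shmincore() reports per page, using the flag values from
the code above: ISM pages stay resident and locked for the life of the segment, so
the vector is filled unconditionally; DISM computes each byte from the anon map and
the shm_vpage[] lock state.

	/* ISM: every page in the range */
	*vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED | SEG_PAGE_ANON;

	/* DISM: per page; SEG_PAGE_LOCKED only when shm_vpage[] says so */
	if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)
		ret |= SEG_PAGE_LOCKED;
	*vec++ = (char)ret;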
 339 
 340 static int
 341 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
 342 {
 343         size_t share_size;
 344 
 345         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 346 
 347         /*
 348          * seg.s_size may have been rounded up to the largest page size
 349          * in shmat().
 350          * XXX This should be cleaned up. sptdestroy should take a length
 351          * argument which should be the same as sptcreate. Then
 352          * this rounding would not be needed (or is done in shm.c)
 353          * Only the check for full segment will be needed.
 354          *
 355          * XXX -- shouldn't raddr == 0 always? These tests don't seem
 356          * to be useful at all.
 357          */
 358         share_size = page_get_pagesize(seg->s_szc);
 359         ssize = P2ROUNDUP(ssize, share_size);
 360 
 361         if (raddr == seg->s_base && ssize == seg->s_size) {
 362                 seg_free(seg);
 363                 return (0);
 364         } else
 365                 return (EINVAL);
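
A worked example of the rounding above, assuming seg->s_szc selects a 4MB large
page; P2ROUNDUP(x, align) rounds x up to the next multiple of the power-of-two
align:

	share_size = page_get_pagesize(seg->s_szc);	/* 4MB */
	ssize = P2ROUNDUP(5 * 1024 * 1024, share_size);	/* 5MB -> 8MB */

Only an unmap of the whole (rounded) segment is accepted; anything else returns
EINVAL.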


 376         struct kshmid   *sp = amp->a_sp;
 377         struct  cred    *cred = CRED();
 378         ulong_t         i, j, anon_index = 0;
 379         pgcnt_t         npages = btopr(amp->size);
 380         struct vnode    *vp;
 381         page_t          **ppa;
 382         uint_t          hat_flags;
 383         size_t          pgsz;
 384         pgcnt_t         pgcnt;
 385         caddr_t         a;
 386         pgcnt_t         pidx;
 387         size_t          sz;
 388         proc_t          *procp = curproc;
 389         rctl_qty_t      lockedbytes = 0;
 390         kproject_t      *proj;
 391 
 392         /*
 393          * We are holding the a_lock on the underlying dummy as,
 394          * so we can make calls to the HAT layer.
 395          */
 396         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 397         ASSERT(sp != NULL);
 398 
 399 #ifdef DEBUG
 400         TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
 401             tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
 402 #endif
 403         if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
 404                 if (err = anon_swap_adjust(npages))
 405                         return (err);
 406         }
 407         err = ENOMEM;
 408 
 409         if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
 410                 goto out1;
 411 
 412         if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
 413                 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
 414                     KM_NOSLEEP)) == NULL)
 415                         goto out2;
 416         }


 612 /*ARGSUSED*/
 613 void
 614 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
 615 {
 616         struct page     *pp;
 617         struct spt_data *sptd = (struct spt_data *)seg->s_data;
 618         pgcnt_t         npages;
 619         ulong_t         anon_idx;
 620         struct anon_map *amp;
 621         struct anon     *ap;
 622         struct vnode    *vp;
 623         u_offset_t      off;
 624         uint_t          hat_flags;
 625         int             root = 0;
 626         pgcnt_t         pgs, curnpgs = 0;
 627         page_t          *rootpp;
 628         rctl_qty_t      unlocked_bytes = 0;
 629         kproject_t      *proj;
 630         kshmid_t        *sp;
 631 
 632         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 633 
 634         len = P2ROUNDUP(len, PAGESIZE);
 635 
 636         npages = btop(len);
 637 
 638         hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
 639         if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
 640             (sptd->spt_flags & SHM_PAGEABLE)) {
 641                 hat_flags = HAT_UNLOAD_UNMAP;
 642         }
 643 
 644         hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
 645 
 646         amp = sptd->spt_amp;
 647         if (sptd->spt_flags & SHM_PAGEABLE)
 648                 npages = btop(amp->size);
 649 
 650         ASSERT(amp != NULL);
 651 
 652         if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {


 821 /*ARGSUSED*/
 822 static int
 823 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
 824     struct page ***ppp, enum lock_type type, enum seg_rw rw)
 825 {
 826         struct  shm_data *shmd = (struct shm_data *)seg->s_data;
 827         struct  seg     *sptseg = shmd->shm_sptseg;
 828         struct  spt_data *sptd = sptseg->s_data;
 829         pgcnt_t pg_idx, npages, tot_npages, npgs;
 830         struct  page **pplist, **pl, **ppa, *pp;
 831         struct  anon_map *amp;
 832         spgcnt_t        an_idx;
 833         int     ret = ENOTSUP;
 834         uint_t  pl_built = 0;
 835         struct  anon *ap;
 836         struct  vnode *vp;
 837         u_offset_t off;
 838         pgcnt_t claim_availrmem = 0;
 839         uint_t  szc;
 840 
 841         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
 842         ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
 843 
 844         /*
 845          * We want to lock/unlock the entire ISM segment. Therefore,
 846          * we will be using the underlying sptseg and its base address
 847          * and length for the caching arguments.
 848          */
 849         ASSERT(sptseg);
 850         ASSERT(sptd);
 851 
 852         pg_idx = seg_page(seg, addr);
 853         npages = btopr(len);
 854 
 855         /*
 856          * check if the request is larger than the number of pages covered
 857          * by amp
 858          */
 859         if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
 860                 *ppp = NULL;
 861                 return (ENOTSUP);


1176  */
1177 /*ARGSUSED*/
1178 static int
1179 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
1180     struct page ***ppp, enum lock_type type, enum seg_rw rw)
1181 {
1182         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1183         struct seg      *sptseg = shmd->shm_sptseg;
1184         struct spt_data *sptd = sptseg->s_data;
1185         pgcnt_t np, page_index, npages;
1186         caddr_t a, spt_base;
1187         struct page **pplist, **pl, *pp;
1188         struct anon_map *amp;
1189         ulong_t anon_index;
1190         int ret = ENOTSUP;
1191         uint_t  pl_built = 0;
1192         struct anon *ap;
1193         struct vnode *vp;
1194         u_offset_t off;
1195 
1196         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1197         ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
1198 
1199 
1200         /*
1201          * We want to lock/unlock the entire ISM segment. Therefore,
 1202          * we will be using the underlying sptseg and its base address
1203          * and length for the caching arguments.
1204          */
1205         ASSERT(sptseg);
1206         ASSERT(sptd);
1207 
1208         if (sptd->spt_flags & SHM_PAGEABLE) {
1209                 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
1210         }
1211 
1212         page_index = seg_page(seg, addr);
1213         npages = btopr(len);
1214 
1215         /*
 1216          * check if the request is larger than the number of pages covered


1434 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
1435         enum seg_rw rw, int async)
1436 {
1437         struct seg *seg = (struct seg *)ptag;
1438         struct  shm_data *shmd = (struct shm_data *)seg->s_data;
1439         struct  seg     *sptseg;
1440         struct  spt_data *sptd;
1441         pgcnt_t npages, i, free_availrmem = 0;
1442         int     done = 0;
1443 
1444 #ifdef lint
1445         addr = addr;
1446 #endif
1447         sptseg = shmd->shm_sptseg;
1448         sptd = sptseg->s_data;
1449         npages = (len >> PAGESHIFT);
1450         ASSERT(npages);
1451         ASSERT(sptd->spt_pcachecnt != 0);
1452         ASSERT(sptd->spt_ppa == pplist);
1453         ASSERT(npages == btopr(sptd->spt_amp->size));
1454         ASSERT(async || AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1455 
1456         /*
1457          * Acquire the lock on the dummy seg and destroy the
1458          * ppa array IF this is the last pcachecnt.
1459          */
1460         mutex_enter(&sptd->spt_lock);
1461         if (--sptd->spt_pcachecnt == 0) {
1462                 for (i = 0; i < npages; i++) {
1463                         if (pplist[i] == NULL) {
1464                                 continue;
1465                         }
1466                         if (rw == S_WRITE) {
1467                                 hat_setrefmod(pplist[i]);
1468                         } else {
1469                                 hat_setref(pplist[i]);
1470                         }
1471                         if ((sptd->spt_flags & SHM_PAGEABLE) &&
1472                             (sptd->spt_ppa_lckcnt[i] == 0))
1473                                 free_availrmem++;
1474                         page_unlock(pplist[i]);


 1568  * it's a read.  Thus cow faults can be ignored with respect to soft
1569  * unlocking, since the breaking of cow means that the anon slot(s) will
1570  * not be shared.
1571  */
1572 static void
1573 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1574         size_t len, enum seg_rw rw)
1575 {
1576         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1577         struct seg      *sptseg;
1578         struct spt_data *sptd;
1579         page_t *pp;
1580         caddr_t adr;
1581         struct vnode *vp;
1582         u_offset_t offset;
1583         ulong_t anon_index;
1584         struct anon_map *amp;           /* XXX - for locknest */
1585         struct anon *ap = NULL;
1586         pgcnt_t npages;
1587 
1588         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1589 
1590         sptseg = shmd->shm_sptseg;
1591         sptd = sptseg->s_data;
1592 
1593         /*
1594          * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1595          * and therefore their pages are SE_SHARED locked
1596          * for the entire life of the segment.
1597          */
1598         if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1599             ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1600                 goto softlock_decrement;
1601         }
1602 
1603         /*
1604          * Any thread is free to do a page_find and
1605          * page_unlock() on the pages within this seg.
1606          *
1607          * We are already holding the as->a_lock on the user's
1608          * real segment, but we need to hold the a_lock on the
1609          * underlying dummy as. This is mostly to satisfy the
1610          * underlying HAT layer.
1611          */
1612         AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1613         hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1614         AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1615 
1616         amp = sptd->spt_amp;
1617         ASSERT(amp != NULL);
1618         anon_index = seg_page(sptseg, sptseg_addr);
1619 
1620         for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1621                 ap = anon_get_ptr(amp->ahp, anon_index++);
1622                 ASSERT(ap != NULL);
1623                 swap_xlate(ap, &vp, &offset);
1624 
1625                 /*
1626                  * Use page_find() instead of page_lookup() to
1627                  * find the page since we know that it has a
1628                  * "shared" lock.
1629                  */
1630                 pp = page_find(vp, offset);
1631                 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1632                 if (pp == NULL) {
1633                         panic("segspt_softunlock: "
1634                             "addr %p, ap %p, vp %p, off %llx",


1659                 if (AS_ISUNMAPWAIT(seg->s_as)) {
1660                         mutex_enter(&seg->s_as->a_contents);
1661                         if (AS_ISUNMAPWAIT(seg->s_as)) {
1662                                 AS_CLRUNMAPWAIT(seg->s_as);
1663                                 cv_broadcast(&seg->s_as->a_cv);
1664                         }
1665                         mutex_exit(&seg->s_as->a_contents);
1666                 }
1667         }
1668 }
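
A note on the wakeup at the end of segspt_softunlock() above: AS_ISUNMAPWAIT() is
tested once without a_contents held as a cheap fast path, then re-tested under the
mutex so that clearing the flag and broadcasting a_cv cannot race with a thread
that is about to block waiting for the unmap (the same shape, with comments added):

	if (AS_ISUNMAPWAIT(seg->s_as)) {		/* unlocked fast path */
		mutex_enter(&seg->s_as->a_contents);
		if (AS_ISUNMAPWAIT(seg->s_as)) {	/* re-check under lock */
			AS_CLRUNMAPWAIT(seg->s_as);
			cv_broadcast(&seg->s_as->a_cv);
		}
		mutex_exit(&seg->s_as->a_contents);
	}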
1669 
1670 int
1671 segspt_shmattach(struct seg *seg, caddr_t *argsp)
1672 {
1673         struct shm_data *shmd_arg = (struct shm_data *)argsp;
1674         struct shm_data *shmd;
1675         struct anon_map *shm_amp = shmd_arg->shm_amp;
1676         struct spt_data *sptd;
1677         int error = 0;
1678 
1679         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1680 
1681         shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1682         if (shmd == NULL)
1683                 return (ENOMEM);
1684 
1685         shmd->shm_sptas = shmd_arg->shm_sptas;
1686         shmd->shm_amp = shm_amp;
1687         shmd->shm_sptseg = shmd_arg->shm_sptseg;
1688 
1689         (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1690             NULL, 0, seg->s_size);
1691 
1692         mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);
1693 
1694         seg->s_data = (void *)shmd;
1695         seg->s_ops = &segspt_shmops;
1696         seg->s_szc = shmd->shm_sptseg->s_szc;
1697         sptd = shmd->shm_sptseg->s_data;
1698 
1699         if (sptd->spt_flags & SHM_PAGEABLE) {


1718                     seg->s_size, seg->s_szc);
1719         }
1720         if (error) {
1721                 seg->s_szc = 0;
1722                 seg->s_data = (void *)NULL;
1723                 kmem_free(shmd, (sizeof (*shmd)));
1724         } else {
1725                 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1726                 shm_amp->refcnt++;
1727                 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1728         }
1729         return (error);
1730 }
1731 
1732 int
1733 segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
1734 {
1735         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1736         int reclaim = 1;
1737 
1738         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1739 retry:
1740         if (shmd->shm_softlockcnt > 0) {
1741                 if (reclaim == 1) {
1742                         segspt_purge(seg);
1743                         reclaim = 0;
1744                         goto retry;
1745                 }
1746                 return (EAGAIN);
1747         }
1748 
1749         if (ssize != seg->s_size) {
1750 #ifdef DEBUG
1751                 cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
1752                     ssize, seg->s_size);
1753 #endif
1754                 return (EINVAL);
1755         }
1756 
1757         (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
1758             NULL, 0);
1759         hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);
1760 
1761         seg_free(seg);
1762 
1763         return (0);
1764 }
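
On the retry shape at the top of segspt_shmunmap(): a nonzero shm_softlockcnt means
pagelock-cached pages still pin the segment, so it cannot be unmapped yet.
segspt_purge() is attempted exactly once (the reclaim flag guards against looping);
if the count is still nonzero afterwards the caller gets EAGAIN and must try again
later.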
1765 
1766 void
1767 segspt_shmfree(struct seg *seg)
1768 {
1769         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1770         struct anon_map *shm_amp = shmd->shm_amp;
1771 
1772         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1773 
1774         (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
1775             MC_UNLOCK, NULL, 0);
1776 
1777         /*
1778          * Need to increment refcnt when attaching
1779          * and decrement when detaching because of dup().
1780          */
1781         ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1782         shm_amp->refcnt--;
1783         ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1784 
1785         if (shmd->shm_vpage) {       /* only for DISM */
1786                 kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
1787                 shmd->shm_vpage = NULL;
1788         }
1789 
1790         /*
1791          * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
1792          * still working with this segment without holding as lock.
1793          */
1794         ASSERT(shmd->shm_softlockcnt == 0);
1795         mutex_enter(&shmd->shm_segfree_syncmtx);
1796         mutex_destroy(&shmd->shm_segfree_syncmtx);
1797 
1798         kmem_free(shmd, sizeof (*shmd));
1799 }
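
The mutex_enter()/mutex_destroy() pair at the end of segspt_shmfree(), with no
intervening mutex_exit(), is deliberate: entering shm_segfree_syncmtx blocks until
any segspt_reclaim() still running without the as lock has released it, after which
it is safe to tear the mutex down and free shmd.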
1800 
1801 /*ARGSUSED*/
1802 int
1803 segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1804 {
1805         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1806 
1807         /*
 1808          * Shared page table is more than a shared mapping.
 1809          *  Individual processes sharing the page tables can't change
 1810          *  protections because there is only one set of page tables.
 1811          *  This will be allowed once private page tables are
 1812          *  supported.
1813          */
1814 /* need to return correct status error? */
1815         return (0);
1816 }
1817 
1818 
1819 faultcode_t
1820 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1821     size_t len, enum fault_type type, enum seg_rw rw)
1822 {
1823         struct  shm_data        *shmd = (struct shm_data *)seg->s_data;
1824         struct  seg             *sptseg = shmd->shm_sptseg;
1825         struct  as              *curspt = shmd->shm_sptas;
1826         struct  spt_data        *sptd = sptseg->s_data;
1827         pgcnt_t npages;
1828         size_t  size;
1829         caddr_t segspt_addr, shm_addr;
1830         page_t  **ppa;
1831         int     i;
1832         ulong_t an_idx = 0;
1833         int     err = 0;
1834         int     dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
1835         size_t  pgsz;
1836         pgcnt_t pgcnt;
1837         caddr_t a;
1838         pgcnt_t pidx;
1839 
1840 #ifdef lint
1841         hat = hat;
1842 #endif
1843         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1844 
1845         /*
1846          * Because of the way spt is implemented
1847          * the realsize of the segment does not have to be
1848          * equal to the segment size itself. The segment size is
1849          * often in multiples of a page size larger than PAGESIZE.
1850          * The realsize is rounded up to the nearest PAGESIZE
1851          * based on what the user requested. This is a bit of
 1852          * ugliness that is historical but not easily fixed
1853          * without re-designing the higher levels of ISM.
1854          */
1855         ASSERT(addr >= seg->s_base);
1856         if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1857                 return (FC_NOMAP);
1858         /*
1859          * For all of the following cases except F_PROT, we need to
1860          * make any necessary adjustments to addr and len
1861          * and get all of the necessary page_t's into an array called ppa[].
1862          *
1863          * The code in shmat() forces base addr and len of ISM segment


1892                 /*
1893                  * Fall through to the F_INVAL case to load up the hat layer
1894                  * entries with the HAT_LOAD_LOCK flag.
1895                  */
1896                 /* FALLTHRU */
1897         case F_INVAL:
1898 
1899                 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1900                         return (FC_NOMAP);
1901 
1902                 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1903 
1904                 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1905                 if (err != 0) {
1906                         if (type == F_SOFTLOCK) {
1907                                 atomic_add_long((ulong_t *)(
1908                                     &(shmd->shm_softlockcnt)), -npages);
1909                         }
1910                         goto dism_err;
1911                 }
1912                 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1913                 a = segspt_addr;
1914                 pidx = 0;
1915                 if (type == F_SOFTLOCK) {
1916 
1917                         /*
1918                          * Load up the translation keeping it
1919                          * locked and don't unlock the page.
1920                          */
1921                         for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1922                                 hat_memload_array(sptseg->s_as->a_hat,
1923                                     a, pgsz, &ppa[pidx], sptd->spt_prot,
1924                                     HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1925                         }
1926                 } else {
1927                         if (hat == seg->s_as->a_hat) {
1928 
1929                                 /*
1930                                  * Migrate pages marked for migration
1931                                  */
1932                                 if (lgrp_optimizations())


1953                         if (dyn_ism_unmap) {
1954                                 for (i = 0; i < npages; i++) {
1955                                         page_unlock(ppa[i]);
1956                                 }
1957                         }
1958                 }
1959 
1960                 if (!dyn_ism_unmap) {
1961                         if (hat_share(seg->s_as->a_hat, shm_addr,
1962                             curspt->a_hat, segspt_addr, ptob(npages),
1963                             seg->s_szc) != 0) {
1964                                 panic("hat_share err in DISM fault");
1965                                 /* NOTREACHED */
1966                         }
1967                         if (type == F_INVAL) {
1968                                 for (i = 0; i < npages; i++) {
1969                                         page_unlock(ppa[i]);
1970                                 }
1971                         }
1972                 }
1973                 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1974 dism_err:
1975                 kmem_free(ppa, npages * sizeof (page_t *));
1976                 return (err);
1977 
1978         case F_SOFTUNLOCK:
1979 
1980                 /*
1981                  * This is a bit ugly, we pass in the real seg pointer,
1982                  * but the segspt_addr is the virtual address within the
1983                  * dummy seg.
1984                  */
1985                 segspt_softunlock(seg, segspt_addr, size, rw);
1986                 return (0);
1987 
1988         case F_PROT:
1989 
1990                 /*
1991                  * This takes care of the unusual case where a user
1992                  * allocates a stack in shared memory and a register
1993                  * window overflow is written to that stack page before


2020         pgcnt_t npages;
2021         size_t size;
2022         caddr_t sptseg_addr, shm_addr;
2023         page_t *pp, **ppa;
2024         int     i;
2025         u_offset_t offset;
2026         ulong_t anon_index = 0;
2027         struct vnode *vp;
2028         struct anon_map *amp;           /* XXX - for locknest */
2029         struct anon *ap = NULL;
2030         size_t          pgsz;
2031         pgcnt_t         pgcnt;
2032         caddr_t         a;
2033         pgcnt_t         pidx;
2034         size_t          sz;
2035 
2036 #ifdef lint
2037         hat = hat;
2038 #endif
2039 
2040         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2041 
2042         if (sptd->spt_flags & SHM_PAGEABLE) {
2043                 return (segspt_dismfault(hat, seg, addr, len, type, rw));
2044         }
2045 
2046         /*
2047          * Because of the way spt is implemented
2048          * the realsize of the segment does not have to be
2049          * equal to the segment size itself. The segment size is
2050          * often in multiples of a page size larger than PAGESIZE.
2051          * The realsize is rounded up to the nearest PAGESIZE
2052          * based on what the user requested. This is a bit of
 2053          * ugliness that is historical but not easily fixed
2054          * without re-designing the higher levels of ISM.
2055          */
2056         ASSERT(addr >= seg->s_base);
2057         if (((addr + len) - seg->s_base) > sptd->spt_realsize)
2058                 return (FC_NOMAP);
2059         /*
2060          * For all of the following cases except F_PROT, we need to


2152                 anon_index = seg_page(sptseg, sptseg_addr);
2153 
2154                 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2155                 for (i = 0; i < npages; i++) {
2156                         ap = anon_get_ptr(amp->ahp, anon_index++);
2157                         ASSERT(ap != NULL);
2158                         swap_xlate(ap, &vp, &offset);
2159                         pp = page_lookup(vp, offset, SE_SHARED);
2160                         ASSERT(pp != NULL);
2161                         ppa[i] = pp;
2162                 }
2163                 ANON_LOCK_EXIT(&amp->a_rwlock);
2164                 ASSERT(i == npages);
2165 
2166                 /*
2167                  * We are already holding the as->a_lock on the user's
2168                  * real segment, but we need to hold the a_lock on the
2169                  * underlying dummy as. This is mostly to satisfy the
2170                  * underlying HAT layer.
2171                  */
2172                 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
2173                 a = sptseg_addr;
2174                 pidx = 0;
2175                 if (type == F_SOFTLOCK) {
2176                         /*
2177                          * Load up the translation keeping it
2178                          * locked and don't unlock the page.
2179                          */
2180                         for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2181                                 sz = MIN(pgsz, ptob(npages - pidx));
2182                                 hat_memload_array(sptseg->s_as->a_hat, a,
2183                                     sz, &ppa[pidx], sptd->spt_prot,
2184                                     HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2185                         }
2186                 } else {
2187                         if (hat == seg->s_as->a_hat) {
2188 
2189                                 /*
2190                                  * Migrate pages marked for migration.
2191                                  */
2192                                 if (lgrp_optimizations())


2197                                 for (; pidx < npages;
2198                                     a += pgsz, pidx += pgcnt) {
2199                                         sz = MIN(pgsz, ptob(npages - pidx));
2200                                         hat_memload_array(sptseg->s_as->a_hat,
2201                                             a, sz, &ppa[pidx],
2202                                             sptd->spt_prot, HAT_LOAD_SHARE);
2203                                 }
2204                         } else {
2205                                 /* XHAT. Pass real address */
2206                                 hat_memload_array(hat, shm_addr,
2207                                     ptob(npages), ppa, sptd->spt_prot,
2208                                     HAT_LOAD_SHARE);
2209                         }
2210 
2211                         /*
2212                          * And now drop the SE_SHARED lock(s).
2213                          */
2214                         for (i = 0; i < npages; i++)
2215                                 page_unlock(ppa[i]);
2216                 }
2217                 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
2218 
2219                 kmem_free(ppa, sizeof (page_t *) * npages);
2220                 return (0);
2221         case F_SOFTUNLOCK:
2222 
2223                 /*
2224                  * This is a bit ugly, we pass in the real seg pointer,
2225                  * but the sptseg_addr is the virtual address within the
2226                  * dummy seg.
2227                  */
2228                 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2229                 return (0);
2230 
2231         case F_PROT:
2232 
2233                 /*
2234                  * This takes care of the unusual case where a user
2235                  * allocates a stack in shared memory and a register
2236                  * window overflow is written to that stack page before
2237                  * it is otherwise modified.


2267 /*ARGSUSED*/
2268 static size_t
2269 segspt_shmswapout(struct seg *seg)
2270 {
2271         return (0);
2272 }
2273 
2274 /*
2275  * duplicate the shared page tables
2276  */
2277 int
2278 segspt_shmdup(struct seg *seg, struct seg *newseg)
2279 {
2280         struct shm_data         *shmd = (struct shm_data *)seg->s_data;
2281         struct anon_map         *amp = shmd->shm_amp;
2282         struct shm_data         *shmd_new;
2283         struct seg              *spt_seg = shmd->shm_sptseg;
2284         struct spt_data         *sptd = spt_seg->s_data;
2285         int                     error = 0;
2286 
2287         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
2288 
2289         shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2290         newseg->s_data = (void *)shmd_new;
2291         shmd_new->shm_sptas = shmd->shm_sptas;
2292         shmd_new->shm_amp = amp;
2293         shmd_new->shm_sptseg = shmd->shm_sptseg;
2294         newseg->s_ops = &segspt_shmops;
2295         newseg->s_szc = seg->s_szc;
2296         ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2297 
2298         ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
2299         amp->refcnt++;
2300         ANON_LOCK_EXIT(&amp->a_rwlock);
2301 
2302         if (sptd->spt_flags & SHM_PAGEABLE) {
2303                 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2304                 shmd_new->shm_lckpgs = 0;
2305                 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2306                         if ((error = hat_share(newseg->s_as->a_hat,
2307                             newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,


2309                                 kmem_free(shmd_new->shm_vpage,
2310                                     btopr(amp->size));
2311                         }
2312                 }
2313                 return (error);
2314         } else {
2315                 return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2316                     shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2317                     seg->s_szc));
2318 
2319         }
2320 }
2321 
2322 /*ARGSUSED*/
2323 int
2324 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2325 {
2326         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2327         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2328 
2329         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2330 
2331         /*
2332          * ISM segment is always rw.
2333          */
2334         return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2335 }
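
The subset test in segspt_shmcheckprot() returns EACCES as soon as any requested
bit is missing from spt_prot. A worked example, assuming spt_prot ==
(PROT_READ | PROT_WRITE), the usual "always rw" ISM case noted above:

	/* prot = PROT_READ:             (spt_prot & prot) == prot -> 0      */
	/* prot = PROT_READ | PROT_EXEC: PROT_EXEC bit missing    -> EACCES */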
2336 
2337 /*
2338  * Return an array of locked large pages, for empty slots allocate
2339  * private zero-filled anon pages.
2340  */
2341 static int
2342 spt_anon_getpages(
2343         struct seg *sptseg,
2344         caddr_t sptaddr,
2345         size_t len,
2346         page_t *ppa[])
2347 {
2348         struct  spt_data *sptd = sptseg->s_data;
2349         struct  anon_map *amp = sptd->spt_amp;


2660 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2661     int attr, int op, ulong_t *lockmap, size_t pos)
2662 {
2663         struct shm_data *shmd = seg->s_data;
2664         struct seg      *sptseg = shmd->shm_sptseg;
2665         struct spt_data *sptd = sptseg->s_data;
2666         struct kshmid   *sp = sptd->spt_amp->a_sp;
2667         pgcnt_t         npages, a_npages;
2668         page_t          **ppa;
2669         pgcnt_t         an_idx, a_an_idx, ppa_idx;
2670         caddr_t         spt_addr, a_addr;       /* spt and aligned address */
2671         size_t          a_len;                  /* aligned len */
2672         size_t          share_sz;
2673         ulong_t         i;
2674         int             sts = 0;
2675         rctl_qty_t      unlocked = 0;
2676         rctl_qty_t      locked = 0;
2677         struct proc     *p = curproc;
2678         kproject_t      *proj;
2679 
2680         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2681         ASSERT(sp != NULL);
2682 
2683         if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2684                 return (0);
2685         }
2686 
2687         addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2688         an_idx = seg_page(seg, addr);
2689         npages = btopr(len);
2690 
2691         if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2692                 return (ENOMEM);
2693         }
2694 
2695         /*
2696          * A shm's project never changes, so no lock needed.
2697          * The shm has a hold on the project, so it will not go away.
2698          * Since we have a mapping to shm within this zone, we know
2699          * that the zone will not go away.
2700          */


2790                         sptd->spt_flags |= DISM_PPA_CHANGED;
2791                 mutex_exit(&sptd->spt_lock);
2792 
2793                 rctl_decr_locked_mem(NULL, proj, unlocked, 0);
2794                 mutex_exit(&sp->shm_mlock);
2795 
2796                 if (ppa != NULL)
2797                         seg_ppurge_wiredpp(ppa);
2798         }
2799         return (sts);
2800 }
2801 
2802 /*ARGSUSED*/
2803 int
2804 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2805 {
2806         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2807         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2808         spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2809 
2810         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2811 
2812         /*
2813          * ISM segment is always rw.
2814          */
2815         while (--pgno >= 0)
2816                 *protv++ = sptd->spt_prot;
2817         return (0);
2818 }
2819 
2820 /*ARGSUSED*/
2821 u_offset_t
2822 segspt_shmgetoffset(struct seg *seg, caddr_t addr)
2823 {
2824         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2825 
2826         /* Offset does not matter in ISM memory */
2827 
2828         return ((u_offset_t)0);
2829 }
2830 
2831 /* ARGSUSED */
2832 int
2833 segspt_shmgettype(struct seg *seg, caddr_t addr)
2834 {
2835         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2836         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2837 
2838         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2839 
2840         /*
2841          * The shared memory mapping is always MAP_SHARED, SWAP is only
2842          * reserved for DISM
2843          */
2844         return (MAP_SHARED |
2845             ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2846 }
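
Decoding the return value of segspt_shmgettype(): both flavors report MAP_SHARED;
ISM (SHM_PAGEABLE clear) adds MAP_NORESERVE because no swap is reserved for it,
while DISM, whose swap is reserved, reports plain MAP_SHARED.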
2847 
2848 /*ARGSUSED*/
2849 int
2850 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2851 {
2852         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2853         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2854 
2855         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2856 
2857         *vpp = sptd->spt_vp;
2858         return (0);
2859 }
2860 
2861 /*
2862  * We need to wait for pending IO to complete to a DISM segment in order for
2863  * pages to get kicked out of the seg_pcache.  120 seconds should be more
2864  * than enough time to wait.
2865  */
2866 static clock_t spt_pcache_wait = 120;
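
spt_pcache_wait is in seconds; segspt_shmadvise() below scales it by hz when
computing the deadline for its cv_timedwait_sig() loop. For example, with the
common hz value of 100:

	end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);	/* +12000 ticks */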
2867 
2868 /*ARGSUSED*/
2869 static int
2870 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2871 {
2872         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2873         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2874         struct anon_map *amp;
2875         pgcnt_t pg_idx;
2876         ushort_t gen;
2877         clock_t end_lbolt;
2878         int writer;
2879         page_t **ppa;
2880 
2881         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2882 
2883         if (behav == MADV_FREE) {
2884                 if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2885                         return (0);
2886 
2887                 amp = sptd->spt_amp;
2888                 pg_idx = seg_page(seg, addr);
2889 
2890                 mutex_enter(&sptd->spt_lock);
2891                 if ((ppa = sptd->spt_ppa) == NULL) {
2892                         mutex_exit(&sptd->spt_lock);
2893                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2894                         anon_disclaim(amp, pg_idx, len);
2895                         ANON_LOCK_EXIT(&amp->a_rwlock);
2896                         return (0);
2897                 }
2898 
2899                 sptd->spt_flags |= DISM_PPA_CHANGED;
2900                 gen = sptd->spt_gen;
2901 
2902                 mutex_exit(&sptd->spt_lock);
2903 
2904                 /*
2905                  * Purge all DISM cached pages
2906                  */
2907                 seg_ppurge_wiredpp(ppa);
2908 
2909                 /*
2910                  * Drop the AS_LOCK so that other threads can grab it
2911                  * in the as_pageunlock path and hopefully get the segment
2912                  * kicked out of the seg_pcache.  We bump the shm_softlockcnt
2913                  * to keep this segment resident.
2914                  */
2915                 writer = AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock);
2916                 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2917                 AS_LOCK_EXIT(seg->s_as, &seg->s_as->a_lock);
2918 
2919                 mutex_enter(&sptd->spt_lock);
2920 
2921                 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);
2922 
2923                 /*
2924                  * Try to wait for pages to get kicked out of the seg_pcache.
2925                  */
2926                 while (sptd->spt_gen == gen &&
2927                     (sptd->spt_flags & DISM_PPA_CHANGED) &&
2928                     ddi_get_lbolt() < end_lbolt) {
2929                         if (!cv_timedwait_sig(&sptd->spt_cv,
2930                             &sptd->spt_lock, end_lbolt)) {
2931                                 break;
2932                         }
2933                 }
2934 
2935                 mutex_exit(&sptd->spt_lock);
2936 
2937                 /* Regrab the AS_LOCK and release our hold on the segment */
2938                 AS_LOCK_ENTER(seg->s_as, &seg->s_as->a_lock,
2939                     writer ? RW_WRITER : RW_READER);
2940                 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2941                 if (shmd->shm_softlockcnt <= 0) {
2942                         if (AS_ISUNMAPWAIT(seg->s_as)) {
2943                                 mutex_enter(&seg->s_as->a_contents);
2944                                 if (AS_ISUNMAPWAIT(seg->s_as)) {
2945                                         AS_CLRUNMAPWAIT(seg->s_as);
2946                                         cv_broadcast(&seg->s_as->a_cv);
2947                                 }
2948                                 mutex_exit(&seg->s_as->a_contents);
2949                         }
2950                 }
2951 
2952                 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2953                 anon_disclaim(amp, pg_idx, len);
2954                 ANON_LOCK_EXIT(&amp->a_rwlock);
2955         } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2956             behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2957                 int                     already_set;
2958                 ulong_t                 anon_index;
2959                 lgrp_mem_policy_t       policy;




 224 sptdestroy(struct as *as, struct anon_map *amp)
 225 {
 226 
 227 #ifdef DEBUG
 228         TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
 229 #endif
 230         (void) as_unmap(as, SEGSPTADDR, amp->size);
 231         as_free(as);
 232 }
 233 
 234 /*
 235  * called from seg_free().
 236  * free (i.e., unlock, unmap, return to free list)
 237  *  all the pages in the given seg.
 238  */
 239 void
 240 segspt_free(struct seg  *seg)
 241 {
 242         struct spt_data *sptd = (struct spt_data *)seg->s_data;
 243 
 244         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 245 
 246         if (sptd != NULL) {
 247                 if (sptd->spt_realsize)
 248                         segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
 249 
 250                 if (sptd->spt_ppa_lckcnt)
 251                         kmem_free(sptd->spt_ppa_lckcnt,
 252                             sizeof (*sptd->spt_ppa_lckcnt)
 253                             * btopr(sptd->spt_amp->size));
 254                 kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
 255                 cv_destroy(&sptd->spt_cv);
 256                 mutex_destroy(&sptd->spt_lock);
 257                 kmem_free(sptd, sizeof (*sptd));
 258         }
 259 }
 260 
 261 /*ARGSUSED*/
 262 static int
 263 segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
 264         uint_t flags)
 265 {
 266         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 267 
 268         return (0);
 269 }
 270 
 271 /*ARGSUSED*/
 272 static size_t
 273 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
 274 {
 275         caddr_t eo_seg;
 276         pgcnt_t npages;
 277         struct shm_data *shmd = (struct shm_data *)seg->s_data;
 278         struct seg      *sptseg;
 279         struct spt_data *sptd;
 280 
 281         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 282 #ifdef lint
 283         seg = seg;
 284 #endif
 285         sptseg = shmd->shm_sptseg;
 286         sptd = sptseg->s_data;
 287 
 288         if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
 289                 eo_seg = addr + len;
 290                 while (addr < eo_seg) {
 291                         /* page exists, and it's locked. */
 292                         *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
 293                             SEG_PAGE_ANON;
 294                         addr += PAGESIZE;
 295                 }
 296                 return (len);
 297         } else {
 298                 struct  anon_map *amp = shmd->shm_amp;
 299                 struct  anon    *ap;
 300                 page_t          *pp;
 301                 pgcnt_t         anon_index;


 325                                         page_unlock(pp);
 326                                 }
 327                         } else {
 328                                 anon_array_exit(&cookie);
 329                         }
 330                         if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
 331                                 ret |= SEG_PAGE_LOCKED;
 332                         }
 333                         *vec++ = (char)ret;
 334                 }
 335                 ANON_LOCK_EXIT(&amp->a_rwlock);
 336                 return (len);
 337         }
 338 }
 339 
 340 static int
 341 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
 342 {
 343         size_t share_size;
 344 
 345         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 346 
 347         /*
 348          * seg.s_size may have been rounded up to the largest page size
 349          * in shmat().
 350          * XXX This should be cleaned up. sptdestroy should take a length
 351          * argument which should be the same as sptcreate. Then
 352          * this rounding would not be needed (or is done in shm.c)
 353          * Only the check for full segment will be needed.
 354          *
 355          * XXX -- shouldn't raddr == 0 always? These tests don't seem
 356          * to be useful at all.
 357          */
 358         share_size = page_get_pagesize(seg->s_szc);
 359         ssize = P2ROUNDUP(ssize, share_size);
 360 
 361         if (raddr == seg->s_base && ssize == seg->s_size) {
 362                 seg_free(seg);
 363                 return (0);
 364         } else
 365                 return (EINVAL);


 376         struct kshmid   *sp = amp->a_sp;
 377         struct  cred    *cred = CRED();
 378         ulong_t         i, j, anon_index = 0;
 379         pgcnt_t         npages = btopr(amp->size);
 380         struct vnode    *vp;
 381         page_t          **ppa;
 382         uint_t          hat_flags;
 383         size_t          pgsz;
 384         pgcnt_t         pgcnt;
 385         caddr_t         a;
 386         pgcnt_t         pidx;
 387         size_t          sz;
 388         proc_t          *procp = curproc;
 389         rctl_qty_t      lockedbytes = 0;
 390         kproject_t      *proj;
 391 
 392         /*
 393          * We are holding the a_lock on the underlying dummy as,
 394          * so we can make calls to the HAT layer.
 395          */
 396         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 397         ASSERT(sp != NULL);
 398 
 399 #ifdef DEBUG
 400         TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
 401             tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
 402 #endif
 403         if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
 404                 if (err = anon_swap_adjust(npages))
 405                         return (err);
 406         }
 407         err = ENOMEM;
 408 
 409         if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
 410                 goto out1;
 411 
 412         if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
 413                 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
 414                     KM_NOSLEEP)) == NULL)
 415                         goto out2;
 416         }


 612 /*ARGSUSED*/
 613 void
 614 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
 615 {
 616         struct page     *pp;
 617         struct spt_data *sptd = (struct spt_data *)seg->s_data;
 618         pgcnt_t         npages;
 619         ulong_t         anon_idx;
 620         struct anon_map *amp;
 621         struct anon     *ap;
 622         struct vnode    *vp;
 623         u_offset_t      off;
 624         uint_t          hat_flags;
 625         int             root = 0;
 626         pgcnt_t         pgs, curnpgs = 0;
 627         page_t          *rootpp;
 628         rctl_qty_t      unlocked_bytes = 0;
 629         kproject_t      *proj;
 630         kshmid_t        *sp;
 631 
 632         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 633 
 634         len = P2ROUNDUP(len, PAGESIZE);
 635 
 636         npages = btop(len);
 637 
 638         hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
 639         if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
 640             (sptd->spt_flags & SHM_PAGEABLE)) {
 641                 hat_flags = HAT_UNLOAD_UNMAP;
 642         }
 643 
 644         hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
 645 
 646         amp = sptd->spt_amp;
 647         if (sptd->spt_flags & SHM_PAGEABLE)
 648                 npages = btop(amp->size);
 649 
 650         ASSERT(amp != NULL);
 651 
 652         if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {


 821 /*ARGSUSED*/
 822 static int
 823 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
 824     struct page ***ppp, enum lock_type type, enum seg_rw rw)
 825 {
 826         struct  shm_data *shmd = (struct shm_data *)seg->s_data;
 827         struct  seg     *sptseg = shmd->shm_sptseg;
 828         struct  spt_data *sptd = sptseg->s_data;
 829         pgcnt_t pg_idx, npages, tot_npages, npgs;
 830         struct  page **pplist, **pl, **ppa, *pp;
 831         struct  anon_map *amp;
 832         spgcnt_t        an_idx;
 833         int     ret = ENOTSUP;
 834         uint_t  pl_built = 0;
 835         struct  anon *ap;
 836         struct  vnode *vp;
 837         u_offset_t off;
 838         pgcnt_t claim_availrmem = 0;
 839         uint_t  szc;
 840 
 841         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 842         ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
 843 
 844         /*
 845          * We want to lock/unlock the entire ISM segment. Therefore,
 846          * we will be using the underlying sptseg and its base address
 847          * and length for the caching arguments.
 848          */
 849         ASSERT(sptseg);
 850         ASSERT(sptd);
 851 
 852         pg_idx = seg_page(seg, addr);
 853         npages = btopr(len);
 854 
 855         /*
 856          * check if the request is larger than number of pages covered
 857          * by amp
 858          */
 859         if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
 860                 *ppp = NULL;
 861                 return (ENOTSUP);


1176  */
1177 /*ARGSUSED*/
1178 static int
1179 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
1180     struct page ***ppp, enum lock_type type, enum seg_rw rw)
1181 {
1182         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1183         struct seg      *sptseg = shmd->shm_sptseg;
1184         struct spt_data *sptd = sptseg->s_data;
1185         pgcnt_t np, page_index, npages;
1186         caddr_t a, spt_base;
1187         struct page **pplist, **pl, *pp;
1188         struct anon_map *amp;
1189         ulong_t anon_index;
1190         int ret = ENOTSUP;
1191         uint_t  pl_built = 0;
1192         struct anon *ap;
1193         struct vnode *vp;
1194         u_offset_t off;
1195 
1196         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1197         ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
1198 
1199 
1200         /*
1201          * We want to lock/unlock the entire ISM segment. Therefore,
 1202          * we will be using the underlying sptseg and its base address
1203          * and length for the caching arguments.
1204          */
1205         ASSERT(sptseg);
1206         ASSERT(sptd);
1207 
1208         if (sptd->spt_flags & SHM_PAGEABLE) {
1209                 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
1210         }
1211 
1212         page_index = seg_page(seg, addr);
1213         npages = btopr(len);
1214 
1215         /*
 1216          * check if the request is larger than the number of pages covered


1434 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
1435         enum seg_rw rw, int async)
1436 {
1437         struct seg *seg = (struct seg *)ptag;
1438         struct  shm_data *shmd = (struct shm_data *)seg->s_data;
1439         struct  seg     *sptseg;
1440         struct  spt_data *sptd;
1441         pgcnt_t npages, i, free_availrmem = 0;
1442         int     done = 0;
1443 
1444 #ifdef lint
1445         addr = addr;
1446 #endif
1447         sptseg = shmd->shm_sptseg;
1448         sptd = sptseg->s_data;
1449         npages = (len >> PAGESHIFT);
1450         ASSERT(npages);
1451         ASSERT(sptd->spt_pcachecnt != 0);
1452         ASSERT(sptd->spt_ppa == pplist);
1453         ASSERT(npages == btopr(sptd->spt_amp->size));
1454         ASSERT(async || AS_LOCK_HELD(seg->s_as));
1455 
1456         /*
1457          * Acquire the lock on the dummy seg and destroy the
1458          * ppa array IF this is the last pcachecnt.
1459          */
1460         mutex_enter(&sptd->spt_lock);
1461         if (--sptd->spt_pcachecnt == 0) {
1462                 for (i = 0; i < npages; i++) {
1463                         if (pplist[i] == NULL) {
1464                                 continue;
1465                         }
1466                         if (rw == S_WRITE) {
1467                                 hat_setrefmod(pplist[i]);
1468                         } else {
1469                                 hat_setref(pplist[i]);
1470                         }
1471                         if ((sptd->spt_flags & SHM_PAGEABLE) &&
1472                             (sptd->spt_ppa_lckcnt[i] == 0))
1473                                 free_availrmem++;
1474                         page_unlock(pplist[i]);


 1568  * it's a read.  Thus cow faults can be ignored with respect to soft
1569  * unlocking, since the breaking of cow means that the anon slot(s) will
1570  * not be shared.
1571  */
1572 static void
1573 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1574         size_t len, enum seg_rw rw)
1575 {
1576         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1577         struct seg      *sptseg;
1578         struct spt_data *sptd;
1579         page_t *pp;
1580         caddr_t adr;
1581         struct vnode *vp;
1582         u_offset_t offset;
1583         ulong_t anon_index;
1584         struct anon_map *amp;           /* XXX - for locknest */
1585         struct anon *ap = NULL;
1586         pgcnt_t npages;
1587 
1588         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1589 
1590         sptseg = shmd->shm_sptseg;
1591         sptd = sptseg->s_data;
1592 
1593         /*
1594          * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1595          * and therefore their pages are SE_SHARED locked
1596          * for the entire life of the segment.
1597          */
1598         if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1599             ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1600                 goto softlock_decrement;
1601         }
1602 
1603         /*
1604          * Any thread is free to do a page_find and
1605          * page_unlock() on the pages within this seg.
1606          *
1607          * We are already holding the as->a_lock on the user's
1608          * real segment, but we need to hold the a_lock on the
1609          * underlying dummy as. This is mostly to satisfy the
1610          * underlying HAT layer.
1611          */
1612         AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1613         hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1614         AS_LOCK_EXIT(sptseg->s_as);
1615 
1616         amp = sptd->spt_amp;
1617         ASSERT(amp != NULL);
1618         anon_index = seg_page(sptseg, sptseg_addr);
1619 
1620         for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1621                 ap = anon_get_ptr(amp->ahp, anon_index++);
1622                 ASSERT(ap != NULL);
1623                 swap_xlate(ap, &vp, &offset);
1624 
1625                 /*
1626                  * Use page_find() instead of page_lookup() to
1627                  * find the page since we know that it has a
1628                  * "shared" lock.
1629                  */
1630                 pp = page_find(vp, offset);
1631                 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1632                 if (pp == NULL) {
1633                         panic("segspt_softunlock: "
1634                             "addr %p, ap %p, vp %p, off %llx",


1659                 if (AS_ISUNMAPWAIT(seg->s_as)) {
1660                         mutex_enter(&seg->s_as->a_contents);
1661                         if (AS_ISUNMAPWAIT(seg->s_as)) {
1662                                 AS_CLRUNMAPWAIT(seg->s_as);
1663                                 cv_broadcast(&seg->s_as->a_cv);
1664                         }
1665                         mutex_exit(&seg->s_as->a_contents);
1666                 }
1667         }
1668 }
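
/*
 * The wakeup at the end of segspt_softunlock() is a double-checked
 * broadcast: test AS_ISUNMAPWAIT() without the mutex, then retest under
 * a_contents before clearing the flag and waking waiters.  A minimal
 * sketch of the same idiom with hypothetical names (my_flag, my_lock,
 * my_cv):
 */
#if 0	/* illustrative sketch only */
        if (my_flag) {                          /* cheap unlocked check */
                mutex_enter(&my_lock);
                if (my_flag) {                  /* recheck under the lock */
                        my_flag = 0;
                        cv_broadcast(&my_cv);   /* wake all waiters */
                }
                mutex_exit(&my_lock);
        }
#endif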
1669 
1670 int
1671 segspt_shmattach(struct seg *seg, caddr_t *argsp)
1672 {
1673         struct shm_data *shmd_arg = (struct shm_data *)argsp;
1674         struct shm_data *shmd;
1675         struct anon_map *shm_amp = shmd_arg->shm_amp;
1676         struct spt_data *sptd;
1677         int error = 0;
1678 
1679         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1680 
1681         shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1682         if (shmd == NULL)
1683                 return (ENOMEM);
1684 
1685         shmd->shm_sptas = shmd_arg->shm_sptas;
1686         shmd->shm_amp = shm_amp;
1687         shmd->shm_sptseg = shmd_arg->shm_sptseg;
1688 
1689         (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1690             NULL, 0, seg->s_size);
1691 
1692         mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);
1693 
1694         seg->s_data = (void *)shmd;
1695         seg->s_ops = &segspt_shmops;
1696         seg->s_szc = shmd->shm_sptseg->s_szc;
1697         sptd = shmd->shm_sptseg->s_data;
1698 
1699         if (sptd->spt_flags & SHM_PAGEABLE) {


1718                     seg->s_size, seg->s_szc);
1719         }
1720         if (error) {
1721                 seg->s_szc = 0;
1722                 seg->s_data = (void *)NULL;
1723                 kmem_free(shmd, (sizeof (*shmd)));
1724         } else {
1725                 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1726                 shm_amp->refcnt++;
1727                 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1728         }
1729         return (error);
1730 }
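
/*
 * segspt_shmattach() allocates its per-attach data with KM_NOSLEEP,
 * presumably to avoid blocking while the address space lock is held as
 * writer, and fails the attach with ENOMEM rather than sleeping.  A
 * minimal sketch of that allocate-or-fail pattern (my_data_t is
 * hypothetical):
 */
#if 0	/* illustrative sketch only */
        my_data_t *d = kmem_zalloc(sizeof (*d), KM_NOSLEEP);
        if (d == NULL)
                return (ENOMEM);        /* caller must handle the failure */
#endif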
1731 
1732 int
1733 segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
1734 {
1735         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1736         int reclaim = 1;
1737 
1738         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1739 retry:
1740         if (shmd->shm_softlockcnt > 0) {
1741                 if (reclaim == 1) {
1742                         segspt_purge(seg);
1743                         reclaim = 0;
1744                         goto retry;
1745                 }
1746                 return (EAGAIN);
1747         }
1748 
1749         if (ssize != seg->s_size) {
1750 #ifdef DEBUG
1751                 cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
1752                     ssize, seg->s_size);
1753 #endif
1754                 return (EINVAL);
1755         }
1756 
1757         (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
1758             NULL, 0);
1759         hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);
1760 
1761         seg_free(seg);
1762 
1763         return (0);
1764 }
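
/*
 * The softlockcnt check in segspt_shmunmap() retries exactly once: the
 * first time the count is nonzero it purges the pcache with
 * segspt_purge() and rechecks; a second failure returns EAGAIN.  A
 * minimal sketch of that one-shot retry (busy() and flush() are
 * hypothetical):
 */
#if 0	/* illustrative sketch only */
        int reclaim = 1;
retry:
        if (busy()) {
                if (reclaim) {
                        flush();        /* may clear the busy condition */
                        reclaim = 0;
                        goto retry;
                }
                return (EAGAIN);
        }
#endif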
1765 
1766 void
1767 segspt_shmfree(struct seg *seg)
1768 {
1769         struct shm_data *shmd = (struct shm_data *)seg->s_data;
1770         struct anon_map *shm_amp = shmd->shm_amp;
1771 
1772         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1773 
1774         (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
1775             MC_UNLOCK, NULL, 0);
1776 
1777         /*
1778          * Need to increment refcnt when attaching
1779          * and decrement when detaching because of dup().
1780          */
1781         ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1782         shm_amp->refcnt--;
1783         ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1784 
1785         if (shmd->shm_vpage) {       /* only for DISM */
1786                 kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
1787                 shmd->shm_vpage = NULL;
1788         }
1789 
1790         /*
1791          * Take the shm_segfree_syncmtx lock to let segspt_reclaim() finish
1792          * if it's still working with this segment without holding the as lock.
1793          */
1794         ASSERT(shmd->shm_softlockcnt == 0);
1795         mutex_enter(&shmd->shm_segfree_syncmtx);
1796         mutex_destroy(&shmd->shm_segfree_syncmtx);
1797 
1798         kmem_free(shmd, sizeof (*shmd));
1799 }
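
/*
 * Entering shm_segfree_syncmtx just before destroying it acts as a
 * barrier: any segspt_reclaim() still running without the as lock must
 * drop the mutex before the shmd memory is freed.  A minimal sketch of
 * the idiom (my_obj and my_mutex are hypothetical):
 */
#if 0	/* illustrative sketch only */
        mutex_enter(&my_obj->my_mutex);         /* wait out the last holder */
        mutex_destroy(&my_obj->my_mutex);       /* no new holders possible */
        kmem_free(my_obj, sizeof (*my_obj));
#endif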
1800 
1801 /*ARGSUSED*/
1802 int
1803 segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1804 {
1805         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1806 
1807         /*
1808          * A shared page table is more than a shared mapping.
1809          * An individual process sharing the page tables can't change prot,
1810          * because there is only one set of page tables.
1811          * This will be allowed once private page tables
1812          * are supported.
1813          */
1814 /* XXX - do we need to return a correct error status? */
1815         return (0);
1816 }
1817 
1818 
1819 faultcode_t
1820 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1821     size_t len, enum fault_type type, enum seg_rw rw)
1822 {
1823         struct  shm_data        *shmd = (struct shm_data *)seg->s_data;
1824         struct  seg             *sptseg = shmd->shm_sptseg;
1825         struct  as              *curspt = shmd->shm_sptas;
1826         struct  spt_data        *sptd = sptseg->s_data;
1827         pgcnt_t npages;
1828         size_t  size;
1829         caddr_t segspt_addr, shm_addr;
1830         page_t  **ppa;
1831         int     i;
1832         ulong_t an_idx = 0;
1833         int     err = 0;
1834         int     dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
1835         size_t  pgsz;
1836         pgcnt_t pgcnt;
1837         caddr_t a;
1838         pgcnt_t pidx;
1839 
1840 #ifdef lint
1841         hat = hat;
1842 #endif
1843         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1844 
1845         /*
1846          * Because of the way spt is implemented,
1847          * the realsize of the segment does not have to be
1848          * equal to the segment size itself. The segment size is
1849          * often in multiples of a page size larger than PAGESIZE.
1850          * The realsize is rounded up to the nearest PAGESIZE
1851          * based on what the user requested. This is a bit of
1852          * ugliness that is historical but not easily fixed
1853          * without re-designing the higher levels of ISM.
1854          */
1855         ASSERT(addr >= seg->s_base);
1856         if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1857                 return (FC_NOMAP);
1858         /*
1859          * For all of the following cases except F_PROT, we need to
1860          * make any necessary adjustments to addr and len
1861          * and get all of the necessary page_t's into an array called ppa[].
1862          *
1863          * The code in shmat() forces base addr and len of ISM segment


1892                 /*
1893                  * Fall through to the F_INVAL case to load up the hat layer
1894                  * entries with the HAT_LOAD_LOCK flag.
1895                  */
1896                 /* FALLTHRU */
1897         case F_INVAL:
1898 
1899                 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1900                         return (FC_NOMAP);
1901 
1902                 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1903 
1904                 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1905                 if (err != 0) {
1906                         if (type == F_SOFTLOCK) {
1907                                 atomic_add_long((ulong_t *)(
1908                                     &(shmd->shm_softlockcnt)), -npages);
1909                         }
1910                         goto dism_err;
1911                 }
1912                 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1913                 a = segspt_addr;
1914                 pidx = 0;
1915                 if (type == F_SOFTLOCK) {
1916 
1917                         /*
1918                          * Load up the translation, keep it
1919                          * locked, and don't unlock the page.
1920                          */
1921                         for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1922                                 hat_memload_array(sptseg->s_as->a_hat,
1923                                     a, pgsz, &ppa[pidx], sptd->spt_prot,
1924                                     HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1925                         }
1926                 } else {
1927                         if (hat == seg->s_as->a_hat) {
1928 
1929                                 /*
1930                                  * Migrate pages marked for migration
1931                                  */
1932                                 if (lgrp_optimizations())


1953                         if (dyn_ism_unmap) {
1954                                 for (i = 0; i < npages; i++) {
1955                                         page_unlock(ppa[i]);
1956                                 }
1957                         }
1958                 }
1959 
1960                 if (!dyn_ism_unmap) {
1961                         if (hat_share(seg->s_as->a_hat, shm_addr,
1962                             curspt->a_hat, segspt_addr, ptob(npages),
1963                             seg->s_szc) != 0) {
1964                                 panic("hat_share err in DISM fault");
1965                                 /* NOTREACHED */
1966                         }
1967                         if (type == F_INVAL) {
1968                                 for (i = 0; i < npages; i++) {
1969                                         page_unlock(ppa[i]);
1970                                 }
1971                         }
1972                 }
1973                 AS_LOCK_EXIT(sptseg->s_as);
1974 dism_err:
1975                 kmem_free(ppa, npages * sizeof (page_t *));
1976                 return (err);
1977 
1978         case F_SOFTUNLOCK:
1979 
1980                 /*
1981                  * This is a bit ugly: we pass in the real seg pointer,
1982                  * but the segspt_addr is the virtual address within the
1983                  * dummy seg.
1984                  */
1985                 segspt_softunlock(seg, segspt_addr, size, rw);
1986                 return (0);
1987 
1988         case F_PROT:
1989 
1990                 /*
1991                  * This takes care of the unusual case where a user
1992                  * allocates a stack in shared memory and a register
1993                  * window overflow is written to that stack page before


2020         pgcnt_t npages;
2021         size_t size;
2022         caddr_t sptseg_addr, shm_addr;
2023         page_t *pp, **ppa;
2024         int     i;
2025         u_offset_t offset;
2026         ulong_t anon_index = 0;
2027         struct vnode *vp;
2028         struct anon_map *amp;           /* XXX - for locknest */
2029         struct anon *ap = NULL;
2030         size_t          pgsz;
2031         pgcnt_t         pgcnt;
2032         caddr_t         a;
2033         pgcnt_t         pidx;
2034         size_t          sz;
2035 
2036 #ifdef lint
2037         hat = hat;
2038 #endif
2039 
2040         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2041 
2042         if (sptd->spt_flags & SHM_PAGEABLE) {
2043                 return (segspt_dismfault(hat, seg, addr, len, type, rw));
2044         }
2045 
2046         /*
2047          * Because of the way spt is implemented,
2048          * the realsize of the segment does not have to be
2049          * equal to the segment size itself. The segment size is
2050          * often in multiples of a page size larger than PAGESIZE.
2051          * The realsize is rounded up to the nearest PAGESIZE
2052          * based on what the user requested. This is a bit of
2053          * ugliness that is historical but not easily fixed
2054          * without re-designing the higher levels of ISM.
2055          */
2056         ASSERT(addr >= seg->s_base);
2057         if (((addr + len) - seg->s_base) > sptd->spt_realsize)
2058                 return (FC_NOMAP);
2059         /*
2060          * For all of the following cases except F_PROT, we need to


2152                 anon_index = seg_page(sptseg, sptseg_addr);
2153 
2154                 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2155                 for (i = 0; i < npages; i++) {
2156                         ap = anon_get_ptr(amp->ahp, anon_index++);
2157                         ASSERT(ap != NULL);
2158                         swap_xlate(ap, &vp, &offset);
2159                         pp = page_lookup(vp, offset, SE_SHARED);
2160                         ASSERT(pp != NULL);
2161                         ppa[i] = pp;
2162                 }
2163                 ANON_LOCK_EXIT(&amp->a_rwlock);
2164                 ASSERT(i == npages);
2165 
2166                 /*
2167                  * We are already holding the as->a_lock on the user's
2168                  * real segment, but we need to hold the a_lock on the
2169                  * underlying dummy as. This is mostly to satisfy the
2170                  * underlying HAT layer.
2171                  */
2172                 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
2173                 a = sptseg_addr;
2174                 pidx = 0;
2175                 if (type == F_SOFTLOCK) {
2176                         /*
2177                          * Load up the translation, keep it
2178                          * locked, and don't unlock the page.
2179                          */
2180                         for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2181                                 sz = MIN(pgsz, ptob(npages - pidx));
2182                                 hat_memload_array(sptseg->s_as->a_hat, a,
2183                                     sz, &ppa[pidx], sptd->spt_prot,
2184                                     HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2185                         }
2186                 } else {
2187                         if (hat == seg->s_as->a_hat) {
2188 
2189                                 /*
2190                                  * Migrate pages marked for migration.
2191                                  */
2192                                 if (lgrp_optimizations())


2197                                 for (; pidx < npages;
2198                                     a += pgsz, pidx += pgcnt) {
2199                                         sz = MIN(pgsz, ptob(npages - pidx));
2200                                         hat_memload_array(sptseg->s_as->a_hat,
2201                                             a, sz, &ppa[pidx],
2202                                             sptd->spt_prot, HAT_LOAD_SHARE);
2203                                 }
2204                         } else {
2205                                 /* XHAT. Pass real address */
2206                                 hat_memload_array(hat, shm_addr,
2207                                     ptob(npages), ppa, sptd->spt_prot,
2208                                     HAT_LOAD_SHARE);
2209                         }
2210 
2211                         /*
2212                          * And now drop the SE_SHARED lock(s).
2213                          */
2214                         for (i = 0; i < npages; i++)
2215                                 page_unlock(ppa[i]);
2216                 }
2217                 AS_LOCK_EXIT(sptseg->s_as);
2218 
2219                 kmem_free(ppa, sizeof (page_t *) * npages);
2220                 return (0);
2221         case F_SOFTUNLOCK:
2222 
2223                 /*
2224                  * This is a bit ugly: we pass in the real seg pointer,
2225                  * but the sptseg_addr is the virtual address within the
2226                  * dummy seg.
2227                  */
2228                 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2229                 return (0);
2230 
2231         case F_PROT:
2232 
2233                 /*
2234                  * This takes care of the unusual case where a user
2235                  * allocates a stack in shared memory and a register
2236                  * window overflow is written to that stack page before
2237                  * it is otherwise modified.


2267 /*ARGSUSED*/
2268 static size_t
2269 segspt_shmswapout(struct seg *seg)
2270 {
2271         return (0);
2272 }
2273 
2274 /*
2275  * duplicate the shared page tables
2276  */
2277 int
2278 segspt_shmdup(struct seg *seg, struct seg *newseg)
2279 {
2280         struct shm_data         *shmd = (struct shm_data *)seg->s_data;
2281         struct anon_map         *amp = shmd->shm_amp;
2282         struct shm_data         *shmd_new;
2283         struct seg              *spt_seg = shmd->shm_sptseg;
2284         struct spt_data         *sptd = spt_seg->s_data;
2285         int                     error = 0;
2286 
2287         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
2288 
2289         shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2290         newseg->s_data = (void *)shmd_new;
2291         shmd_new->shm_sptas = shmd->shm_sptas;
2292         shmd_new->shm_amp = amp;
2293         shmd_new->shm_sptseg = shmd->shm_sptseg;
2294         newseg->s_ops = &segspt_shmops;
2295         newseg->s_szc = seg->s_szc;
2296         ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2297 
2298         ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
2299         amp->refcnt++;
2300         ANON_LOCK_EXIT(&amp->a_rwlock);
2301 
2302         if (sptd->spt_flags & SHM_PAGEABLE) {
2303                 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2304                 shmd_new->shm_lckpgs = 0;
2305                 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2306                         if ((error = hat_share(newseg->s_as->a_hat,
2307                             newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,


2309                                 kmem_free(shmd_new->shm_vpage,
2310                                     btopr(amp->size));
2311                         }
2312                 }
2313                 return (error);
2314         } else {
2315                 return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2316                     shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2317                     seg->s_szc));
2318 
2319         }
2320 }
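
/*
 * Attach and dup each take a hold on the shared anon map; detach
 * (segspt_shmfree) drops one.  All three adjust refcnt under the anon
 * map's rwlock held as writer.  A minimal sketch of the pairing
 * (my_amp is hypothetical):
 */
#if 0	/* illustrative sketch only */
        ANON_LOCK_ENTER(&my_amp->a_rwlock, RW_WRITER);
        my_amp->refcnt++;               /* hold for the new mapping */
        ANON_LOCK_EXIT(&my_amp->a_rwlock);

        /* ... the mapping is used ... */

        ANON_LOCK_ENTER(&my_amp->a_rwlock, RW_WRITER);
        my_amp->refcnt--;               /* release when detaching */
        ANON_LOCK_EXIT(&my_amp->a_rwlock);
#endif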
2321 
2322 /*ARGSUSED*/
2323 int
2324 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2325 {
2326         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2327         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2328 
2329         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2330 
2331         /*
2332          * ISM segment is always rw.
2333          */
2334         return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2335 }
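
/*
 * The return expression above is a bitmask subset test: every bit
 * requested in prot must already be present in spt_prot.  An
 * equivalent formulation:
 */
#if 0	/* illustrative sketch only */
        if ((prot & ~sptd->spt_prot) != 0)      /* any bit not granted? */
                return (EACCES);
        return (0);
#endif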
2336 
2337 /*
2338  * Return an array of locked large pages; for empty slots, allocate
2339  * private zero-filled anon pages.
2340  */
2341 static int
2342 spt_anon_getpages(
2343         struct seg *sptseg,
2344         caddr_t sptaddr,
2345         size_t len,
2346         page_t *ppa[])
2347 {
2348         struct  spt_data *sptd = sptseg->s_data;
2349         struct  anon_map *amp = sptd->spt_amp;


2660 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2661     int attr, int op, ulong_t *lockmap, size_t pos)
2662 {
2663         struct shm_data *shmd = seg->s_data;
2664         struct seg      *sptseg = shmd->shm_sptseg;
2665         struct spt_data *sptd = sptseg->s_data;
2666         struct kshmid   *sp = sptd->spt_amp->a_sp;
2667         pgcnt_t         npages, a_npages;
2668         page_t          **ppa;
2669         pgcnt_t         an_idx, a_an_idx, ppa_idx;
2670         caddr_t         spt_addr, a_addr;       /* spt and aligned address */
2671         size_t          a_len;                  /* aligned len */
2672         size_t          share_sz;
2673         ulong_t         i;
2674         int             sts = 0;
2675         rctl_qty_t      unlocked = 0;
2676         rctl_qty_t      locked = 0;
2677         struct proc     *p = curproc;
2678         kproject_t      *proj;
2679 
2680         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2681         ASSERT(sp != NULL);
2682 
2683         if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2684                 return (0);
2685         }
2686 
2687         addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2688         an_idx = seg_page(seg, addr);
2689         npages = btopr(len);
2690 
2691         if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2692                 return (ENOMEM);
2693         }
2694 
2695         /*
2696          * A shm's project never changes, so no lock needed.
2697          * The shm has a hold on the project, so it will not go away.
2698          * Since we have a mapping to shm within this zone, we know
2699          * that the zone will not go away.
2700          */


2790                         sptd->spt_flags |= DISM_PPA_CHANGED;
2791                 mutex_exit(&sptd->spt_lock);
2792 
2793                 rctl_decr_locked_mem(NULL, proj, unlocked, 0);
2794                 mutex_exit(&sp->shm_mlock);
2795 
2796                 if (ppa != NULL)
2797                         seg_ppurge_wiredpp(ppa);
2798         }
2799         return (sts);
2800 }
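
/*
 * The index math at the top of segspt_shmlockop() truncates addr down
 * to a page boundary, converts it to a page index within the segment,
 * and rounds len up to whole pages before the bounds check.  A minimal
 * sketch of that arithmetic (req_addr and req_len are hypothetical):
 */
#if 0	/* illustrative sketch only */
        caddr_t a = (caddr_t)((uintptr_t)req_addr & (uintptr_t)PAGEMASK);
        ulong_t idx = seg_page(seg, a);         /* page index within seg */
        pgcnt_t n = btopr(req_len);             /* round up to whole pages */
        if (idx + n > btopr(shmd->shm_amp->size))
                return (ENOMEM);                /* runs past the segment */
#endif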
2801 
2802 /*ARGSUSED*/
2803 int
2804 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2805 {
2806         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2807         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2808         spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2809 
2810         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2811 
2812         /*
2813          * ISM segment is always rw.
2814          */
2815         while (--pgno >= 0)
2816                 *protv++ = sptd->spt_prot;
2817         return (0);
2818 }
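
/*
 * pgno above spans every page from the one containing addr through the
 * one containing addr + len, and the backward while loop fills them all
 * with the single segment-wide protection.  An equivalent forward form:
 */
#if 0	/* illustrative sketch only */
        spgcnt_t i;

        for (i = 0; i < pgno; i++)
                protv[i] = sptd->spt_prot;      /* one value fits all pages */
#endif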
2819 
2820 /*ARGSUSED*/
2821 u_offset_t
2822 segspt_shmgetoffset(struct seg *seg, caddr_t addr)
2823 {
2824         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2825 
2826         /* Offset does not matter in ISM memory */
2827 
2828         return ((u_offset_t)0);
2829 }
2830 
2831 /* ARGSUSED */
2832 int
2833 segspt_shmgettype(struct seg *seg, caddr_t addr)
2834 {
2835         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2836         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2837 
2838         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2839 
2840         /*
2841          * The shared memory mapping is always MAP_SHARED; swap is only
2842          * reserved for DISM.
2843          */
2844         return (MAP_SHARED |
2845             ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2846 }
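
/*
 * MAP_NORESERVE is reported only for non-pageable ISM, whose pages stay
 * locked for the life of the segment, so no swap is reserved; pageable
 * DISM does reserve swap.  A caller could distinguish the two (a sketch,
 * assuming the standard SEGOP_GETTYPE() dispatch macro):
 */
#if 0	/* illustrative sketch only */
        int t = SEGOP_GETTYPE(seg, addr);
        boolean_t is_dism = ((t & MAP_NORESERVE) == 0);
#endif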
2847 
2848 /*ARGSUSED*/
2849 int
2850 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2851 {
2852         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2853         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2854 
2855         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2856 
2857         *vpp = sptd->spt_vp;
2858         return (0);
2859 }
2860 
2861 /*
2862  * We need to wait for pending I/O to a DISM segment to complete in order for
2863  * pages to get kicked out of the seg_pcache.  120 seconds should be more
2864  * than enough time to wait.
2865  */
2866 static clock_t spt_pcache_wait = 120;
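
/*
 * spt_pcache_wait is in seconds; the MADV_FREE path below turns it into
 * an absolute deadline in clock ticks for cv_timedwait_sig().  A minimal
 * sketch of the conversion (my_cv and my_lock are hypothetical):
 */
#if 0	/* illustrative sketch only */
        clock_t deadline = ddi_get_lbolt() + (hz * spt_pcache_wait);

        /*
         * cv_timedwait_sig(9F): > 0 on wakeup, 0 if interrupted by a
         * signal, -1 if the deadline passed.
         */
        (void) cv_timedwait_sig(&my_cv, &my_lock, deadline);
#endif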
2867 
2868 /*ARGSUSED*/
2869 static int
2870 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2871 {
2872         struct shm_data *shmd = (struct shm_data *)seg->s_data;
2873         struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2874         struct anon_map *amp;
2875         pgcnt_t pg_idx;
2876         ushort_t gen;
2877         clock_t end_lbolt;
2878         int writer;
2879         page_t **ppa;
2880 
2881         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2882 
2883         if (behav == MADV_FREE) {
2884                 if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2885                         return (0);
2886 
2887                 amp = sptd->spt_amp;
2888                 pg_idx = seg_page(seg, addr);
2889 
2890                 mutex_enter(&sptd->spt_lock);
2891                 if ((ppa = sptd->spt_ppa) == NULL) {
2892                         mutex_exit(&sptd->spt_lock);
2893                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2894                         anon_disclaim(amp, pg_idx, len);
2895                         ANON_LOCK_EXIT(&amp->a_rwlock);
2896                         return (0);
2897                 }
2898 
2899                 sptd->spt_flags |= DISM_PPA_CHANGED;
2900                 gen = sptd->spt_gen;
2901 
2902                 mutex_exit(&sptd->spt_lock);
2903 
2904                 /*
2905                  * Purge all DISM cached pages
2906                  */
2907                 seg_ppurge_wiredpp(ppa);
2908 
2909                 /*
2910                  * Drop the AS_LOCK so that other threads can grab it
2911                  * in the as_pageunlock path and hopefully get the segment
2912                  * kicked out of the seg_pcache.  We bump the shm_softlockcnt
2913                  * to keep this segment resident.
2914                  */
2915                 writer = AS_WRITE_HELD(seg->s_as);
2916                 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2917                 AS_LOCK_EXIT(seg->s_as);
2918 
2919                 mutex_enter(&sptd->spt_lock);
2920 
2921                 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);
2922 
2923                 /*
2924                  * Try to wait for pages to get kicked out of the seg_pcache.
2925                  */
2926                 while (sptd->spt_gen == gen &&
2927                     (sptd->spt_flags & DISM_PPA_CHANGED) &&
2928                     ddi_get_lbolt() < end_lbolt) {
2929                         if (!cv_timedwait_sig(&sptd->spt_cv,
2930                             &sptd->spt_lock, end_lbolt)) {
2931                                 break;
2932                         }
2933                 }
2934 
2935                 mutex_exit(&sptd->spt_lock);
2936 
2937                 /* Regrab the AS_LOCK and release our hold on the segment */
2938                 AS_LOCK_ENTER(seg->s_as, writer ? RW_WRITER : RW_READER);
2939                 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2940                 if (shmd->shm_softlockcnt <= 0) {
2941                         if (AS_ISUNMAPWAIT(seg->s_as)) {
2942                                 mutex_enter(&seg->s_as->a_contents);
2943                                 if (AS_ISUNMAPWAIT(seg->s_as)) {
2944                                         AS_CLRUNMAPWAIT(seg->s_as);
2945                                         cv_broadcast(&seg->s_as->a_cv);
2946                                 }
2947                                 mutex_exit(&seg->s_as->a_contents);
2948                         }
2949                 }
2950 
2951                 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2952                 anon_disclaim(amp, pg_idx, len);
2953                 ANON_LOCK_EXIT(&amp->a_rwlock);
2954         } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2955             behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2956                 int                     already_set;
2957                 ulong_t                 anon_index;
2958                 lgrp_mem_policy_t       policy;