224 sptdestroy(struct as *as, struct anon_map *amp)
225 {
226
227 #ifdef DEBUG
228 TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
229 #endif
230 (void) as_unmap(as, SEGSPTADDR, amp->size);
231 as_free(as);
232 }
233
234 /*
235 * called from seg_free().
236 * free (i.e., unlock, unmap, return to free list)
237 * all the pages in the given seg.
238 */
239 void
240 segspt_free(struct seg *seg)
241 {
242 struct spt_data *sptd = (struct spt_data *)seg->s_data;
243
244 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
245
246 if (sptd != NULL) {
247 if (sptd->spt_realsize)
248 segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
249
250 if (sptd->spt_ppa_lckcnt)
251 kmem_free(sptd->spt_ppa_lckcnt,
252 sizeof (*sptd->spt_ppa_lckcnt)
253 * btopr(sptd->spt_amp->size));
254 kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
255 cv_destroy(&sptd->spt_cv);
256 mutex_destroy(&sptd->spt_lock);
257 kmem_free(sptd, sizeof (*sptd));
258 }
259 }
260
261 /*ARGSUSED*/
262 static int
263 segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
264 uint_t flags)
265 {
266 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
267
268 return (0);
269 }
270
271 /*ARGSUSED*/
272 static size_t
273 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
274 {
275 caddr_t eo_seg;
276 pgcnt_t npages;
277 struct shm_data *shmd = (struct shm_data *)seg->s_data;
278 struct seg *sptseg;
279 struct spt_data *sptd;
280
281 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
282 #ifdef lint
283 seg = seg;
284 #endif
285 sptseg = shmd->shm_sptseg;
286 sptd = sptseg->s_data;
287
288 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
289 eo_seg = addr + len;
290 while (addr < eo_seg) {
291 /* page exists, and it's locked. */
292 *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
293 SEG_PAGE_ANON;
294 addr += PAGESIZE;
295 }
296 return (len);
297 } else {
298 struct anon_map *amp = shmd->shm_amp;
299 struct anon *ap;
300 page_t *pp;
301 pgcnt_t anon_index;
325 page_unlock(pp);
326 }
327 } else {
328 anon_array_exit(&cookie);
329 }
330 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
331 ret |= SEG_PAGE_LOCKED;
332 }
333 *vec++ = (char)ret;
334 }
335 ANON_LOCK_EXIT(&->a_rwlock);
336 return (len);
337 }
338 }
339
340 static int
341 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
342 {
343 size_t share_size;
344
345 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
346
347 /*
348 * seg.s_size may have been rounded up to the largest page size
349 * in shmat().
350 * XXX This should be cleanedup. sptdestroy should take a length
351 * argument which should be the same as sptcreate. Then
352 * this rounding would not be needed (or is done in shm.c)
353 * Only the check for full segment will be needed.
354 *
355 * XXX -- shouldn't raddr == 0 always? These tests don't seem
356 * to be useful at all.
357 */
358 share_size = page_get_pagesize(seg->s_szc);
359 ssize = P2ROUNDUP(ssize, share_size);
360
361 if (raddr == seg->s_base && ssize == seg->s_size) {
362 seg_free(seg);
363 return (0);
364 } else
365 return (EINVAL);
376 struct kshmid *sp = amp->a_sp;
377 struct cred *cred = CRED();
378 ulong_t i, j, anon_index = 0;
379 pgcnt_t npages = btopr(amp->size);
380 struct vnode *vp;
381 page_t **ppa;
382 uint_t hat_flags;
383 size_t pgsz;
384 pgcnt_t pgcnt;
385 caddr_t a;
386 pgcnt_t pidx;
387 size_t sz;
388 proc_t *procp = curproc;
389 rctl_qty_t lockedbytes = 0;
390 kproject_t *proj;
391
392 /*
393 * We are holding the a_lock on the underlying dummy as,
394 * so we can make calls to the HAT layer.
395 */
396 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
397 ASSERT(sp != NULL);
398
399 #ifdef DEBUG
400 TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
401 tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
402 #endif
403 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
404 if (err = anon_swap_adjust(npages))
405 return (err);
406 }
407 err = ENOMEM;
408
409 if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
410 goto out1;
411
412 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
413 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
414 KM_NOSLEEP)) == NULL)
415 goto out2;
416 }
612 /*ARGSUSED*/
613 void
614 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
615 {
616 struct page *pp;
617 struct spt_data *sptd = (struct spt_data *)seg->s_data;
618 pgcnt_t npages;
619 ulong_t anon_idx;
620 struct anon_map *amp;
621 struct anon *ap;
622 struct vnode *vp;
623 u_offset_t off;
624 uint_t hat_flags;
625 int root = 0;
626 pgcnt_t pgs, curnpgs = 0;
627 page_t *rootpp;
628 rctl_qty_t unlocked_bytes = 0;
629 kproject_t *proj;
630 kshmid_t *sp;
631
632 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
633
634 len = P2ROUNDUP(len, PAGESIZE);
635
636 npages = btop(len);
637
638 hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
639 if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
640 (sptd->spt_flags & SHM_PAGEABLE)) {
641 hat_flags = HAT_UNLOAD_UNMAP;
642 }
643
644 hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
645
646 amp = sptd->spt_amp;
647 if (sptd->spt_flags & SHM_PAGEABLE)
648 npages = btop(amp->size);
649
650 ASSERT(amp != NULL);
651
652 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
821 /*ARGSUSED*/
822 static int
823 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
824 struct page ***ppp, enum lock_type type, enum seg_rw rw)
825 {
826 struct shm_data *shmd = (struct shm_data *)seg->s_data;
827 struct seg *sptseg = shmd->shm_sptseg;
828 struct spt_data *sptd = sptseg->s_data;
829 pgcnt_t pg_idx, npages, tot_npages, npgs;
830 struct page **pplist, **pl, **ppa, *pp;
831 struct anon_map *amp;
832 spgcnt_t an_idx;
833 int ret = ENOTSUP;
834 uint_t pl_built = 0;
835 struct anon *ap;
836 struct vnode *vp;
837 u_offset_t off;
838 pgcnt_t claim_availrmem = 0;
839 uint_t szc;
840
841 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
842 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
843
844 /*
845 * We want to lock/unlock the entire ISM segment. Therefore,
846 * we will be using the underlying sptseg and it's base address
847 * and length for the caching arguments.
848 */
849 ASSERT(sptseg);
850 ASSERT(sptd);
851
852 pg_idx = seg_page(seg, addr);
853 npages = btopr(len);
854
855 /*
856 * check if the request is larger than number of pages covered
857 * by amp
858 */
859 if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
860 *ppp = NULL;
861 return (ENOTSUP);
1176 */
1177 /*ARGSUSED*/
1178 static int
1179 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
1180 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1181 {
1182 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1183 struct seg *sptseg = shmd->shm_sptseg;
1184 struct spt_data *sptd = sptseg->s_data;
1185 pgcnt_t np, page_index, npages;
1186 caddr_t a, spt_base;
1187 struct page **pplist, **pl, *pp;
1188 struct anon_map *amp;
1189 ulong_t anon_index;
1190 int ret = ENOTSUP;
1191 uint_t pl_built = 0;
1192 struct anon *ap;
1193 struct vnode *vp;
1194 u_offset_t off;
1195
1196 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1197 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
1198
1199
1200 /*
1201 * We want to lock/unlock the entire ISM segment. Therefore,
1202 * we will be using the underlying sptseg and it's base address
1203 * and length for the caching arguments.
1204 */
1205 ASSERT(sptseg);
1206 ASSERT(sptd);
1207
1208 if (sptd->spt_flags & SHM_PAGEABLE) {
1209 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
1210 }
1211
1212 page_index = seg_page(seg, addr);
1213 npages = btopr(len);
1214
1215 /*
1216 * check if the request is larger than number of pages covered
1434 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
1435 enum seg_rw rw, int async)
1436 {
1437 struct seg *seg = (struct seg *)ptag;
1438 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1439 struct seg *sptseg;
1440 struct spt_data *sptd;
1441 pgcnt_t npages, i, free_availrmem = 0;
1442 int done = 0;
1443
1444 #ifdef lint
1445 addr = addr;
1446 #endif
1447 sptseg = shmd->shm_sptseg;
1448 sptd = sptseg->s_data;
1449 npages = (len >> PAGESHIFT);
1450 ASSERT(npages);
1451 ASSERT(sptd->spt_pcachecnt != 0);
1452 ASSERT(sptd->spt_ppa == pplist);
1453 ASSERT(npages == btopr(sptd->spt_amp->size));
1454 ASSERT(async || AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1455
1456 /*
1457 * Acquire the lock on the dummy seg and destroy the
1458 * ppa array IF this is the last pcachecnt.
1459 */
1460 mutex_enter(&sptd->spt_lock);
1461 if (--sptd->spt_pcachecnt == 0) {
1462 for (i = 0; i < npages; i++) {
1463 if (pplist[i] == NULL) {
1464 continue;
1465 }
1466 if (rw == S_WRITE) {
1467 hat_setrefmod(pplist[i]);
1468 } else {
1469 hat_setref(pplist[i]);
1470 }
1471 if ((sptd->spt_flags & SHM_PAGEABLE) &&
1472 (sptd->spt_ppa_lckcnt[i] == 0))
1473 free_availrmem++;
1474 page_unlock(pplist[i]);
1568 * its a read. Thus cow faults can be ignored with respect to soft
1569 * unlocking, since the breaking of cow means that the anon slot(s) will
1570 * not be shared.
1571 */
1572 static void
1573 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1574 size_t len, enum seg_rw rw)
1575 {
1576 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1577 struct seg *sptseg;
1578 struct spt_data *sptd;
1579 page_t *pp;
1580 caddr_t adr;
1581 struct vnode *vp;
1582 u_offset_t offset;
1583 ulong_t anon_index;
1584 struct anon_map *amp; /* XXX - for locknest */
1585 struct anon *ap = NULL;
1586 pgcnt_t npages;
1587
1588 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1589
1590 sptseg = shmd->shm_sptseg;
1591 sptd = sptseg->s_data;
1592
1593 /*
1594 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1595 * and therefore their pages are SE_SHARED locked
1596 * for the entire life of the segment.
1597 */
1598 if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1599 ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1600 goto softlock_decrement;
1601 }
1602
1603 /*
1604 * Any thread is free to do a page_find and
1605 * page_unlock() on the pages within this seg.
1606 *
1607 * We are already holding the as->a_lock on the user's
1608 * real segment, but we need to hold the a_lock on the
1609 * underlying dummy as. This is mostly to satisfy the
1610 * underlying HAT layer.
1611 */
1612 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1613 hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1614 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1615
1616 amp = sptd->spt_amp;
1617 ASSERT(amp != NULL);
1618 anon_index = seg_page(sptseg, sptseg_addr);
1619
1620 for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1621 ap = anon_get_ptr(amp->ahp, anon_index++);
1622 ASSERT(ap != NULL);
1623 swap_xlate(ap, &vp, &offset);
1624
1625 /*
1626 * Use page_find() instead of page_lookup() to
1627 * find the page since we know that it has a
1628 * "shared" lock.
1629 */
1630 pp = page_find(vp, offset);
1631 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1632 if (pp == NULL) {
1633 panic("segspt_softunlock: "
1634 "addr %p, ap %p, vp %p, off %llx",
1659 if (AS_ISUNMAPWAIT(seg->s_as)) {
1660 mutex_enter(&seg->s_as->a_contents);
1661 if (AS_ISUNMAPWAIT(seg->s_as)) {
1662 AS_CLRUNMAPWAIT(seg->s_as);
1663 cv_broadcast(&seg->s_as->a_cv);
1664 }
1665 mutex_exit(&seg->s_as->a_contents);
1666 }
1667 }
1668 }
1669
1670 int
1671 segspt_shmattach(struct seg *seg, caddr_t *argsp)
1672 {
1673 struct shm_data *shmd_arg = (struct shm_data *)argsp;
1674 struct shm_data *shmd;
1675 struct anon_map *shm_amp = shmd_arg->shm_amp;
1676 struct spt_data *sptd;
1677 int error = 0;
1678
1679 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1680
1681 shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1682 if (shmd == NULL)
1683 return (ENOMEM);
1684
1685 shmd->shm_sptas = shmd_arg->shm_sptas;
1686 shmd->shm_amp = shm_amp;
1687 shmd->shm_sptseg = shmd_arg->shm_sptseg;
1688
1689 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1690 NULL, 0, seg->s_size);
1691
1692 mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);
1693
1694 seg->s_data = (void *)shmd;
1695 seg->s_ops = &segspt_shmops;
1696 seg->s_szc = shmd->shm_sptseg->s_szc;
1697 sptd = shmd->shm_sptseg->s_data;
1698
1699 if (sptd->spt_flags & SHM_PAGEABLE) {
1718 seg->s_size, seg->s_szc);
1719 }
1720 if (error) {
1721 seg->s_szc = 0;
1722 seg->s_data = (void *)NULL;
1723 kmem_free(shmd, (sizeof (*shmd)));
1724 } else {
1725 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1726 shm_amp->refcnt++;
1727 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1728 }
1729 return (error);
1730 }
1731
1732 int
1733 segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
1734 {
1735 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1736 int reclaim = 1;
1737
1738 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1739 retry:
1740 if (shmd->shm_softlockcnt > 0) {
1741 if (reclaim == 1) {
1742 segspt_purge(seg);
1743 reclaim = 0;
1744 goto retry;
1745 }
1746 return (EAGAIN);
1747 }
1748
1749 if (ssize != seg->s_size) {
1750 #ifdef DEBUG
1751 cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
1752 ssize, seg->s_size);
1753 #endif
1754 return (EINVAL);
1755 }
1756
1757 (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
1758 NULL, 0);
1759 hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);
1760
1761 seg_free(seg);
1762
1763 return (0);
1764 }
1765
1766 void
1767 segspt_shmfree(struct seg *seg)
1768 {
1769 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1770 struct anon_map *shm_amp = shmd->shm_amp;
1771
1772 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1773
1774 (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
1775 MC_UNLOCK, NULL, 0);
1776
1777 /*
1778 * Need to increment refcnt when attaching
1779 * and decrement when detaching because of dup().
1780 */
1781 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1782 shm_amp->refcnt--;
1783 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1784
1785 if (shmd->shm_vpage) { /* only for DISM */
1786 kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
1787 shmd->shm_vpage = NULL;
1788 }
1789
1790 /*
1791 * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
1792 * still working with this segment without holding as lock.
1793 */
1794 ASSERT(shmd->shm_softlockcnt == 0);
1795 mutex_enter(&shmd->shm_segfree_syncmtx);
1796 mutex_destroy(&shmd->shm_segfree_syncmtx);
1797
1798 kmem_free(shmd, sizeof (*shmd));
1799 }
1800
1801 /*ARGSUSED*/
1802 int
1803 segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1804 {
1805 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1806
1807 /*
1808 * Shared page table is more than shared mapping.
1809 * Individual process sharing page tables can't change prot
1810 * because there is only one set of page tables.
1811 * This will be allowed after private page table is
1812 * supported.
1813 */
1814 /* need to return correct status error? */
1815 return (0);
1816 }
1817
1818
1819 faultcode_t
1820 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1821 size_t len, enum fault_type type, enum seg_rw rw)
1822 {
1823 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1824 struct seg *sptseg = shmd->shm_sptseg;
1825 struct as *curspt = shmd->shm_sptas;
1826 struct spt_data *sptd = sptseg->s_data;
1827 pgcnt_t npages;
1828 size_t size;
1829 caddr_t segspt_addr, shm_addr;
1830 page_t **ppa;
1831 int i;
1832 ulong_t an_idx = 0;
1833 int err = 0;
1834 int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
1835 size_t pgsz;
1836 pgcnt_t pgcnt;
1837 caddr_t a;
1838 pgcnt_t pidx;
1839
1840 #ifdef lint
1841 hat = hat;
1842 #endif
1843 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1844
1845 /*
1846 * Because of the way spt is implemented
1847 * the realsize of the segment does not have to be
1848 * equal to the segment size itself. The segment size is
1849 * often in multiples of a page size larger than PAGESIZE.
1850 * The realsize is rounded up to the nearest PAGESIZE
1851 * based on what the user requested. This is a bit of
1852 * ungliness that is historical but not easily fixed
1853 * without re-designing the higher levels of ISM.
1854 */
1855 ASSERT(addr >= seg->s_base);
1856 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1857 return (FC_NOMAP);
1858 /*
1859 * For all of the following cases except F_PROT, we need to
1860 * make any necessary adjustments to addr and len
1861 * and get all of the necessary page_t's into an array called ppa[].
1862 *
1863 * The code in shmat() forces base addr and len of ISM segment
1892 /*
1893 * Fall through to the F_INVAL case to load up the hat layer
1894 * entries with the HAT_LOAD_LOCK flag.
1895 */
1896 /* FALLTHRU */
1897 case F_INVAL:
1898
1899 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1900 return (FC_NOMAP);
1901
1902 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1903
1904 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1905 if (err != 0) {
1906 if (type == F_SOFTLOCK) {
1907 atomic_add_long((ulong_t *)(
1908 &(shmd->shm_softlockcnt)), -npages);
1909 }
1910 goto dism_err;
1911 }
1912 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1913 a = segspt_addr;
1914 pidx = 0;
1915 if (type == F_SOFTLOCK) {
1916
1917 /*
1918 * Load up the translation keeping it
1919 * locked and don't unlock the page.
1920 */
1921 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1922 hat_memload_array(sptseg->s_as->a_hat,
1923 a, pgsz, &ppa[pidx], sptd->spt_prot,
1924 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1925 }
1926 } else {
1927 if (hat == seg->s_as->a_hat) {
1928
1929 /*
1930 * Migrate pages marked for migration
1931 */
1932 if (lgrp_optimizations())
1953 if (dyn_ism_unmap) {
1954 for (i = 0; i < npages; i++) {
1955 page_unlock(ppa[i]);
1956 }
1957 }
1958 }
1959
1960 if (!dyn_ism_unmap) {
1961 if (hat_share(seg->s_as->a_hat, shm_addr,
1962 curspt->a_hat, segspt_addr, ptob(npages),
1963 seg->s_szc) != 0) {
1964 panic("hat_share err in DISM fault");
1965 /* NOTREACHED */
1966 }
1967 if (type == F_INVAL) {
1968 for (i = 0; i < npages; i++) {
1969 page_unlock(ppa[i]);
1970 }
1971 }
1972 }
1973 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1974 dism_err:
1975 kmem_free(ppa, npages * sizeof (page_t *));
1976 return (err);
1977
1978 case F_SOFTUNLOCK:
1979
1980 /*
1981 * This is a bit ugly, we pass in the real seg pointer,
1982 * but the segspt_addr is the virtual address within the
1983 * dummy seg.
1984 */
1985 segspt_softunlock(seg, segspt_addr, size, rw);
1986 return (0);
1987
1988 case F_PROT:
1989
1990 /*
1991 * This takes care of the unusual case where a user
1992 * allocates a stack in shared memory and a register
1993 * window overflow is written to that stack page before
2020 pgcnt_t npages;
2021 size_t size;
2022 caddr_t sptseg_addr, shm_addr;
2023 page_t *pp, **ppa;
2024 int i;
2025 u_offset_t offset;
2026 ulong_t anon_index = 0;
2027 struct vnode *vp;
2028 struct anon_map *amp; /* XXX - for locknest */
2029 struct anon *ap = NULL;
2030 size_t pgsz;
2031 pgcnt_t pgcnt;
2032 caddr_t a;
2033 pgcnt_t pidx;
2034 size_t sz;
2035
2036 #ifdef lint
2037 hat = hat;
2038 #endif
2039
2040 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2041
2042 if (sptd->spt_flags & SHM_PAGEABLE) {
2043 return (segspt_dismfault(hat, seg, addr, len, type, rw));
2044 }
2045
2046 /*
2047 * Because of the way spt is implemented
2048 * the realsize of the segment does not have to be
2049 * equal to the segment size itself. The segment size is
2050 * often in multiples of a page size larger than PAGESIZE.
2051 * The realsize is rounded up to the nearest PAGESIZE
2052 * based on what the user requested. This is a bit of
2053 * ungliness that is historical but not easily fixed
2054 * without re-designing the higher levels of ISM.
2055 */
2056 ASSERT(addr >= seg->s_base);
2057 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
2058 return (FC_NOMAP);
2059 /*
2060 * For all of the following cases except F_PROT, we need to
2152 anon_index = seg_page(sptseg, sptseg_addr);
2153
2154 ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
2155 for (i = 0; i < npages; i++) {
2156 ap = anon_get_ptr(amp->ahp, anon_index++);
2157 ASSERT(ap != NULL);
2158 swap_xlate(ap, &vp, &offset);
2159 pp = page_lookup(vp, offset, SE_SHARED);
2160 ASSERT(pp != NULL);
2161 ppa[i] = pp;
2162 }
2163 ANON_LOCK_EXIT(&->a_rwlock);
2164 ASSERT(i == npages);
2165
2166 /*
2167 * We are already holding the as->a_lock on the user's
2168 * real segment, but we need to hold the a_lock on the
2169 * underlying dummy as. This is mostly to satisfy the
2170 * underlying HAT layer.
2171 */
2172 AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
2173 a = sptseg_addr;
2174 pidx = 0;
2175 if (type == F_SOFTLOCK) {
2176 /*
2177 * Load up the translation keeping it
2178 * locked and don't unlock the page.
2179 */
2180 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2181 sz = MIN(pgsz, ptob(npages - pidx));
2182 hat_memload_array(sptseg->s_as->a_hat, a,
2183 sz, &ppa[pidx], sptd->spt_prot,
2184 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2185 }
2186 } else {
2187 if (hat == seg->s_as->a_hat) {
2188
2189 /*
2190 * Migrate pages marked for migration.
2191 */
2192 if (lgrp_optimizations())
2197 for (; pidx < npages;
2198 a += pgsz, pidx += pgcnt) {
2199 sz = MIN(pgsz, ptob(npages - pidx));
2200 hat_memload_array(sptseg->s_as->a_hat,
2201 a, sz, &ppa[pidx],
2202 sptd->spt_prot, HAT_LOAD_SHARE);
2203 }
2204 } else {
2205 /* XHAT. Pass real address */
2206 hat_memload_array(hat, shm_addr,
2207 ptob(npages), ppa, sptd->spt_prot,
2208 HAT_LOAD_SHARE);
2209 }
2210
2211 /*
2212 * And now drop the SE_SHARED lock(s).
2213 */
2214 for (i = 0; i < npages; i++)
2215 page_unlock(ppa[i]);
2216 }
2217 AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
2218
2219 kmem_free(ppa, sizeof (page_t *) * npages);
2220 return (0);
2221 case F_SOFTUNLOCK:
2222
2223 /*
2224 * This is a bit ugly, we pass in the real seg pointer,
2225 * but the sptseg_addr is the virtual address within the
2226 * dummy seg.
2227 */
2228 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2229 return (0);
2230
2231 case F_PROT:
2232
2233 /*
2234 * This takes care of the unusual case where a user
2235 * allocates a stack in shared memory and a register
2236 * window overflow is written to that stack page before
2237 * it is otherwise modified.
2267 /*ARGSUSED*/
2268 static size_t
2269 segspt_shmswapout(struct seg *seg)
2270 {
2271 return (0);
2272 }
2273
2274 /*
2275 * duplicate the shared page tables
2276 */
2277 int
2278 segspt_shmdup(struct seg *seg, struct seg *newseg)
2279 {
2280 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2281 struct anon_map *amp = shmd->shm_amp;
2282 struct shm_data *shmd_new;
2283 struct seg *spt_seg = shmd->shm_sptseg;
2284 struct spt_data *sptd = spt_seg->s_data;
2285 int error = 0;
2286
2287 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
2288
2289 shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2290 newseg->s_data = (void *)shmd_new;
2291 shmd_new->shm_sptas = shmd->shm_sptas;
2292 shmd_new->shm_amp = amp;
2293 shmd_new->shm_sptseg = shmd->shm_sptseg;
2294 newseg->s_ops = &segspt_shmops;
2295 newseg->s_szc = seg->s_szc;
2296 ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2297
2298 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
2299 amp->refcnt++;
2300 ANON_LOCK_EXIT(&->a_rwlock);
2301
2302 if (sptd->spt_flags & SHM_PAGEABLE) {
2303 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2304 shmd_new->shm_lckpgs = 0;
2305 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2306 if ((error = hat_share(newseg->s_as->a_hat,
2307 newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
2309 kmem_free(shmd_new->shm_vpage,
2310 btopr(amp->size));
2311 }
2312 }
2313 return (error);
2314 } else {
2315 return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2316 shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2317 seg->s_szc));
2318
2319 }
2320 }
2321
2322 /*ARGSUSED*/
2323 int
2324 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2325 {
2326 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2327 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2328
2329 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2330
2331 /*
2332 * ISM segment is always rw.
2333 */
2334 return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2335 }
2336
2337 /*
2338 * Return an array of locked large pages, for empty slots allocate
2339 * private zero-filled anon pages.
2340 */
2341 static int
2342 spt_anon_getpages(
2343 struct seg *sptseg,
2344 caddr_t sptaddr,
2345 size_t len,
2346 page_t *ppa[])
2347 {
2348 struct spt_data *sptd = sptseg->s_data;
2349 struct anon_map *amp = sptd->spt_amp;
2660 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2661 int attr, int op, ulong_t *lockmap, size_t pos)
2662 {
2663 struct shm_data *shmd = seg->s_data;
2664 struct seg *sptseg = shmd->shm_sptseg;
2665 struct spt_data *sptd = sptseg->s_data;
2666 struct kshmid *sp = sptd->spt_amp->a_sp;
2667 pgcnt_t npages, a_npages;
2668 page_t **ppa;
2669 pgcnt_t an_idx, a_an_idx, ppa_idx;
2670 caddr_t spt_addr, a_addr; /* spt and aligned address */
2671 size_t a_len; /* aligned len */
2672 size_t share_sz;
2673 ulong_t i;
2674 int sts = 0;
2675 rctl_qty_t unlocked = 0;
2676 rctl_qty_t locked = 0;
2677 struct proc *p = curproc;
2678 kproject_t *proj;
2679
2680 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2681 ASSERT(sp != NULL);
2682
2683 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2684 return (0);
2685 }
2686
2687 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2688 an_idx = seg_page(seg, addr);
2689 npages = btopr(len);
2690
2691 if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2692 return (ENOMEM);
2693 }
2694
2695 /*
2696 * A shm's project never changes, so no lock needed.
2697 * The shm has a hold on the project, so it will not go away.
2698 * Since we have a mapping to shm within this zone, we know
2699 * that the zone will not go away.
2700 */
2790 sptd->spt_flags |= DISM_PPA_CHANGED;
2791 mutex_exit(&sptd->spt_lock);
2792
2793 rctl_decr_locked_mem(NULL, proj, unlocked, 0);
2794 mutex_exit(&sp->shm_mlock);
2795
2796 if (ppa != NULL)
2797 seg_ppurge_wiredpp(ppa);
2798 }
2799 return (sts);
2800 }
2801
2802 /*ARGSUSED*/
2803 int
2804 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2805 {
2806 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2807 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2808 spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2809
2810 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2811
2812 /*
2813 * ISM segment is always rw.
2814 */
2815 while (--pgno >= 0)
2816 *protv++ = sptd->spt_prot;
2817 return (0);
2818 }
2819
2820 /*ARGSUSED*/
2821 u_offset_t
2822 segspt_shmgetoffset(struct seg *seg, caddr_t addr)
2823 {
2824 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2825
2826 /* Offset does not matter in ISM memory */
2827
2828 return ((u_offset_t)0);
2829 }
2830
2831 /* ARGSUSED */
2832 int
2833 segspt_shmgettype(struct seg *seg, caddr_t addr)
2834 {
2835 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2836 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2837
2838 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2839
2840 /*
2841 * The shared memory mapping is always MAP_SHARED, SWAP is only
2842 * reserved for DISM
2843 */
2844 return (MAP_SHARED |
2845 ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2846 }
2847
2848 /*ARGSUSED*/
2849 int
2850 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2851 {
2852 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2853 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2854
2855 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2856
2857 *vpp = sptd->spt_vp;
2858 return (0);
2859 }
2860
2861 /*
2862 * We need to wait for pending IO to complete to a DISM segment in order for
2863 * pages to get kicked out of the seg_pcache. 120 seconds should be more
2864 * than enough time to wait.
2865 */
2866 static clock_t spt_pcache_wait = 120;
2867
2868 /*ARGSUSED*/
2869 static int
2870 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2871 {
2872 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2873 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2874 struct anon_map *amp;
2875 pgcnt_t pg_idx;
2876 ushort_t gen;
2877 clock_t end_lbolt;
2878 int writer;
2879 page_t **ppa;
2880
2881 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2882
2883 if (behav == MADV_FREE) {
2884 if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2885 return (0);
2886
2887 amp = sptd->spt_amp;
2888 pg_idx = seg_page(seg, addr);
2889
2890 mutex_enter(&sptd->spt_lock);
2891 if ((ppa = sptd->spt_ppa) == NULL) {
2892 mutex_exit(&sptd->spt_lock);
2893 ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
2894 anon_disclaim(amp, pg_idx, len);
2895 ANON_LOCK_EXIT(&->a_rwlock);
2896 return (0);
2897 }
2898
2899 sptd->spt_flags |= DISM_PPA_CHANGED;
2900 gen = sptd->spt_gen;
2901
2902 mutex_exit(&sptd->spt_lock);
2903
2904 /*
2905 * Purge all DISM cached pages
2906 */
2907 seg_ppurge_wiredpp(ppa);
2908
2909 /*
2910 * Drop the AS_LOCK so that other threads can grab it
2911 * in the as_pageunlock path and hopefully get the segment
2912 * kicked out of the seg_pcache. We bump the shm_softlockcnt
2913 * to keep this segment resident.
2914 */
2915 writer = AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock);
2916 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2917 AS_LOCK_EXIT(seg->s_as, &seg->s_as->a_lock);
2918
2919 mutex_enter(&sptd->spt_lock);
2920
2921 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);
2922
2923 /*
2924 * Try to wait for pages to get kicked out of the seg_pcache.
2925 */
2926 while (sptd->spt_gen == gen &&
2927 (sptd->spt_flags & DISM_PPA_CHANGED) &&
2928 ddi_get_lbolt() < end_lbolt) {
2929 if (!cv_timedwait_sig(&sptd->spt_cv,
2930 &sptd->spt_lock, end_lbolt)) {
2931 break;
2932 }
2933 }
2934
2935 mutex_exit(&sptd->spt_lock);
2936
2937 /* Regrab the AS_LOCK and release our hold on the segment */
2938 AS_LOCK_ENTER(seg->s_as, &seg->s_as->a_lock,
2939 writer ? RW_WRITER : RW_READER);
2940 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2941 if (shmd->shm_softlockcnt <= 0) {
2942 if (AS_ISUNMAPWAIT(seg->s_as)) {
2943 mutex_enter(&seg->s_as->a_contents);
2944 if (AS_ISUNMAPWAIT(seg->s_as)) {
2945 AS_CLRUNMAPWAIT(seg->s_as);
2946 cv_broadcast(&seg->s_as->a_cv);
2947 }
2948 mutex_exit(&seg->s_as->a_contents);
2949 }
2950 }
2951
2952 ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
2953 anon_disclaim(amp, pg_idx, len);
2954 ANON_LOCK_EXIT(&->a_rwlock);
2955 } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2956 behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2957 int already_set;
2958 ulong_t anon_index;
2959 lgrp_mem_policy_t policy;
|
224 sptdestroy(struct as *as, struct anon_map *amp)
225 {
226
227 #ifdef DEBUG
228 TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
229 #endif
230 (void) as_unmap(as, SEGSPTADDR, amp->size);
231 as_free(as);
232 }
233
/*
 * called from seg_free().
 * free (i.e., unlock, unmap, return to free list)
 * all the pages in the given seg.
 */
void
segspt_free(struct seg *seg)
{
	struct spt_data *sptd = (struct spt_data *)seg->s_data;

	/* Caller must hold the address space lock as writer. */
	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	if (sptd != NULL) {
		/* Release the backing pages if any were ever set up. */
		if (sptd->spt_realsize)
			segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);

		/*
		 * Free the per-page lock-count array if one was
		 * allocated (NOTE(review): appears to exist only for
		 * pageable/DISM segments — confirm against creation path).
		 */
		if (sptd->spt_ppa_lckcnt)
			kmem_free(sptd->spt_ppa_lckcnt,
			    sizeof (*sptd->spt_ppa_lckcnt)
			    * btopr(sptd->spt_amp->size));
		kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
		cv_destroy(&sptd->spt_cv);
		mutex_destroy(&sptd->spt_lock);
		kmem_free(sptd, sizeof (*sptd));
	}
}
260
/*ARGSUSED*/
static int
segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
	uint_t flags)
{
	/*
	 * Nothing to write back for shared anonymous memory; sync is
	 * a successful no-op.
	 */
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	return (0);
}
270
271 /*ARGSUSED*/
272 static size_t
273 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
274 {
275 caddr_t eo_seg;
276 pgcnt_t npages;
277 struct shm_data *shmd = (struct shm_data *)seg->s_data;
278 struct seg *sptseg;
279 struct spt_data *sptd;
280
281 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
282 #ifdef lint
283 seg = seg;
284 #endif
285 sptseg = shmd->shm_sptseg;
286 sptd = sptseg->s_data;
287
288 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
289 eo_seg = addr + len;
290 while (addr < eo_seg) {
291 /* page exists, and it's locked. */
292 *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
293 SEG_PAGE_ANON;
294 addr += PAGESIZE;
295 }
296 return (len);
297 } else {
298 struct anon_map *amp = shmd->shm_amp;
299 struct anon *ap;
300 page_t *pp;
301 pgcnt_t anon_index;
325 page_unlock(pp);
326 }
327 } else {
328 anon_array_exit(&cookie);
329 }
330 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
331 ret |= SEG_PAGE_LOCKED;
332 }
333 *vec++ = (char)ret;
334 }
335 ANON_LOCK_EXIT(&->a_rwlock);
336 return (len);
337 }
338 }
339
340 static int
341 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
342 {
343 size_t share_size;
344
345 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
346
347 /*
348 * seg.s_size may have been rounded up to the largest page size
349 * in shmat().
350 * XXX This should be cleanedup. sptdestroy should take a length
351 * argument which should be the same as sptcreate. Then
352 * this rounding would not be needed (or is done in shm.c)
353 * Only the check for full segment will be needed.
354 *
355 * XXX -- shouldn't raddr == 0 always? These tests don't seem
356 * to be useful at all.
357 */
358 share_size = page_get_pagesize(seg->s_szc);
359 ssize = P2ROUNDUP(ssize, share_size);
360
361 if (raddr == seg->s_base && ssize == seg->s_size) {
362 seg_free(seg);
363 return (0);
364 } else
365 return (EINVAL);
376 struct kshmid *sp = amp->a_sp;
377 struct cred *cred = CRED();
378 ulong_t i, j, anon_index = 0;
379 pgcnt_t npages = btopr(amp->size);
380 struct vnode *vp;
381 page_t **ppa;
382 uint_t hat_flags;
383 size_t pgsz;
384 pgcnt_t pgcnt;
385 caddr_t a;
386 pgcnt_t pidx;
387 size_t sz;
388 proc_t *procp = curproc;
389 rctl_qty_t lockedbytes = 0;
390 kproject_t *proj;
391
392 /*
393 * We are holding the a_lock on the underlying dummy as,
394 * so we can make calls to the HAT layer.
395 */
396 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
397 ASSERT(sp != NULL);
398
399 #ifdef DEBUG
400 TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
401 tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
402 #endif
403 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
404 if (err = anon_swap_adjust(npages))
405 return (err);
406 }
407 err = ENOMEM;
408
409 if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
410 goto out1;
411
412 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
413 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
414 KM_NOSLEEP)) == NULL)
415 goto out2;
416 }
612 /*ARGSUSED*/
613 void
614 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
615 {
616 struct page *pp;
617 struct spt_data *sptd = (struct spt_data *)seg->s_data;
618 pgcnt_t npages;
619 ulong_t anon_idx;
620 struct anon_map *amp;
621 struct anon *ap;
622 struct vnode *vp;
623 u_offset_t off;
624 uint_t hat_flags;
625 int root = 0;
626 pgcnt_t pgs, curnpgs = 0;
627 page_t *rootpp;
628 rctl_qty_t unlocked_bytes = 0;
629 kproject_t *proj;
630 kshmid_t *sp;
631
632 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
633
634 len = P2ROUNDUP(len, PAGESIZE);
635
636 npages = btop(len);
637
638 hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
639 if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
640 (sptd->spt_flags & SHM_PAGEABLE)) {
641 hat_flags = HAT_UNLOAD_UNMAP;
642 }
643
644 hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
645
646 amp = sptd->spt_amp;
647 if (sptd->spt_flags & SHM_PAGEABLE)
648 npages = btop(amp->size);
649
650 ASSERT(amp != NULL);
651
652 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
821 /*ARGSUSED*/
822 static int
823 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
824 struct page ***ppp, enum lock_type type, enum seg_rw rw)
825 {
826 struct shm_data *shmd = (struct shm_data *)seg->s_data;
827 struct seg *sptseg = shmd->shm_sptseg;
828 struct spt_data *sptd = sptseg->s_data;
829 pgcnt_t pg_idx, npages, tot_npages, npgs;
830 struct page **pplist, **pl, **ppa, *pp;
831 struct anon_map *amp;
832 spgcnt_t an_idx;
833 int ret = ENOTSUP;
834 uint_t pl_built = 0;
835 struct anon *ap;
836 struct vnode *vp;
837 u_offset_t off;
838 pgcnt_t claim_availrmem = 0;
839 uint_t szc;
840
841 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
842 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
843
844 /*
845 * We want to lock/unlock the entire ISM segment. Therefore,
846 * we will be using the underlying sptseg and it's base address
847 * and length for the caching arguments.
848 */
849 ASSERT(sptseg);
850 ASSERT(sptd);
851
852 pg_idx = seg_page(seg, addr);
853 npages = btopr(len);
854
855 /*
856 * check if the request is larger than number of pages covered
857 * by amp
858 */
859 if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
860 *ppp = NULL;
861 return (ENOTSUP);
1176 */
1177 /*ARGSUSED*/
1178 static int
1179 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
1180 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1181 {
1182 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1183 struct seg *sptseg = shmd->shm_sptseg;
1184 struct spt_data *sptd = sptseg->s_data;
1185 pgcnt_t np, page_index, npages;
1186 caddr_t a, spt_base;
1187 struct page **pplist, **pl, *pp;
1188 struct anon_map *amp;
1189 ulong_t anon_index;
1190 int ret = ENOTSUP;
1191 uint_t pl_built = 0;
1192 struct anon *ap;
1193 struct vnode *vp;
1194 u_offset_t off;
1195
1196 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1197 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
1198
1199
1200 /*
1201 * We want to lock/unlock the entire ISM segment. Therefore,
1202 * we will be using the underlying sptseg and it's base address
1203 * and length for the caching arguments.
1204 */
1205 ASSERT(sptseg);
1206 ASSERT(sptd);
1207
1208 if (sptd->spt_flags & SHM_PAGEABLE) {
1209 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
1210 }
1211
1212 page_index = seg_page(seg, addr);
1213 npages = btopr(len);
1214
1215 /*
1216 * check if the request is larger than number of pages covered
1434 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
1435 enum seg_rw rw, int async)
1436 {
1437 struct seg *seg = (struct seg *)ptag;
1438 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1439 struct seg *sptseg;
1440 struct spt_data *sptd;
1441 pgcnt_t npages, i, free_availrmem = 0;
1442 int done = 0;
1443
1444 #ifdef lint
1445 addr = addr;
1446 #endif
1447 sptseg = shmd->shm_sptseg;
1448 sptd = sptseg->s_data;
1449 npages = (len >> PAGESHIFT);
1450 ASSERT(npages);
1451 ASSERT(sptd->spt_pcachecnt != 0);
1452 ASSERT(sptd->spt_ppa == pplist);
1453 ASSERT(npages == btopr(sptd->spt_amp->size));
1454 ASSERT(async || AS_LOCK_HELD(seg->s_as));
1455
1456 /*
1457 * Acquire the lock on the dummy seg and destroy the
1458 * ppa array IF this is the last pcachecnt.
1459 */
1460 mutex_enter(&sptd->spt_lock);
1461 if (--sptd->spt_pcachecnt == 0) {
1462 for (i = 0; i < npages; i++) {
1463 if (pplist[i] == NULL) {
1464 continue;
1465 }
1466 if (rw == S_WRITE) {
1467 hat_setrefmod(pplist[i]);
1468 } else {
1469 hat_setref(pplist[i]);
1470 }
1471 if ((sptd->spt_flags & SHM_PAGEABLE) &&
1472 (sptd->spt_ppa_lckcnt[i] == 0))
1473 free_availrmem++;
1474 page_unlock(pplist[i]);
1568 * its a read. Thus cow faults can be ignored with respect to soft
1569 * unlocking, since the breaking of cow means that the anon slot(s) will
1570 * not be shared.
1571 */
1572 static void
1573 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1574 size_t len, enum seg_rw rw)
1575 {
1576 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1577 struct seg *sptseg;
1578 struct spt_data *sptd;
1579 page_t *pp;
1580 caddr_t adr;
1581 struct vnode *vp;
1582 u_offset_t offset;
1583 ulong_t anon_index;
1584 struct anon_map *amp; /* XXX - for locknest */
1585 struct anon *ap = NULL;
1586 pgcnt_t npages;
1587
1588 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1589
1590 sptseg = shmd->shm_sptseg;
1591 sptd = sptseg->s_data;
1592
1593 /*
1594 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1595 * and therefore their pages are SE_SHARED locked
1596 * for the entire life of the segment.
1597 */
1598 if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1599 ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1600 goto softlock_decrement;
1601 }
1602
1603 /*
1604 * Any thread is free to do a page_find and
1605 * page_unlock() on the pages within this seg.
1606 *
1607 * We are already holding the as->a_lock on the user's
1608 * real segment, but we need to hold the a_lock on the
1609 * underlying dummy as. This is mostly to satisfy the
1610 * underlying HAT layer.
1611 */
1612 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1613 hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1614 AS_LOCK_EXIT(sptseg->s_as);
1615
1616 amp = sptd->spt_amp;
1617 ASSERT(amp != NULL);
1618 anon_index = seg_page(sptseg, sptseg_addr);
1619
1620 for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1621 ap = anon_get_ptr(amp->ahp, anon_index++);
1622 ASSERT(ap != NULL);
1623 swap_xlate(ap, &vp, &offset);
1624
1625 /*
1626 * Use page_find() instead of page_lookup() to
1627 * find the page since we know that it has a
1628 * "shared" lock.
1629 */
1630 pp = page_find(vp, offset);
1631 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1632 if (pp == NULL) {
1633 panic("segspt_softunlock: "
1634 "addr %p, ap %p, vp %p, off %llx",
1659 if (AS_ISUNMAPWAIT(seg->s_as)) {
1660 mutex_enter(&seg->s_as->a_contents);
1661 if (AS_ISUNMAPWAIT(seg->s_as)) {
1662 AS_CLRUNMAPWAIT(seg->s_as);
1663 cv_broadcast(&seg->s_as->a_cv);
1664 }
1665 mutex_exit(&seg->s_as->a_contents);
1666 }
1667 }
1668 }
1669
1670 int
1671 segspt_shmattach(struct seg *seg, caddr_t *argsp)
1672 {
1673 struct shm_data *shmd_arg = (struct shm_data *)argsp;
1674 struct shm_data *shmd;
1675 struct anon_map *shm_amp = shmd_arg->shm_amp;
1676 struct spt_data *sptd;
1677 int error = 0;
1678
1679 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1680
1681 shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1682 if (shmd == NULL)
1683 return (ENOMEM);
1684
1685 shmd->shm_sptas = shmd_arg->shm_sptas;
1686 shmd->shm_amp = shm_amp;
1687 shmd->shm_sptseg = shmd_arg->shm_sptseg;
1688
1689 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1690 NULL, 0, seg->s_size);
1691
1692 mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);
1693
1694 seg->s_data = (void *)shmd;
1695 seg->s_ops = &segspt_shmops;
1696 seg->s_szc = shmd->shm_sptseg->s_szc;
1697 sptd = shmd->shm_sptseg->s_data;
1698
1699 if (sptd->spt_flags & SHM_PAGEABLE) {
1718 seg->s_size, seg->s_szc);
1719 }
1720 if (error) {
1721 seg->s_szc = 0;
1722 seg->s_data = (void *)NULL;
1723 kmem_free(shmd, (sizeof (*shmd)));
1724 } else {
1725 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1726 shm_amp->refcnt++;
1727 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1728 }
1729 return (error);
1730 }
1731
int
segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	int reclaim = 1;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
retry:
	if (shmd->shm_softlockcnt > 0) {
		/*
		 * Softlocked pages may still be held by the pcache;
		 * purge once and retry before failing with EAGAIN.
		 */
		if (reclaim == 1) {
			segspt_purge(seg);
			reclaim = 0;
			goto retry;
		}
		return (EAGAIN);
	}

	/* Only whole-segment unmaps are supported. */
	if (ssize != seg->s_size) {
#ifdef DEBUG
		cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
		    ssize, seg->s_size);
#endif
		return (EINVAL);
	}

	/* Drop memory locks, detach the shared mapping, free the seg. */
	(void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
	    NULL, 0);
	hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);

	seg_free(seg);

	return (0);
}
1765
void
segspt_shmfree(struct seg *seg)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct anon_map *shm_amp = shmd->shm_amp;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/* Drop any memory locks this process still holds on the segment. */
	(void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
	    MC_UNLOCK, NULL, 0);

	/*
	 * Need to increment refcnt when attaching
	 * and decrement when detaching because of dup().
	 */
	ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
	shm_amp->refcnt--;
	ANON_LOCK_EXIT(&shm_amp->a_rwlock);

	if (shmd->shm_vpage) {	/* only for DISM */
		kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
		shmd->shm_vpage = NULL;
	}

	/*
	 * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
	 * still working with this segment without holding as lock.
	 */
	ASSERT(shmd->shm_softlockcnt == 0);
	mutex_enter(&shmd->shm_segfree_syncmtx);
	mutex_destroy(&shmd->shm_segfree_syncmtx);

	kmem_free(shmd, sizeof (*shmd));
}
1800
/*ARGSUSED*/
int
segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * Shared page table is more than shared mapping.
	 * Individual process sharing page tables can't change prot
	 * because there is only one set of page tables.
	 * This will be allowed after private page table is
	 * supported.
	 */
	/* need to return correct status error? */
	return (0);
}
1817
1818
1819 faultcode_t
1820 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1821 size_t len, enum fault_type type, enum seg_rw rw)
1822 {
1823 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1824 struct seg *sptseg = shmd->shm_sptseg;
1825 struct as *curspt = shmd->shm_sptas;
1826 struct spt_data *sptd = sptseg->s_data;
1827 pgcnt_t npages;
1828 size_t size;
1829 caddr_t segspt_addr, shm_addr;
1830 page_t **ppa;
1831 int i;
1832 ulong_t an_idx = 0;
1833 int err = 0;
1834 int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
1835 size_t pgsz;
1836 pgcnt_t pgcnt;
1837 caddr_t a;
1838 pgcnt_t pidx;
1839
1840 #ifdef lint
1841 hat = hat;
1842 #endif
1843 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1844
1845 /*
1846 * Because of the way spt is implemented
1847 * the realsize of the segment does not have to be
1848 * equal to the segment size itself. The segment size is
1849 * often in multiples of a page size larger than PAGESIZE.
1850 * The realsize is rounded up to the nearest PAGESIZE
1851 * based on what the user requested. This is a bit of
1852 * ungliness that is historical but not easily fixed
1853 * without re-designing the higher levels of ISM.
1854 */
1855 ASSERT(addr >= seg->s_base);
1856 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1857 return (FC_NOMAP);
1858 /*
1859 * For all of the following cases except F_PROT, we need to
1860 * make any necessary adjustments to addr and len
1861 * and get all of the necessary page_t's into an array called ppa[].
1862 *
1863 * The code in shmat() forces base addr and len of ISM segment
1892 /*
1893 * Fall through to the F_INVAL case to load up the hat layer
1894 * entries with the HAT_LOAD_LOCK flag.
1895 */
1896 /* FALLTHRU */
1897 case F_INVAL:
1898
1899 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1900 return (FC_NOMAP);
1901
1902 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1903
1904 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1905 if (err != 0) {
1906 if (type == F_SOFTLOCK) {
1907 atomic_add_long((ulong_t *)(
1908 &(shmd->shm_softlockcnt)), -npages);
1909 }
1910 goto dism_err;
1911 }
1912 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1913 a = segspt_addr;
1914 pidx = 0;
1915 if (type == F_SOFTLOCK) {
1916
1917 /*
1918 * Load up the translation keeping it
1919 * locked and don't unlock the page.
1920 */
1921 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1922 hat_memload_array(sptseg->s_as->a_hat,
1923 a, pgsz, &ppa[pidx], sptd->spt_prot,
1924 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1925 }
1926 } else {
1927 if (hat == seg->s_as->a_hat) {
1928
1929 /*
1930 * Migrate pages marked for migration
1931 */
1932 if (lgrp_optimizations())
1953 if (dyn_ism_unmap) {
1954 for (i = 0; i < npages; i++) {
1955 page_unlock(ppa[i]);
1956 }
1957 }
1958 }
1959
1960 if (!dyn_ism_unmap) {
1961 if (hat_share(seg->s_as->a_hat, shm_addr,
1962 curspt->a_hat, segspt_addr, ptob(npages),
1963 seg->s_szc) != 0) {
1964 panic("hat_share err in DISM fault");
1965 /* NOTREACHED */
1966 }
1967 if (type == F_INVAL) {
1968 for (i = 0; i < npages; i++) {
1969 page_unlock(ppa[i]);
1970 }
1971 }
1972 }
1973 AS_LOCK_EXIT(sptseg->s_as);
1974 dism_err:
1975 kmem_free(ppa, npages * sizeof (page_t *));
1976 return (err);
1977
1978 case F_SOFTUNLOCK:
1979
1980 /*
1981 * This is a bit ugly, we pass in the real seg pointer,
1982 * but the segspt_addr is the virtual address within the
1983 * dummy seg.
1984 */
1985 segspt_softunlock(seg, segspt_addr, size, rw);
1986 return (0);
1987
1988 case F_PROT:
1989
1990 /*
1991 * This takes care of the unusual case where a user
1992 * allocates a stack in shared memory and a register
1993 * window overflow is written to that stack page before
2020 pgcnt_t npages;
2021 size_t size;
2022 caddr_t sptseg_addr, shm_addr;
2023 page_t *pp, **ppa;
2024 int i;
2025 u_offset_t offset;
2026 ulong_t anon_index = 0;
2027 struct vnode *vp;
2028 struct anon_map *amp; /* XXX - for locknest */
2029 struct anon *ap = NULL;
2030 size_t pgsz;
2031 pgcnt_t pgcnt;
2032 caddr_t a;
2033 pgcnt_t pidx;
2034 size_t sz;
2035
2036 #ifdef lint
2037 hat = hat;
2038 #endif
2039
2040 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2041
2042 if (sptd->spt_flags & SHM_PAGEABLE) {
2043 return (segspt_dismfault(hat, seg, addr, len, type, rw));
2044 }
2045
2046 /*
2047 * Because of the way spt is implemented
2048 * the realsize of the segment does not have to be
2049 * equal to the segment size itself. The segment size is
2050 * often in multiples of a page size larger than PAGESIZE.
2051 * The realsize is rounded up to the nearest PAGESIZE
2052 * based on what the user requested. This is a bit of
2053 * ungliness that is historical but not easily fixed
2054 * without re-designing the higher levels of ISM.
2055 */
2056 ASSERT(addr >= seg->s_base);
2057 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
2058 return (FC_NOMAP);
2059 /*
2060 * For all of the following cases except F_PROT, we need to
2152 anon_index = seg_page(sptseg, sptseg_addr);
2153
2154 ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
2155 for (i = 0; i < npages; i++) {
2156 ap = anon_get_ptr(amp->ahp, anon_index++);
2157 ASSERT(ap != NULL);
2158 swap_xlate(ap, &vp, &offset);
2159 pp = page_lookup(vp, offset, SE_SHARED);
2160 ASSERT(pp != NULL);
2161 ppa[i] = pp;
2162 }
2163 ANON_LOCK_EXIT(&->a_rwlock);
2164 ASSERT(i == npages);
2165
2166 /*
2167 * We are already holding the as->a_lock on the user's
2168 * real segment, but we need to hold the a_lock on the
2169 * underlying dummy as. This is mostly to satisfy the
2170 * underlying HAT layer.
2171 */
2172 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
2173 a = sptseg_addr;
2174 pidx = 0;
2175 if (type == F_SOFTLOCK) {
2176 /*
2177 * Load up the translation keeping it
2178 * locked and don't unlock the page.
2179 */
2180 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2181 sz = MIN(pgsz, ptob(npages - pidx));
2182 hat_memload_array(sptseg->s_as->a_hat, a,
2183 sz, &ppa[pidx], sptd->spt_prot,
2184 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2185 }
2186 } else {
2187 if (hat == seg->s_as->a_hat) {
2188
2189 /*
2190 * Migrate pages marked for migration.
2191 */
2192 if (lgrp_optimizations())
2197 for (; pidx < npages;
2198 a += pgsz, pidx += pgcnt) {
2199 sz = MIN(pgsz, ptob(npages - pidx));
2200 hat_memload_array(sptseg->s_as->a_hat,
2201 a, sz, &ppa[pidx],
2202 sptd->spt_prot, HAT_LOAD_SHARE);
2203 }
2204 } else {
2205 /* XHAT. Pass real address */
2206 hat_memload_array(hat, shm_addr,
2207 ptob(npages), ppa, sptd->spt_prot,
2208 HAT_LOAD_SHARE);
2209 }
2210
2211 /*
2212 * And now drop the SE_SHARED lock(s).
2213 */
2214 for (i = 0; i < npages; i++)
2215 page_unlock(ppa[i]);
2216 }
2217 AS_LOCK_EXIT(sptseg->s_as);
2218
2219 kmem_free(ppa, sizeof (page_t *) * npages);
2220 return (0);
2221 case F_SOFTUNLOCK:
2222
2223 /*
2224 * This is a bit ugly, we pass in the real seg pointer,
2225 * but the sptseg_addr is the virtual address within the
2226 * dummy seg.
2227 */
2228 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2229 return (0);
2230
2231 case F_PROT:
2232
2233 /*
2234 * This takes care of the unusual case where a user
2235 * allocates a stack in shared memory and a register
2236 * window overflow is written to that stack page before
2237 * it is otherwise modified.
/*ARGSUSED*/
static size_t
segspt_shmswapout(struct seg *seg)
{
	/* Nothing to do for (D)ISM segments; report 0 bytes swapped out. */
	return (0);
}
2273
2274 /*
2275 * duplicate the shared page tables
2276 */
2277 int
2278 segspt_shmdup(struct seg *seg, struct seg *newseg)
2279 {
2280 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2281 struct anon_map *amp = shmd->shm_amp;
2282 struct shm_data *shmd_new;
2283 struct seg *spt_seg = shmd->shm_sptseg;
2284 struct spt_data *sptd = spt_seg->s_data;
2285 int error = 0;
2286
2287 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
2288
2289 shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2290 newseg->s_data = (void *)shmd_new;
2291 shmd_new->shm_sptas = shmd->shm_sptas;
2292 shmd_new->shm_amp = amp;
2293 shmd_new->shm_sptseg = shmd->shm_sptseg;
2294 newseg->s_ops = &segspt_shmops;
2295 newseg->s_szc = seg->s_szc;
2296 ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2297
2298 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
2299 amp->refcnt++;
2300 ANON_LOCK_EXIT(&->a_rwlock);
2301
2302 if (sptd->spt_flags & SHM_PAGEABLE) {
2303 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2304 shmd_new->shm_lckpgs = 0;
2305 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2306 if ((error = hat_share(newseg->s_as->a_hat,
2307 newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
2309 kmem_free(shmd_new->shm_vpage,
2310 btopr(amp->size));
2311 }
2312 }
2313 return (error);
2314 } else {
2315 return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2316 shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2317 seg->s_szc));
2318
2319 }
2320 }
2321
2322 /*ARGSUSED*/
2323 int
2324 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2325 {
2326 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2327 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2328
2329 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2330
2331 /*
2332 * ISM segment is always rw.
2333 */
2334 return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2335 }
2336
2337 /*
2338 * Return an array of locked large pages, for empty slots allocate
2339 * private zero-filled anon pages.
2340 */
2341 static int
2342 spt_anon_getpages(
2343 struct seg *sptseg,
2344 caddr_t sptaddr,
2345 size_t len,
2346 page_t *ppa[])
2347 {
2348 struct spt_data *sptd = sptseg->s_data;
2349 struct anon_map *amp = sptd->spt_amp;
2660 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2661 int attr, int op, ulong_t *lockmap, size_t pos)
2662 {
2663 struct shm_data *shmd = seg->s_data;
2664 struct seg *sptseg = shmd->shm_sptseg;
2665 struct spt_data *sptd = sptseg->s_data;
2666 struct kshmid *sp = sptd->spt_amp->a_sp;
2667 pgcnt_t npages, a_npages;
2668 page_t **ppa;
2669 pgcnt_t an_idx, a_an_idx, ppa_idx;
2670 caddr_t spt_addr, a_addr; /* spt and aligned address */
2671 size_t a_len; /* aligned len */
2672 size_t share_sz;
2673 ulong_t i;
2674 int sts = 0;
2675 rctl_qty_t unlocked = 0;
2676 rctl_qty_t locked = 0;
2677 struct proc *p = curproc;
2678 kproject_t *proj;
2679
2680 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2681 ASSERT(sp != NULL);
2682
2683 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2684 return (0);
2685 }
2686
2687 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2688 an_idx = seg_page(seg, addr);
2689 npages = btopr(len);
2690
2691 if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2692 return (ENOMEM);
2693 }
2694
2695 /*
2696 * A shm's project never changes, so no lock needed.
2697 * The shm has a hold on the project, so it will not go away.
2698 * Since we have a mapping to shm within this zone, we know
2699 * that the zone will not go away.
2700 */
2790 sptd->spt_flags |= DISM_PPA_CHANGED;
2791 mutex_exit(&sptd->spt_lock);
2792
2793 rctl_decr_locked_mem(NULL, proj, unlocked, 0);
2794 mutex_exit(&sp->shm_mlock);
2795
2796 if (ppa != NULL)
2797 seg_ppurge_wiredpp(ppa);
2798 }
2799 return (sts);
2800 }
2801
2802 /*ARGSUSED*/
2803 int
2804 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2805 {
2806 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2807 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2808 spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2809
2810 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2811
2812 /*
2813 * ISM segment is always rw.
2814 */
2815 while (--pgno >= 0)
2816 *protv++ = sptd->spt_prot;
2817 return (0);
2818 }
2819
/*ARGSUSED*/
u_offset_t
segspt_shmgetoffset(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/* Offset does not matter in ISM memory */

	return ((u_offset_t)0);
}
2830
2831 /* ARGSUSED */
2832 int
2833 segspt_shmgettype(struct seg *seg, caddr_t addr)
2834 {
2835 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2836 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2837
2838 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2839
2840 /*
2841 * The shared memory mapping is always MAP_SHARED, SWAP is only
2842 * reserved for DISM
2843 */
2844 return (MAP_SHARED |
2845 ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2846 }
2847
2848 /*ARGSUSED*/
2849 int
2850 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2851 {
2852 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2853 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2854
2855 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2856
2857 *vpp = sptd->spt_vp;
2858 return (0);
2859 }
2860
2861 /*
2862 * We need to wait for pending IO to complete to a DISM segment in order for
2863 * pages to get kicked out of the seg_pcache. 120 seconds should be more
2864 * than enough time to wait.
2865 */
2866 static clock_t spt_pcache_wait = 120;
2867
2868 /*ARGSUSED*/
2869 static int
2870 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2871 {
2872 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2873 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2874 struct anon_map *amp;
2875 pgcnt_t pg_idx;
2876 ushort_t gen;
2877 clock_t end_lbolt;
2878 int writer;
2879 page_t **ppa;
2880
2881 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2882
2883 if (behav == MADV_FREE) {
2884 if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2885 return (0);
2886
2887 amp = sptd->spt_amp;
2888 pg_idx = seg_page(seg, addr);
2889
2890 mutex_enter(&sptd->spt_lock);
2891 if ((ppa = sptd->spt_ppa) == NULL) {
2892 mutex_exit(&sptd->spt_lock);
2893 ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
2894 anon_disclaim(amp, pg_idx, len);
2895 ANON_LOCK_EXIT(&->a_rwlock);
2896 return (0);
2897 }
2898
2899 sptd->spt_flags |= DISM_PPA_CHANGED;
2900 gen = sptd->spt_gen;
2901
2902 mutex_exit(&sptd->spt_lock);
2903
2904 /*
2905 * Purge all DISM cached pages
2906 */
2907 seg_ppurge_wiredpp(ppa);
2908
2909 /*
2910 * Drop the AS_LOCK so that other threads can grab it
2911 * in the as_pageunlock path and hopefully get the segment
2912 * kicked out of the seg_pcache. We bump the shm_softlockcnt
2913 * to keep this segment resident.
2914 */
2915 writer = AS_WRITE_HELD(seg->s_as);
2916 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2917 AS_LOCK_EXIT(seg->s_as);
2918
2919 mutex_enter(&sptd->spt_lock);
2920
2921 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);
2922
2923 /*
2924 * Try to wait for pages to get kicked out of the seg_pcache.
2925 */
2926 while (sptd->spt_gen == gen &&
2927 (sptd->spt_flags & DISM_PPA_CHANGED) &&
2928 ddi_get_lbolt() < end_lbolt) {
2929 if (!cv_timedwait_sig(&sptd->spt_cv,
2930 &sptd->spt_lock, end_lbolt)) {
2931 break;
2932 }
2933 }
2934
2935 mutex_exit(&sptd->spt_lock);
2936
2937 /* Regrab the AS_LOCK and release our hold on the segment */
2938 AS_LOCK_ENTER(seg->s_as, writer ? RW_WRITER : RW_READER);
2939 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2940 if (shmd->shm_softlockcnt <= 0) {
2941 if (AS_ISUNMAPWAIT(seg->s_as)) {
2942 mutex_enter(&seg->s_as->a_contents);
2943 if (AS_ISUNMAPWAIT(seg->s_as)) {
2944 AS_CLRUNMAPWAIT(seg->s_as);
2945 cv_broadcast(&seg->s_as->a_cv);
2946 }
2947 mutex_exit(&seg->s_as->a_contents);
2948 }
2949 }
2950
2951 ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
2952 anon_disclaim(amp, pg_idx, len);
2953 ANON_LOCK_EXIT(&->a_rwlock);
2954 } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2955 behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2956 int already_set;
2957 ulong_t anon_index;
2958 lgrp_mem_policy_t policy;
|