80 #include <sys/contract_impl.h>
81 #include <sys/contract/process.h>
82 #include <sys/contract/process_impl.h>
83 #include <sys/schedctl.h>
84 #include <sys/pool.h>
85 #include <sys/zone.h>
86 #include <sys/atomic.h>
87 #include <sys/sdt.h>
88
89 #define MAX_ITERS_SPIN 5
90
/*
 * Page vector: scratch buffers used to interrogate a segment's per-page
 * protections (and, optionally, incore status) in batches rather than one
 * page at a time.  Refilled as the caller walks the segment (see the
 * "refill" loop and pr_pagev_nextprot below); pg_npages is bounded by
 * pagev_lim.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;
97
98 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */
99
100 extern struct seg_ops segdev_ops; /* needs a header file */
101 extern struct seg_ops segspt_shmops; /* needs a header file */
102
103 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
104 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
105
106 /*
107 * Choose an lwp from the complete set of lwps for the process.
108 * This is called for any operation applied to the process
109 * file descriptor that requires an lwp to operate upon.
110 *
111 * Returns a pointer to the thread for the selected LWP,
112 * and with the dispatcher lock held for the thread.
113 *
114 * The algorithm for choosing an lwp is critical for /proc semantics;
115 * don't touch this code unless you know all of the implications.
116 */
117 kthread_t *
118 prchoose(proc_t *p)
119 {
120 kthread_t *t;
121 kthread_t *t_onproc = NULL; /* running on processor */
1466 }
1467 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1468 *s++ = '.';
1469 s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1470 *s++ = '.';
1471 s += pr_u64tos(vattr->va_nodeid, s);
1472 *s++ = '\0';
1473 }
1474
1475 struct seg *
1476 break_seg(proc_t *p)
1477 {
1478 caddr_t addr = p->p_brkbase;
1479 struct seg *seg;
1480 struct vnode *vp;
1481
1482 if (p->p_brksize != 0)
1483 addr += p->p_brksize - 1;
1484 seg = as_segat(p->p_as, addr);
1485 if (seg != NULL && seg->s_ops == &segvn_ops &&
1486 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1487 return (seg);
1488 return (NULL);
1489 }
1490
1491 /*
1492 * Implementation of service functions to handle procfs generic chained
1493 * copyout buffers.
1494 */
/*
 * Node of a procfs chained copyout buffer: each node is a single
 * allocation consisting of this header immediately followed by its data
 * area (see PIOL_DATABUF), linked into a list_t by the pr_iol_* routines.
 */
typedef struct pr_iobuf_list {
	list_node_t piol_link;	/* buffer linkage */
	size_t piol_size;	/* total size (header + data) */
	size_t piol_usedsize;	/* amount to copy out from this buf */
} piol_t;
1500
1501 #define MAPSIZE (64 * 1024)
1502 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1]))
1503
1504 void
1505 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1506 {
1631 return (0);
1632
1633 brkseg = break_seg(p);
1634 stkseg = as_segat(as, prgetstackbase(p));
1635
1636 do {
1637 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638 caddr_t saddr, naddr;
1639 void *tmp = NULL;
1640
1641 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642 prot = pr_getprot(seg, reserved, &tmp,
1643 &saddr, &naddr, eaddr);
1644 if (saddr == naddr)
1645 continue;
1646
1647 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1648
1649 mp->pr_vaddr = (uintptr_t)saddr;
1650 mp->pr_size = naddr - saddr;
1651 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1652 mp->pr_mflags = 0;
1653 if (prot & PROT_READ)
1654 mp->pr_mflags |= MA_READ;
1655 if (prot & PROT_WRITE)
1656 mp->pr_mflags |= MA_WRITE;
1657 if (prot & PROT_EXEC)
1658 mp->pr_mflags |= MA_EXEC;
1659 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1660 mp->pr_mflags |= MA_SHARED;
1661 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1662 mp->pr_mflags |= MA_NORESERVE;
1663 if (seg->s_ops == &segspt_shmops ||
1664 (seg->s_ops == &segvn_ops &&
1665 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1666 mp->pr_mflags |= MA_ANON;
1667 if (seg == brkseg)
1668 mp->pr_mflags |= MA_BREAK;
1669 else if (seg == stkseg) {
1670 mp->pr_mflags |= MA_STACK;
1671 if (reserved) {
1672 size_t maxstack =
1673 ((size_t)p->p_stk_ctl +
1674 PAGEOFFSET) & PAGEMASK;
1675 mp->pr_vaddr =
1676 (uintptr_t)prgetstackbase(p) +
1677 p->p_stksize - maxstack;
1678 mp->pr_size = (uintptr_t)naddr -
1679 mp->pr_vaddr;
1680 }
1681 }
1682 if (seg->s_ops == &segspt_shmops)
1683 mp->pr_mflags |= MA_ISM | MA_SHM;
1684 mp->pr_pagesize = PAGESIZE;
1685
1686 /*
1687 * Manufacture a filename for the "object" directory.
1688 */
1689 vattr.va_mask = AT_FSID|AT_NODEID;
1690 if (seg->s_ops == &segvn_ops &&
1691 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1692 vp != NULL && vp->v_type == VREG &&
1693 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1694 if (vp == p->p_exec)
1695 (void) strcpy(mp->pr_mapname, "a.out");
1696 else
1697 pr_object_name(mp->pr_mapname,
1698 vp, &vattr);
1699 }
1700
1701 /*
1702 * Get the SysV shared memory id, if any.
1703 */
1704 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1705 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1706 SHMID_NONE) {
1707 if (mp->pr_shmid == SHMID_FREE)
1708 mp->pr_shmid = -1;
1709
1710 mp->pr_mflags |= MA_SHM;
1711 } else {
1742 return (0);
1743
1744 brkseg = break_seg(p);
1745 stkseg = as_segat(as, prgetstackbase(p));
1746
1747 do {
1748 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749 caddr_t saddr, naddr;
1750 void *tmp = NULL;
1751
1752 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753 prot = pr_getprot(seg, reserved, &tmp,
1754 &saddr, &naddr, eaddr);
1755 if (saddr == naddr)
1756 continue;
1757
1758 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1759
1760 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1761 mp->pr_size = (size32_t)(naddr - saddr);
1762 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1763 mp->pr_mflags = 0;
1764 if (prot & PROT_READ)
1765 mp->pr_mflags |= MA_READ;
1766 if (prot & PROT_WRITE)
1767 mp->pr_mflags |= MA_WRITE;
1768 if (prot & PROT_EXEC)
1769 mp->pr_mflags |= MA_EXEC;
1770 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1771 mp->pr_mflags |= MA_SHARED;
1772 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1773 mp->pr_mflags |= MA_NORESERVE;
1774 if (seg->s_ops == &segspt_shmops ||
1775 (seg->s_ops == &segvn_ops &&
1776 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1777 mp->pr_mflags |= MA_ANON;
1778 if (seg == brkseg)
1779 mp->pr_mflags |= MA_BREAK;
1780 else if (seg == stkseg) {
1781 mp->pr_mflags |= MA_STACK;
1782 if (reserved) {
1783 size_t maxstack =
1784 ((size_t)p->p_stk_ctl +
1785 PAGEOFFSET) & PAGEMASK;
1786 uintptr_t vaddr =
1787 (uintptr_t)prgetstackbase(p) +
1788 p->p_stksize - maxstack;
1789 mp->pr_vaddr = (caddr32_t)vaddr;
1790 mp->pr_size = (size32_t)
1791 ((uintptr_t)naddr - vaddr);
1792 }
1793 }
1794 if (seg->s_ops == &segspt_shmops)
1795 mp->pr_mflags |= MA_ISM | MA_SHM;
1796 mp->pr_pagesize = PAGESIZE;
1797
1798 /*
1799 * Manufacture a filename for the "object" directory.
1800 */
1801 vattr.va_mask = AT_FSID|AT_NODEID;
1802 if (seg->s_ops == &segvn_ops &&
1803 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1804 vp != NULL && vp->v_type == VREG &&
1805 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1806 if (vp == p->p_exec)
1807 (void) strcpy(mp->pr_mapname, "a.out");
1808 else
1809 pr_object_name(mp->pr_mapname,
1810 vp, &vattr);
1811 }
1812
1813 /*
1814 * Get the SysV shared memory id, if any.
1815 */
1816 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1817 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1818 SHMID_NONE) {
1819 if (mp->pr_shmid == SHMID_FREE)
1820 mp->pr_shmid = -1;
1821
1822 mp->pr_mflags |= MA_SHM;
1823 } else {
1936 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1937 struct vnode *vp;
1938 struct vattr vattr;
1939 size_t len;
1940 size_t npage;
1941 uint_t prot;
1942 uintptr_t next;
1943
1944 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1945 if ((len = (size_t)(naddr - saddr)) == 0)
1946 continue;
1947 npage = len / PAGESIZE;
1948 next = (uintptr_t)(pmp + 1) + round8(npage);
1949 /*
 1950			 * It's possible that the address space can change
 1951			 * subtly even though we're holding as->a_lock
 1952			 * due to the nondeterminism of page_exists() in
 1953			 * the presence of asynchronously flushed pages or
 1954			 * mapped files whose sizes are changing.
1955 * page_exists() may be called indirectly from
1956 * pr_getprot() by a SEGOP_INCORE() routine.
1957 * If this happens we need to make sure we don't
1958 * overrun the buffer whose size we computed based
1959 * on the initial iteration through the segments.
1960 * Once we've detected an overflow, we need to clean
1961 * up the temporary memory allocated in pr_getprot()
1962 * and retry. If there's a pending signal, we return
1963 * EINTR so that this thread can be dislodged if
1964 * a latent bug causes us to spin indefinitely.
1965 */
1966 if (next > (uintptr_t)buf + size) {
1967 pr_getprot_done(&tmp);
1968 AS_LOCK_EXIT(as, &as->a_lock);
1969
1970 kmem_free(buf, size);
1971
1972 if (ISSIG(curthread, JUSTLOOKING))
1973 return (EINTR);
1974
1975 goto again;
1976 }
1977
1978 php->pr_nmap++;
1979 php->pr_npage += npage;
1980 pmp->pr_vaddr = (uintptr_t)saddr;
1981 pmp->pr_npage = npage;
1982 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1983 pmp->pr_mflags = 0;
1984 if (prot & PROT_READ)
1985 pmp->pr_mflags |= MA_READ;
1986 if (prot & PROT_WRITE)
1987 pmp->pr_mflags |= MA_WRITE;
1988 if (prot & PROT_EXEC)
1989 pmp->pr_mflags |= MA_EXEC;
1990 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1991 pmp->pr_mflags |= MA_SHARED;
1992 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1993 pmp->pr_mflags |= MA_NORESERVE;
1994 if (seg->s_ops == &segspt_shmops ||
1995 (seg->s_ops == &segvn_ops &&
1996 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1997 pmp->pr_mflags |= MA_ANON;
1998 if (seg->s_ops == &segspt_shmops)
1999 pmp->pr_mflags |= MA_ISM | MA_SHM;
2000 pmp->pr_pagesize = PAGESIZE;
2001 /*
2002 * Manufacture a filename for the "object" directory.
2003 */
2004 vattr.va_mask = AT_FSID|AT_NODEID;
2005 if (seg->s_ops == &segvn_ops &&
2006 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2007 vp != NULL && vp->v_type == VREG &&
2008 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2009 if (vp == p->p_exec)
2010 (void) strcpy(pmp->pr_mapname, "a.out");
2011 else
2012 pr_object_name(pmp->pr_mapname,
2013 vp, &vattr);
2014 }
2015
2016 /*
2017 * Get the SysV shared memory id, if any.
2018 */
2019 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2020 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2021 SHMID_NONE) {
2022 if (pmp->pr_shmid == SHMID_FREE)
2023 pmp->pr_shmid = -1;
2024
2025 pmp->pr_mflags |= MA_SHM;
2026 } else {
2083 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2084 struct vnode *vp;
2085 struct vattr vattr;
2086 size_t len;
2087 size_t npage;
2088 uint_t prot;
2089 uintptr_t next;
2090
2091 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2092 if ((len = (size_t)(naddr - saddr)) == 0)
2093 continue;
2094 npage = len / PAGESIZE;
2095 next = (uintptr_t)(pmp + 1) + round8(npage);
2096 /*
 2097			 * It's possible that the address space can change
 2098			 * subtly even though we're holding as->a_lock
 2099			 * due to the nondeterminism of page_exists() in
 2100			 * the presence of asynchronously flushed pages or
 2101			 * mapped files whose sizes are changing.
2102 * page_exists() may be called indirectly from
2103 * pr_getprot() by a SEGOP_INCORE() routine.
2104 * If this happens we need to make sure we don't
2105 * overrun the buffer whose size we computed based
2106 * on the initial iteration through the segments.
2107 * Once we've detected an overflow, we need to clean
2108 * up the temporary memory allocated in pr_getprot()
2109 * and retry. If there's a pending signal, we return
2110 * EINTR so that this thread can be dislodged if
2111 * a latent bug causes us to spin indefinitely.
2112 */
2113 if (next > (uintptr_t)buf + size) {
2114 pr_getprot_done(&tmp);
2115 AS_LOCK_EXIT(as, &as->a_lock);
2116
2117 kmem_free(buf, size);
2118
2119 if (ISSIG(curthread, JUSTLOOKING))
2120 return (EINTR);
2121
2122 goto again;
2123 }
2124
2125 php->pr_nmap++;
2126 php->pr_npage += npage;
2127 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2128 pmp->pr_npage = (size32_t)npage;
2129 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2130 pmp->pr_mflags = 0;
2131 if (prot & PROT_READ)
2132 pmp->pr_mflags |= MA_READ;
2133 if (prot & PROT_WRITE)
2134 pmp->pr_mflags |= MA_WRITE;
2135 if (prot & PROT_EXEC)
2136 pmp->pr_mflags |= MA_EXEC;
2137 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2138 pmp->pr_mflags |= MA_SHARED;
2139 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2140 pmp->pr_mflags |= MA_NORESERVE;
2141 if (seg->s_ops == &segspt_shmops ||
2142 (seg->s_ops == &segvn_ops &&
2143 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2144 pmp->pr_mflags |= MA_ANON;
2145 if (seg->s_ops == &segspt_shmops)
2146 pmp->pr_mflags |= MA_ISM | MA_SHM;
2147 pmp->pr_pagesize = PAGESIZE;
2148 /*
2149 * Manufacture a filename for the "object" directory.
2150 */
2151 vattr.va_mask = AT_FSID|AT_NODEID;
2152 if (seg->s_ops == &segvn_ops &&
2153 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2154 vp != NULL && vp->v_type == VREG &&
2155 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2156 if (vp == p->p_exec)
2157 (void) strcpy(pmp->pr_mapname, "a.out");
2158 else
2159 pr_object_name(pmp->pr_mapname,
2160 vp, &vattr);
2161 }
2162
2163 /*
2164 * Get the SysV shared memory id, if any.
2165 */
2166 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2167 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2168 SHMID_NONE) {
2169 if (pmp->pr_shmid == SHMID_FREE)
2170 pmp->pr_shmid = -1;
2171
2172 pmp->pr_mflags |= MA_SHM;
2173 } else {
3311
3312 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3313 return;
3314
3315 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3316 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3317
3318 pwp = avl_first(&as->a_wpage);
3319
3320 cookie = NULL;
3321 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3322 retrycnt = 0;
3323 if ((prot = pwp->wp_oprot) != 0) {
3324 caddr_t addr = pwp->wp_vaddr;
3325 struct seg *seg;
3326 retry:
3327
3328 if ((pwp->wp_prot != prot ||
3329 (pwp->wp_flags & WP_NOWATCH)) &&
3330 (seg = as_segat(as, addr)) != NULL) {
3331 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3332 if (err == IE_RETRY) {
3333 ASSERT(retrycnt == 0);
3334 retrycnt++;
3335 goto retry;
3336 }
3337 }
3338 }
3339 kmem_free(pwp, sizeof (struct watched_page));
3340 }
3341
3342 avl_destroy(&as->a_wpage);
3343 p->p_wprot = NULL;
3344
3345 AS_LOCK_EXIT(as, &as->a_lock);
3346 }
3347
3348 /*
3349 * Insert a watched area into the list of watched pages.
3350 * If oflags is zero then we are adding a new watched area.
3351 * Otherwise we are changing the flags of an existing watched area.
3417 if (oflags & WA_WRITE)
3418 pwp->wp_write--;
3419 if (oflags & WA_EXEC)
3420 pwp->wp_exec--;
3421
3422 ASSERT(pwp->wp_read >= 0);
3423 ASSERT(pwp->wp_write >= 0);
3424 ASSERT(pwp->wp_exec >= 0);
3425
3426 if (flags & WA_READ)
3427 pwp->wp_read++;
3428 if (flags & WA_WRITE)
3429 pwp->wp_write++;
3430 if (flags & WA_EXEC)
3431 pwp->wp_exec++;
3432
3433 if (!(p->p_flag & SVFWAIT)) {
3434 vaddr = pwp->wp_vaddr;
3435 if (pwp->wp_oprot == 0 &&
3436 (seg = as_segat(as, vaddr)) != NULL) {
3437 SEGOP_GETPROT(seg, vaddr, 0, &prot);
3438 pwp->wp_oprot = (uchar_t)prot;
3439 pwp->wp_prot = (uchar_t)prot;
3440 }
3441 if (pwp->wp_oprot != 0) {
3442 prot = pwp->wp_oprot;
3443 if (pwp->wp_read)
3444 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3445 if (pwp->wp_write)
3446 prot &= ~PROT_WRITE;
3447 if (pwp->wp_exec)
3448 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3449 if (!(pwp->wp_flags & WP_NOWATCH) &&
3450 pwp->wp_prot != prot &&
3451 (pwp->wp_flags & WP_SETPROT) == 0) {
3452 pwp->wp_flags |= WP_SETPROT;
3453 pwp->wp_list = p->p_wprot;
3454 p->p_wprot = pwp;
3455 }
3456 pwp->wp_prot = (uchar_t)prot;
3457 }
3623
3624 if (addr == eaddr)
3625 return (eaddr);
3626
3627 refill:
3628 ASSERT(addr < eaddr);
3629 pagev->pg_pnbase = seg_page(seg, addr);
3630 pnlim = pagev->pg_pnbase + pagev->pg_npages;
3631 saddr = addr;
3632
3633 if (lastpg < pnlim)
3634 len = (size_t)(eaddr - addr);
3635 else
3636 len = pagev->pg_npages * PAGESIZE;
3637
3638 if (pagev->pg_incore != NULL) {
3639 /*
3640 * INCORE cleverly has different semantics than GETPROT:
3641 * it returns info on pages up to but NOT including addr + len.
3642 */
3643 SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3644 pn = pagev->pg_pnbase;
3645
3646 do {
3647 /*
3648 * Guilty knowledge here: We know that segvn_incore
3649 * returns more than just the low-order bit that
3650 * indicates the page is actually in memory. If any
3651 * bits are set, then the page has backing store.
3652 */
3653 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3654 goto out;
3655
3656 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3657
3658 /*
3659 * If we examined all the pages in the vector but we're not
3660 * at the end of the segment, take another lap.
3661 */
3662 if (addr < eaddr)
3663 goto refill;
3664 }
3665
3666 /*
3667 * Need to take len - 1 because addr + len is the address of the
3668 * first byte of the page just past the end of what we want.
3669 */
3670 out:
3671 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3672 return (addr);
3673 }
3674
3675 static caddr_t
3676 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3677 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3678 {
3679 /*
3680 * Our starting address is either the specified address, or the base
3681 * address from the start of the pagev. If the latter is greater,
3682 * this means a previous call to pr_pagev_fill has already scanned
3683 * further than the end of the previous mapping.
3684 */
3685 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3686 caddr_t addr = MAX(*saddrp, base);
3687 ulong_t pn = seg_page(seg, addr);
3688 uint_t prot, nprot;
3689
3690 /*
3691 * If we're dealing with noreserve pages, then advance addr to
3772
3773 /*
3774 * If we're interested in the reserved space, return the size of the
3775 * segment itself. Everything else in this function is a special case
3776 * to determine the actual underlying size of various segment types.
3777 */
3778 if (reserved)
3779 return (size);
3780
3781 /*
3782 * If this is a segvn mapping of a regular file, return the smaller
3783 * of the segment size and the remaining size of the file beyond
3784 * the file offset corresponding to seg->s_base.
3785 */
3786 if (seg->s_ops == &segvn_ops) {
3787 vattr_t vattr;
3788 vnode_t *vp;
3789
3790 vattr.va_mask = AT_SIZE;
3791
3792 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3793 vp != NULL && vp->v_type == VREG &&
3794 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3795
3796 u_offset_t fsize = vattr.va_size;
3797 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3798
3799 if (fsize < offset)
3800 fsize = 0;
3801 else
3802 fsize -= offset;
3803
3804 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3805
3806 if (fsize < (u_offset_t)size)
3807 size = (size_t)fsize;
3808 }
3809
3810 return (size);
3811 }
3812
3813 /*
3814 * If this is an ISM shared segment, don't include pages that are
3815 * beyond the real size of the spt segment that backs it.
3816 */
3817 if (seg->s_ops == &segspt_shmops)
3818 return (MIN(spt_realsize(seg), size));
3819
3820 /*
 3821	 * If this segment is a mapping from /dev/null, then this is a
3822 * reservation of virtual address space and has no actual size.
3823 * Such segments are backed by segdev and have type set to neither
3824 * MAP_SHARED nor MAP_PRIVATE.
3825 */
3826 if (seg->s_ops == &segdev_ops &&
3827 ((SEGOP_GETTYPE(seg, seg->s_base) &
3828 (MAP_SHARED | MAP_PRIVATE)) == 0))
3829 return (0);
3830
3831 /*
3832 * If this segment doesn't match one of the special types we handle,
3833 * just return the size of the segment itself.
3834 */
3835 return (size);
3836 }
3837
3838 uint_t
3839 pr_getprot(struct seg *seg, int reserved, void **tmp,
3840 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3841 {
3842 struct as *as = seg->s_as;
3843
3844 caddr_t saddr = *saddrp;
3845 caddr_t naddr;
3846
3847 int check_noreserve;
4038 * ranges that have different virtual memory protections.
4039 */
4040 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4041 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4042 ASSERT(baddr >= saddr && baddr <= eaddr);
4043
4044 /*
4045 * Segment loop part two: iterate from the current
4046 * position to the end of the protection boundary,
4047 * pausing at each address boundary (naddr) between
4048 * ranges that have different underlying page sizes.
4049 */
4050 for (; saddr < baddr; saddr = naddr) {
4051 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4052 ASSERT(naddr >= saddr && naddr <= baddr);
4053
4054 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4055
4056 mp->pr_vaddr = (uintptr_t)saddr;
4057 mp->pr_size = naddr - saddr;
4058 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4059 mp->pr_mflags = 0;
4060 if (prot & PROT_READ)
4061 mp->pr_mflags |= MA_READ;
4062 if (prot & PROT_WRITE)
4063 mp->pr_mflags |= MA_WRITE;
4064 if (prot & PROT_EXEC)
4065 mp->pr_mflags |= MA_EXEC;
4066 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4067 mp->pr_mflags |= MA_SHARED;
4068 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4069 mp->pr_mflags |= MA_NORESERVE;
4070 if (seg->s_ops == &segspt_shmops ||
4071 (seg->s_ops == &segvn_ops &&
4072 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4073 vp == NULL)))
4074 mp->pr_mflags |= MA_ANON;
4075 if (seg == brkseg)
4076 mp->pr_mflags |= MA_BREAK;
4077 else if (seg == stkseg)
4078 mp->pr_mflags |= MA_STACK;
4079 if (seg->s_ops == &segspt_shmops)
4080 mp->pr_mflags |= MA_ISM | MA_SHM;
4081
4082 mp->pr_pagesize = PAGESIZE;
4083 if (psz == -1) {
4084 mp->pr_hatpagesize = 0;
4085 } else {
4086 mp->pr_hatpagesize = psz;
4087 }
4088
4089 /*
4090 * Manufacture a filename for the "object" dir.
4091 */
4092 mp->pr_dev = PRNODEV;
4093 vattr.va_mask = AT_FSID|AT_NODEID;
4094 if (seg->s_ops == &segvn_ops &&
4095 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4096 vp != NULL && vp->v_type == VREG &&
4097 VOP_GETATTR(vp, &vattr, 0, CRED(),
4098 NULL) == 0) {
4099 mp->pr_dev = vattr.va_fsid;
4100 mp->pr_ino = vattr.va_nodeid;
4101 if (vp == p->p_exec)
4102 (void) strcpy(mp->pr_mapname,
4103 "a.out");
4104 else
4105 pr_object_name(mp->pr_mapname,
4106 vp, &vattr);
4107 }
4108
4109 /*
4110 * Get the SysV shared memory id, if any.
4111 */
4112 if ((mp->pr_mflags & MA_SHARED) &&
4113 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4114 seg->s_base)) != SHMID_NONE) {
4115 if (mp->pr_shmid == SHMID_FREE)
4116 mp->pr_shmid = -1;
4117
4118 mp->pr_mflags |= MA_SHM;
4119 } else {
4120 mp->pr_shmid = -1;
4121 }
4122
4123 npages = ((uintptr_t)(naddr - saddr)) >>
4124 PAGESHIFT;
4125 parr = kmem_zalloc(npages, KM_SLEEP);
4126
4127 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4128
4129 for (pagenum = 0; pagenum < npages; pagenum++) {
4130 if (parr[pagenum] & SEG_PAGE_INCORE)
4131 mp->pr_rss++;
4132 if (parr[pagenum] & SEG_PAGE_ANON)
4133 mp->pr_anon++;
4134 if (parr[pagenum] & SEG_PAGE_LOCKED)
4135 mp->pr_locked++;
4136 }
4137 kmem_free(parr, npages);
4138 }
4139 }
4140 ASSERT(tmp == NULL);
4141 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4142
4143 return (0);
4144 }
4145
4146 /*
4147 * Return the process's credentials. We don't need a 32-bit equivalent of
4222 * ranges that have different virtual memory protections.
4223 */
4224 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4225 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4226 ASSERT(baddr >= saddr && baddr <= eaddr);
4227
4228 /*
4229 * Segment loop part two: iterate from the current
4230 * position to the end of the protection boundary,
4231 * pausing at each address boundary (naddr) between
4232 * ranges that have different underlying page sizes.
4233 */
4234 for (; saddr < baddr; saddr = naddr) {
4235 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4236 ASSERT(naddr >= saddr && naddr <= baddr);
4237
4238 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4239
4240 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4241 mp->pr_size = (size32_t)(naddr - saddr);
4242 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4243 mp->pr_mflags = 0;
4244 if (prot & PROT_READ)
4245 mp->pr_mflags |= MA_READ;
4246 if (prot & PROT_WRITE)
4247 mp->pr_mflags |= MA_WRITE;
4248 if (prot & PROT_EXEC)
4249 mp->pr_mflags |= MA_EXEC;
4250 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4251 mp->pr_mflags |= MA_SHARED;
4252 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4253 mp->pr_mflags |= MA_NORESERVE;
4254 if (seg->s_ops == &segspt_shmops ||
4255 (seg->s_ops == &segvn_ops &&
4256 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4257 vp == NULL)))
4258 mp->pr_mflags |= MA_ANON;
4259 if (seg == brkseg)
4260 mp->pr_mflags |= MA_BREAK;
4261 else if (seg == stkseg)
4262 mp->pr_mflags |= MA_STACK;
4263 if (seg->s_ops == &segspt_shmops)
4264 mp->pr_mflags |= MA_ISM | MA_SHM;
4265
4266 mp->pr_pagesize = PAGESIZE;
4267 if (psz == -1) {
4268 mp->pr_hatpagesize = 0;
4269 } else {
4270 mp->pr_hatpagesize = psz;
4271 }
4272
4273 /*
4274 * Manufacture a filename for the "object" dir.
4275 */
4276 mp->pr_dev = PRNODEV32;
4277 vattr.va_mask = AT_FSID|AT_NODEID;
4278 if (seg->s_ops == &segvn_ops &&
4279 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4280 vp != NULL && vp->v_type == VREG &&
4281 VOP_GETATTR(vp, &vattr, 0, CRED(),
4282 NULL) == 0) {
4283 (void) cmpldev(&mp->pr_dev,
4284 vattr.va_fsid);
4285 mp->pr_ino = vattr.va_nodeid;
4286 if (vp == p->p_exec)
4287 (void) strcpy(mp->pr_mapname,
4288 "a.out");
4289 else
4290 pr_object_name(mp->pr_mapname,
4291 vp, &vattr);
4292 }
4293
4294 /*
4295 * Get the SysV shared memory id, if any.
4296 */
4297 if ((mp->pr_mflags & MA_SHARED) &&
4298 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4299 seg->s_base)) != SHMID_NONE) {
4300 if (mp->pr_shmid == SHMID_FREE)
4301 mp->pr_shmid = -1;
4302
4303 mp->pr_mflags |= MA_SHM;
4304 } else {
4305 mp->pr_shmid = -1;
4306 }
4307
4308 npages = ((uintptr_t)(naddr - saddr)) >>
4309 PAGESHIFT;
4310 parr = kmem_zalloc(npages, KM_SLEEP);
4311
4312 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4313
4314 for (pagenum = 0; pagenum < npages; pagenum++) {
4315 if (parr[pagenum] & SEG_PAGE_INCORE)
4316 mp->pr_rss++;
4317 if (parr[pagenum] & SEG_PAGE_ANON)
4318 mp->pr_anon++;
4319 if (parr[pagenum] & SEG_PAGE_LOCKED)
4320 mp->pr_locked++;
4321 }
4322 kmem_free(parr, npages);
4323 }
4324 }
4325 ASSERT(tmp == NULL);
4326 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4327
4328 return (0);
4329 }
4330 #endif /* _SYSCALL32_IMPL */
|
80 #include <sys/contract_impl.h>
81 #include <sys/contract/process.h>
82 #include <sys/contract/process_impl.h>
83 #include <sys/schedctl.h>
84 #include <sys/pool.h>
85 #include <sys/zone.h>
86 #include <sys/atomic.h>
87 #include <sys/sdt.h>
88
89 #define MAX_ITERS_SPIN 5
90
/*
 * Page vector: scratch buffers used to interrogate a segment's per-page
 * protections (and, optionally, incore status) in batches rather than one
 * page at a time.  pg_npages is bounded by pagev_lim below.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;
97
98 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */
99
100 extern const struct seg_ops segdev_ops; /* needs a header file */
101 extern const struct seg_ops segspt_shmops; /* needs a header file */
102
103 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
104 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
105
106 /*
107 * Choose an lwp from the complete set of lwps for the process.
108 * This is called for any operation applied to the process
109 * file descriptor that requires an lwp to operate upon.
110 *
111 * Returns a pointer to the thread for the selected LWP,
112 * and with the dispatcher lock held for the thread.
113 *
114 * The algorithm for choosing an lwp is critical for /proc semantics;
115 * don't touch this code unless you know all of the implications.
116 */
117 kthread_t *
118 prchoose(proc_t *p)
119 {
120 kthread_t *t;
121 kthread_t *t_onproc = NULL; /* running on processor */
1466 }
1467 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1468 *s++ = '.';
1469 s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1470 *s++ = '.';
1471 s += pr_u64tos(vattr->va_nodeid, s);
1472 *s++ = '\0';
1473 }
1474
1475 struct seg *
1476 break_seg(proc_t *p)
1477 {
1478 caddr_t addr = p->p_brkbase;
1479 struct seg *seg;
1480 struct vnode *vp;
1481
1482 if (p->p_brksize != 0)
1483 addr += p->p_brksize - 1;
1484 seg = as_segat(p->p_as, addr);
1485 if (seg != NULL && seg->s_ops == &segvn_ops &&
1486 (segop_getvp(seg, seg->s_base, &vp) != 0 || vp == NULL))
1487 return (seg);
1488 return (NULL);
1489 }
1490
1491 /*
1492 * Implementation of service functions to handle procfs generic chained
1493 * copyout buffers.
1494 */
/*
 * Node of a procfs chained copyout buffer: each node is a single
 * allocation consisting of this header immediately followed by its data
 * area (see PIOL_DATABUF), linked into a list_t by the pr_iol_* routines.
 */
typedef struct pr_iobuf_list {
	list_node_t piol_link;	/* buffer linkage */
	size_t piol_size;	/* total size (header + data) */
	size_t piol_usedsize;	/* amount to copy out from this buf */
} piol_t;
1500
1501 #define MAPSIZE (64 * 1024)
1502 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1]))
1503
1504 void
1505 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1506 {
1631 return (0);
1632
1633 brkseg = break_seg(p);
1634 stkseg = as_segat(as, prgetstackbase(p));
1635
1636 do {
1637 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638 caddr_t saddr, naddr;
1639 void *tmp = NULL;
1640
1641 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642 prot = pr_getprot(seg, reserved, &tmp,
1643 &saddr, &naddr, eaddr);
1644 if (saddr == naddr)
1645 continue;
1646
1647 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1648
1649 mp->pr_vaddr = (uintptr_t)saddr;
1650 mp->pr_size = naddr - saddr;
1651 mp->pr_offset = segop_getoffset(seg, saddr);
1652 mp->pr_mflags = 0;
1653 if (prot & PROT_READ)
1654 mp->pr_mflags |= MA_READ;
1655 if (prot & PROT_WRITE)
1656 mp->pr_mflags |= MA_WRITE;
1657 if (prot & PROT_EXEC)
1658 mp->pr_mflags |= MA_EXEC;
1659 if (segop_gettype(seg, saddr) & MAP_SHARED)
1660 mp->pr_mflags |= MA_SHARED;
1661 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
1662 mp->pr_mflags |= MA_NORESERVE;
1663 if (seg->s_ops == &segspt_shmops ||
1664 (seg->s_ops == &segvn_ops &&
1665 (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
1666 mp->pr_mflags |= MA_ANON;
1667 if (seg == brkseg)
1668 mp->pr_mflags |= MA_BREAK;
1669 else if (seg == stkseg) {
1670 mp->pr_mflags |= MA_STACK;
1671 if (reserved) {
1672 size_t maxstack =
1673 ((size_t)p->p_stk_ctl +
1674 PAGEOFFSET) & PAGEMASK;
1675 mp->pr_vaddr =
1676 (uintptr_t)prgetstackbase(p) +
1677 p->p_stksize - maxstack;
1678 mp->pr_size = (uintptr_t)naddr -
1679 mp->pr_vaddr;
1680 }
1681 }
1682 if (seg->s_ops == &segspt_shmops)
1683 mp->pr_mflags |= MA_ISM | MA_SHM;
1684 mp->pr_pagesize = PAGESIZE;
1685
1686 /*
1687 * Manufacture a filename for the "object" directory.
1688 */
1689 vattr.va_mask = AT_FSID|AT_NODEID;
1690 if (seg->s_ops == &segvn_ops &&
1691 segop_getvp(seg, saddr, &vp) == 0 &&
1692 vp != NULL && vp->v_type == VREG &&
1693 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1694 if (vp == p->p_exec)
1695 (void) strcpy(mp->pr_mapname, "a.out");
1696 else
1697 pr_object_name(mp->pr_mapname,
1698 vp, &vattr);
1699 }
1700
1701 /*
1702 * Get the SysV shared memory id, if any.
1703 */
1704 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1705 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1706 SHMID_NONE) {
1707 if (mp->pr_shmid == SHMID_FREE)
1708 mp->pr_shmid = -1;
1709
1710 mp->pr_mflags |= MA_SHM;
1711 } else {
1742 return (0);
1743
1744 brkseg = break_seg(p);
1745 stkseg = as_segat(as, prgetstackbase(p));
1746
1747 do {
1748 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749 caddr_t saddr, naddr;
1750 void *tmp = NULL;
1751
1752 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753 prot = pr_getprot(seg, reserved, &tmp,
1754 &saddr, &naddr, eaddr);
1755 if (saddr == naddr)
1756 continue;
1757
1758 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1759
1760 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1761 mp->pr_size = (size32_t)(naddr - saddr);
1762 mp->pr_offset = segop_getoffset(seg, saddr);
1763 mp->pr_mflags = 0;
1764 if (prot & PROT_READ)
1765 mp->pr_mflags |= MA_READ;
1766 if (prot & PROT_WRITE)
1767 mp->pr_mflags |= MA_WRITE;
1768 if (prot & PROT_EXEC)
1769 mp->pr_mflags |= MA_EXEC;
1770 if (segop_gettype(seg, saddr) & MAP_SHARED)
1771 mp->pr_mflags |= MA_SHARED;
1772 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
1773 mp->pr_mflags |= MA_NORESERVE;
1774 if (seg->s_ops == &segspt_shmops ||
1775 (seg->s_ops == &segvn_ops &&
1776 (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
1777 mp->pr_mflags |= MA_ANON;
1778 if (seg == brkseg)
1779 mp->pr_mflags |= MA_BREAK;
1780 else if (seg == stkseg) {
1781 mp->pr_mflags |= MA_STACK;
1782 if (reserved) {
1783 size_t maxstack =
1784 ((size_t)p->p_stk_ctl +
1785 PAGEOFFSET) & PAGEMASK;
1786 uintptr_t vaddr =
1787 (uintptr_t)prgetstackbase(p) +
1788 p->p_stksize - maxstack;
1789 mp->pr_vaddr = (caddr32_t)vaddr;
1790 mp->pr_size = (size32_t)
1791 ((uintptr_t)naddr - vaddr);
1792 }
1793 }
1794 if (seg->s_ops == &segspt_shmops)
1795 mp->pr_mflags |= MA_ISM | MA_SHM;
1796 mp->pr_pagesize = PAGESIZE;
1797
1798 /*
1799 * Manufacture a filename for the "object" directory.
1800 */
1801 vattr.va_mask = AT_FSID|AT_NODEID;
1802 if (seg->s_ops == &segvn_ops &&
1803 segop_getvp(seg, saddr, &vp) == 0 &&
1804 vp != NULL && vp->v_type == VREG &&
1805 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1806 if (vp == p->p_exec)
1807 (void) strcpy(mp->pr_mapname, "a.out");
1808 else
1809 pr_object_name(mp->pr_mapname,
1810 vp, &vattr);
1811 }
1812
1813 /*
1814 * Get the SysV shared memory id, if any.
1815 */
1816 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1817 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1818 SHMID_NONE) {
1819 if (mp->pr_shmid == SHMID_FREE)
1820 mp->pr_shmid = -1;
1821
1822 mp->pr_mflags |= MA_SHM;
1823 } else {
1936 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1937 struct vnode *vp;
1938 struct vattr vattr;
1939 size_t len;
1940 size_t npage;
1941 uint_t prot;
1942 uintptr_t next;
1943
1944 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1945 if ((len = (size_t)(naddr - saddr)) == 0)
1946 continue;
1947 npage = len / PAGESIZE;
1948 next = (uintptr_t)(pmp + 1) + round8(npage);
1949 /*
1950 * It's possible that the address space can change
1951 			 * subtly even though we're holding as->a_lock
1952 			 * due to the nondeterminism of page_exists() in
1953 			 * the presence of asynchronously flushed pages or
1954 * mapped files whose sizes are changing.
1955 * page_exists() may be called indirectly from
1956 * pr_getprot() by a segop_incore() routine.
1957 * If this happens we need to make sure we don't
1958 * overrun the buffer whose size we computed based
1959 * on the initial iteration through the segments.
1960 * Once we've detected an overflow, we need to clean
1961 * up the temporary memory allocated in pr_getprot()
1962 * and retry. If there's a pending signal, we return
1963 * EINTR so that this thread can be dislodged if
1964 * a latent bug causes us to spin indefinitely.
1965 */
1966 if (next > (uintptr_t)buf + size) {
1967 pr_getprot_done(&tmp);
1968 AS_LOCK_EXIT(as, &as->a_lock);
1969
1970 kmem_free(buf, size);
1971
1972 if (ISSIG(curthread, JUSTLOOKING))
1973 return (EINTR);
1974
1975 goto again;
1976 }
1977
1978 php->pr_nmap++;
1979 php->pr_npage += npage;
1980 pmp->pr_vaddr = (uintptr_t)saddr;
1981 pmp->pr_npage = npage;
1982 pmp->pr_offset = segop_getoffset(seg, saddr);
1983 pmp->pr_mflags = 0;
1984 if (prot & PROT_READ)
1985 pmp->pr_mflags |= MA_READ;
1986 if (prot & PROT_WRITE)
1987 pmp->pr_mflags |= MA_WRITE;
1988 if (prot & PROT_EXEC)
1989 pmp->pr_mflags |= MA_EXEC;
1990 if (segop_gettype(seg, saddr) & MAP_SHARED)
1991 pmp->pr_mflags |= MA_SHARED;
1992 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
1993 pmp->pr_mflags |= MA_NORESERVE;
1994 if (seg->s_ops == &segspt_shmops ||
1995 (seg->s_ops == &segvn_ops &&
1996 (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
1997 pmp->pr_mflags |= MA_ANON;
1998 if (seg->s_ops == &segspt_shmops)
1999 pmp->pr_mflags |= MA_ISM | MA_SHM;
2000 pmp->pr_pagesize = PAGESIZE;
2001 /*
2002 * Manufacture a filename for the "object" directory.
2003 */
2004 vattr.va_mask = AT_FSID|AT_NODEID;
2005 if (seg->s_ops == &segvn_ops &&
2006 segop_getvp(seg, saddr, &vp) == 0 &&
2007 vp != NULL && vp->v_type == VREG &&
2008 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2009 if (vp == p->p_exec)
2010 (void) strcpy(pmp->pr_mapname, "a.out");
2011 else
2012 pr_object_name(pmp->pr_mapname,
2013 vp, &vattr);
2014 }
2015
2016 /*
2017 * Get the SysV shared memory id, if any.
2018 */
2019 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2020 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2021 SHMID_NONE) {
2022 if (pmp->pr_shmid == SHMID_FREE)
2023 pmp->pr_shmid = -1;
2024
2025 pmp->pr_mflags |= MA_SHM;
2026 } else {
2083 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2084 struct vnode *vp;
2085 struct vattr vattr;
2086 size_t len;
2087 size_t npage;
2088 uint_t prot;
2089 uintptr_t next;
2090
2091 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2092 if ((len = (size_t)(naddr - saddr)) == 0)
2093 continue;
2094 npage = len / PAGESIZE;
2095 next = (uintptr_t)(pmp + 1) + round8(npage);
2096 /*
2097 * It's possible that the address space can change
2098 			 * subtly even though we're holding as->a_lock
2099 			 * due to the nondeterminism of page_exists() in
2100 			 * the presence of asynchronously flushed pages or
2101 * mapped files whose sizes are changing.
2102 * page_exists() may be called indirectly from
2103 * pr_getprot() by a segop_incore() routine.
2104 * If this happens we need to make sure we don't
2105 * overrun the buffer whose size we computed based
2106 * on the initial iteration through the segments.
2107 * Once we've detected an overflow, we need to clean
2108 * up the temporary memory allocated in pr_getprot()
2109 * and retry. If there's a pending signal, we return
2110 * EINTR so that this thread can be dislodged if
2111 * a latent bug causes us to spin indefinitely.
2112 */
2113 if (next > (uintptr_t)buf + size) {
2114 pr_getprot_done(&tmp);
2115 AS_LOCK_EXIT(as, &as->a_lock);
2116
2117 kmem_free(buf, size);
2118
2119 if (ISSIG(curthread, JUSTLOOKING))
2120 return (EINTR);
2121
2122 goto again;
2123 }
2124
2125 php->pr_nmap++;
2126 php->pr_npage += npage;
2127 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2128 pmp->pr_npage = (size32_t)npage;
2129 pmp->pr_offset = segop_getoffset(seg, saddr);
2130 pmp->pr_mflags = 0;
2131 if (prot & PROT_READ)
2132 pmp->pr_mflags |= MA_READ;
2133 if (prot & PROT_WRITE)
2134 pmp->pr_mflags |= MA_WRITE;
2135 if (prot & PROT_EXEC)
2136 pmp->pr_mflags |= MA_EXEC;
2137 if (segop_gettype(seg, saddr) & MAP_SHARED)
2138 pmp->pr_mflags |= MA_SHARED;
2139 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
2140 pmp->pr_mflags |= MA_NORESERVE;
2141 if (seg->s_ops == &segspt_shmops ||
2142 (seg->s_ops == &segvn_ops &&
2143 (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
2144 pmp->pr_mflags |= MA_ANON;
2145 if (seg->s_ops == &segspt_shmops)
2146 pmp->pr_mflags |= MA_ISM | MA_SHM;
2147 pmp->pr_pagesize = PAGESIZE;
2148 /*
2149 * Manufacture a filename for the "object" directory.
2150 */
2151 vattr.va_mask = AT_FSID|AT_NODEID;
2152 if (seg->s_ops == &segvn_ops &&
2153 segop_getvp(seg, saddr, &vp) == 0 &&
2154 vp != NULL && vp->v_type == VREG &&
2155 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2156 if (vp == p->p_exec)
2157 (void) strcpy(pmp->pr_mapname, "a.out");
2158 else
2159 pr_object_name(pmp->pr_mapname,
2160 vp, &vattr);
2161 }
2162
2163 /*
2164 * Get the SysV shared memory id, if any.
2165 */
2166 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2167 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2168 SHMID_NONE) {
2169 if (pmp->pr_shmid == SHMID_FREE)
2170 pmp->pr_shmid = -1;
2171
2172 pmp->pr_mflags |= MA_SHM;
2173 } else {
3311
3312 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3313 return;
3314
3315 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3316 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3317
3318 pwp = avl_first(&as->a_wpage);
3319
3320 cookie = NULL;
3321 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3322 retrycnt = 0;
3323 if ((prot = pwp->wp_oprot) != 0) {
3324 caddr_t addr = pwp->wp_vaddr;
3325 struct seg *seg;
3326 retry:
3327
3328 if ((pwp->wp_prot != prot ||
3329 (pwp->wp_flags & WP_NOWATCH)) &&
3330 (seg = as_segat(as, addr)) != NULL) {
3331 err = segop_setprot(seg, addr, PAGESIZE, prot);
3332 if (err == IE_RETRY) {
3333 ASSERT(retrycnt == 0);
3334 retrycnt++;
3335 goto retry;
3336 }
3337 }
3338 }
3339 kmem_free(pwp, sizeof (struct watched_page));
3340 }
3341
3342 avl_destroy(&as->a_wpage);
3343 p->p_wprot = NULL;
3344
3345 AS_LOCK_EXIT(as, &as->a_lock);
3346 }
3347
3348 /*
3349 * Insert a watched area into the list of watched pages.
3350 * If oflags is zero then we are adding a new watched area.
3351 * Otherwise we are changing the flags of an existing watched area.
3417 if (oflags & WA_WRITE)
3418 pwp->wp_write--;
3419 if (oflags & WA_EXEC)
3420 pwp->wp_exec--;
3421
3422 ASSERT(pwp->wp_read >= 0);
3423 ASSERT(pwp->wp_write >= 0);
3424 ASSERT(pwp->wp_exec >= 0);
3425
3426 if (flags & WA_READ)
3427 pwp->wp_read++;
3428 if (flags & WA_WRITE)
3429 pwp->wp_write++;
3430 if (flags & WA_EXEC)
3431 pwp->wp_exec++;
3432
3433 if (!(p->p_flag & SVFWAIT)) {
3434 vaddr = pwp->wp_vaddr;
3435 if (pwp->wp_oprot == 0 &&
3436 (seg = as_segat(as, vaddr)) != NULL) {
3437 segop_getprot(seg, vaddr, 0, &prot);
3438 pwp->wp_oprot = (uchar_t)prot;
3439 pwp->wp_prot = (uchar_t)prot;
3440 }
3441 if (pwp->wp_oprot != 0) {
3442 prot = pwp->wp_oprot;
3443 if (pwp->wp_read)
3444 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3445 if (pwp->wp_write)
3446 prot &= ~PROT_WRITE;
3447 if (pwp->wp_exec)
3448 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3449 if (!(pwp->wp_flags & WP_NOWATCH) &&
3450 pwp->wp_prot != prot &&
3451 (pwp->wp_flags & WP_SETPROT) == 0) {
3452 pwp->wp_flags |= WP_SETPROT;
3453 pwp->wp_list = p->p_wprot;
3454 p->p_wprot = pwp;
3455 }
3456 pwp->wp_prot = (uchar_t)prot;
3457 }
3623
3624 if (addr == eaddr)
3625 return (eaddr);
3626
3627 refill:
3628 ASSERT(addr < eaddr);
3629 pagev->pg_pnbase = seg_page(seg, addr);
3630 pnlim = pagev->pg_pnbase + pagev->pg_npages;
3631 saddr = addr;
3632
3633 if (lastpg < pnlim)
3634 len = (size_t)(eaddr - addr);
3635 else
3636 len = pagev->pg_npages * PAGESIZE;
3637
3638 if (pagev->pg_incore != NULL) {
3639 /*
3640 * INCORE cleverly has different semantics than GETPROT:
3641 * it returns info on pages up to but NOT including addr + len.
3642 */
3643 segop_incore(seg, addr, len, pagev->pg_incore);
3644 pn = pagev->pg_pnbase;
3645
3646 do {
3647 /*
3648 * Guilty knowledge here: We know that segvn_incore
3649 * returns more than just the low-order bit that
3650 * indicates the page is actually in memory. If any
3651 * bits are set, then the page has backing store.
3652 */
3653 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3654 goto out;
3655
3656 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3657
3658 /*
3659 * If we examined all the pages in the vector but we're not
3660 * at the end of the segment, take another lap.
3661 */
3662 if (addr < eaddr)
3663 goto refill;
3664 }
3665
3666 /*
3667 * Need to take len - 1 because addr + len is the address of the
3668 * first byte of the page just past the end of what we want.
3669 */
3670 out:
3671 segop_getprot(seg, saddr, len - 1, pagev->pg_protv);
3672 return (addr);
3673 }
3674
3675 static caddr_t
3676 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3677 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3678 {
3679 /*
3680 * Our starting address is either the specified address, or the base
3681 * address from the start of the pagev. If the latter is greater,
3682 * this means a previous call to pr_pagev_fill has already scanned
3683 * further than the end of the previous mapping.
3684 */
3685 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3686 caddr_t addr = MAX(*saddrp, base);
3687 ulong_t pn = seg_page(seg, addr);
3688 uint_t prot, nprot;
3689
3690 /*
3691 * If we're dealing with noreserve pages, then advance addr to
3772
3773 /*
3774 * If we're interested in the reserved space, return the size of the
3775 * segment itself. Everything else in this function is a special case
3776 * to determine the actual underlying size of various segment types.
3777 */
3778 if (reserved)
3779 return (size);
3780
3781 /*
3782 * If this is a segvn mapping of a regular file, return the smaller
3783 * of the segment size and the remaining size of the file beyond
3784 * the file offset corresponding to seg->s_base.
3785 */
3786 if (seg->s_ops == &segvn_ops) {
3787 vattr_t vattr;
3788 vnode_t *vp;
3789
3790 vattr.va_mask = AT_SIZE;
3791
3792 if (segop_getvp(seg, seg->s_base, &vp) == 0 &&
3793 vp != NULL && vp->v_type == VREG &&
3794 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3795
3796 u_offset_t fsize = vattr.va_size;
3797 u_offset_t offset = segop_getoffset(seg, seg->s_base);
3798
3799 if (fsize < offset)
3800 fsize = 0;
3801 else
3802 fsize -= offset;
3803
3804 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3805
3806 if (fsize < (u_offset_t)size)
3807 size = (size_t)fsize;
3808 }
3809
3810 return (size);
3811 }
3812
3813 /*
3814 * If this is an ISM shared segment, don't include pages that are
3815 * beyond the real size of the spt segment that backs it.
3816 */
3817 if (seg->s_ops == &segspt_shmops)
3818 return (MIN(spt_realsize(seg), size));
3819
3820 /*
3821 	 * If this segment is a mapping from /dev/null, then this is a
3822 * reservation of virtual address space and has no actual size.
3823 * Such segments are backed by segdev and have type set to neither
3824 * MAP_SHARED nor MAP_PRIVATE.
3825 */
3826 if (seg->s_ops == &segdev_ops &&
3827 ((segop_gettype(seg, seg->s_base) &
3828 (MAP_SHARED | MAP_PRIVATE)) == 0))
3829 return (0);
3830
3831 /*
3832 * If this segment doesn't match one of the special types we handle,
3833 * just return the size of the segment itself.
3834 */
3835 return (size);
3836 }
3837
3838 uint_t
3839 pr_getprot(struct seg *seg, int reserved, void **tmp,
3840 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3841 {
3842 struct as *as = seg->s_as;
3843
3844 caddr_t saddr = *saddrp;
3845 caddr_t naddr;
3846
3847 int check_noreserve;
4038 * ranges that have different virtual memory protections.
4039 */
4040 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4041 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4042 ASSERT(baddr >= saddr && baddr <= eaddr);
4043
4044 /*
4045 * Segment loop part two: iterate from the current
4046 * position to the end of the protection boundary,
4047 * pausing at each address boundary (naddr) between
4048 * ranges that have different underlying page sizes.
4049 */
4050 for (; saddr < baddr; saddr = naddr) {
4051 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4052 ASSERT(naddr >= saddr && naddr <= baddr);
4053
4054 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4055
4056 mp->pr_vaddr = (uintptr_t)saddr;
4057 mp->pr_size = naddr - saddr;
4058 mp->pr_offset = segop_getoffset(seg, saddr);
4059 mp->pr_mflags = 0;
4060 if (prot & PROT_READ)
4061 mp->pr_mflags |= MA_READ;
4062 if (prot & PROT_WRITE)
4063 mp->pr_mflags |= MA_WRITE;
4064 if (prot & PROT_EXEC)
4065 mp->pr_mflags |= MA_EXEC;
4066 if (segop_gettype(seg, saddr) & MAP_SHARED)
4067 mp->pr_mflags |= MA_SHARED;
4068 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
4069 mp->pr_mflags |= MA_NORESERVE;
4070 if (seg->s_ops == &segspt_shmops ||
4071 (seg->s_ops == &segvn_ops &&
4072 (segop_getvp(seg, saddr, &vp) != 0 ||
4073 vp == NULL)))
4074 mp->pr_mflags |= MA_ANON;
4075 if (seg == brkseg)
4076 mp->pr_mflags |= MA_BREAK;
4077 else if (seg == stkseg)
4078 mp->pr_mflags |= MA_STACK;
4079 if (seg->s_ops == &segspt_shmops)
4080 mp->pr_mflags |= MA_ISM | MA_SHM;
4081
4082 mp->pr_pagesize = PAGESIZE;
4083 if (psz == -1) {
4084 mp->pr_hatpagesize = 0;
4085 } else {
4086 mp->pr_hatpagesize = psz;
4087 }
4088
4089 /*
4090 * Manufacture a filename for the "object" dir.
4091 */
4092 mp->pr_dev = PRNODEV;
4093 vattr.va_mask = AT_FSID|AT_NODEID;
4094 if (seg->s_ops == &segvn_ops &&
4095 segop_getvp(seg, saddr, &vp) == 0 &&
4096 vp != NULL && vp->v_type == VREG &&
4097 VOP_GETATTR(vp, &vattr, 0, CRED(),
4098 NULL) == 0) {
4099 mp->pr_dev = vattr.va_fsid;
4100 mp->pr_ino = vattr.va_nodeid;
4101 if (vp == p->p_exec)
4102 (void) strcpy(mp->pr_mapname,
4103 "a.out");
4104 else
4105 pr_object_name(mp->pr_mapname,
4106 vp, &vattr);
4107 }
4108
4109 /*
4110 * Get the SysV shared memory id, if any.
4111 */
4112 if ((mp->pr_mflags & MA_SHARED) &&
4113 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4114 seg->s_base)) != SHMID_NONE) {
4115 if (mp->pr_shmid == SHMID_FREE)
4116 mp->pr_shmid = -1;
4117
4118 mp->pr_mflags |= MA_SHM;
4119 } else {
4120 mp->pr_shmid = -1;
4121 }
4122
4123 npages = ((uintptr_t)(naddr - saddr)) >>
4124 PAGESHIFT;
4125 parr = kmem_zalloc(npages, KM_SLEEP);
4126
4127 segop_incore(seg, saddr, naddr - saddr, parr);
4128
4129 for (pagenum = 0; pagenum < npages; pagenum++) {
4130 if (parr[pagenum] & SEG_PAGE_INCORE)
4131 mp->pr_rss++;
4132 if (parr[pagenum] & SEG_PAGE_ANON)
4133 mp->pr_anon++;
4134 if (parr[pagenum] & SEG_PAGE_LOCKED)
4135 mp->pr_locked++;
4136 }
4137 kmem_free(parr, npages);
4138 }
4139 }
4140 ASSERT(tmp == NULL);
4141 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4142
4143 return (0);
4144 }
4145
4146 /*
4147 * Return the process's credentials. We don't need a 32-bit equivalent of
4222 * ranges that have different virtual memory protections.
4223 */
4224 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4225 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4226 ASSERT(baddr >= saddr && baddr <= eaddr);
4227
4228 /*
4229 * Segment loop part two: iterate from the current
4230 * position to the end of the protection boundary,
4231 * pausing at each address boundary (naddr) between
4232 * ranges that have different underlying page sizes.
4233 */
4234 for (; saddr < baddr; saddr = naddr) {
4235 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4236 ASSERT(naddr >= saddr && naddr <= baddr);
4237
4238 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4239
4240 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4241 mp->pr_size = (size32_t)(naddr - saddr);
4242 mp->pr_offset = segop_getoffset(seg, saddr);
4243 mp->pr_mflags = 0;
4244 if (prot & PROT_READ)
4245 mp->pr_mflags |= MA_READ;
4246 if (prot & PROT_WRITE)
4247 mp->pr_mflags |= MA_WRITE;
4248 if (prot & PROT_EXEC)
4249 mp->pr_mflags |= MA_EXEC;
4250 if (segop_gettype(seg, saddr) & MAP_SHARED)
4251 mp->pr_mflags |= MA_SHARED;
4252 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
4253 mp->pr_mflags |= MA_NORESERVE;
4254 if (seg->s_ops == &segspt_shmops ||
4255 (seg->s_ops == &segvn_ops &&
4256 (segop_getvp(seg, saddr, &vp) != 0 ||
4257 vp == NULL)))
4258 mp->pr_mflags |= MA_ANON;
4259 if (seg == brkseg)
4260 mp->pr_mflags |= MA_BREAK;
4261 else if (seg == stkseg)
4262 mp->pr_mflags |= MA_STACK;
4263 if (seg->s_ops == &segspt_shmops)
4264 mp->pr_mflags |= MA_ISM | MA_SHM;
4265
4266 mp->pr_pagesize = PAGESIZE;
4267 if (psz == -1) {
4268 mp->pr_hatpagesize = 0;
4269 } else {
4270 mp->pr_hatpagesize = psz;
4271 }
4272
4273 /*
4274 * Manufacture a filename for the "object" dir.
4275 */
4276 mp->pr_dev = PRNODEV32;
4277 vattr.va_mask = AT_FSID|AT_NODEID;
4278 if (seg->s_ops == &segvn_ops &&
4279 segop_getvp(seg, saddr, &vp) == 0 &&
4280 vp != NULL && vp->v_type == VREG &&
4281 VOP_GETATTR(vp, &vattr, 0, CRED(),
4282 NULL) == 0) {
4283 (void) cmpldev(&mp->pr_dev,
4284 vattr.va_fsid);
4285 mp->pr_ino = vattr.va_nodeid;
4286 if (vp == p->p_exec)
4287 (void) strcpy(mp->pr_mapname,
4288 "a.out");
4289 else
4290 pr_object_name(mp->pr_mapname,
4291 vp, &vattr);
4292 }
4293
4294 /*
4295 * Get the SysV shared memory id, if any.
4296 */
4297 if ((mp->pr_mflags & MA_SHARED) &&
4298 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4299 seg->s_base)) != SHMID_NONE) {
4300 if (mp->pr_shmid == SHMID_FREE)
4301 mp->pr_shmid = -1;
4302
4303 mp->pr_mflags |= MA_SHM;
4304 } else {
4305 mp->pr_shmid = -1;
4306 }
4307
4308 npages = ((uintptr_t)(naddr - saddr)) >>
4309 PAGESHIFT;
4310 parr = kmem_zalloc(npages, KM_SLEEP);
4311
4312 segop_incore(seg, saddr, naddr - saddr, parr);
4313
4314 for (pagenum = 0; pagenum < npages; pagenum++) {
4315 if (parr[pagenum] & SEG_PAGE_INCORE)
4316 mp->pr_rss++;
4317 if (parr[pagenum] & SEG_PAGE_ANON)
4318 mp->pr_anon++;
4319 if (parr[pagenum] & SEG_PAGE_LOCKED)
4320 mp->pr_locked++;
4321 }
4322 kmem_free(parr, npages);
4323 }
4324 }
4325 ASSERT(tmp == NULL);
4326 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4327
4328 return (0);
4329 }
4330 #endif /* _SYSCALL32_IMPL */
|