5592
5593 /*
5594 * Return zero if the process has at least one vnode mapped in to its
5595 * address space which shouldn't be allowed to change zones.
5596 *
5597 * Also return zero if the process has any shared mappings which reserve
5598 * swap. This is because the counting for zone.max-swap does not allow swap
5599 * reservation to be shared between zones. zone swap reservation is counted
5600 * on zone->zone_max_swap.
5601 */
5602 static int
5603 as_can_change_zones(void)
5604 {
5605 	proc_t *pp = curproc;
5606 	struct seg *seg;
5607 	struct as *as = pp->p_as;
5608 	vnode_t *vp;
5609
5610 	ASSERT(pp->p_as != &kas);
5611 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
5612 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
5613
5614 		/*
5615 		 * Shared anonymous memory that reserves swap pins the
5616 		 * process to its current zone; see the block comment above.
5617 		 */
5618 		if (seg_can_change_zones(seg) == B_FALSE)
5619 			goto deny;
5620
5621 		/*
5622 		 * A segment without a backing vnode places no restriction
5623 		 * on zone changes, so skip it.
5624 		 */
5625 		vp = NULL;
5626 		if (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)
5627 			continue;
5628
5629 		/* The first vnode that disallows the change decides. */
5630 		if (!vn_can_change_zones(vp))
5631 			goto deny;
5632 	}
5633 	AS_LOCK_EXIT(as, &as->a_lock);
5634 	return (1);
5635 deny:
5636 	AS_LOCK_EXIT(as, &as->a_lock);
5637 	return (0);
5638 }
5638
5639 /*
5640 * Count swap reserved by curproc's address space
5641 */
5642 static size_t
5643 as_swresv(void)
5644 {
5645 	proc_t *pp = curproc;
5646 	struct as *as = pp->p_as;
5647 	struct seg *seg;
5648 	size_t total = 0;
5649
5650 	/* Caller must hold the address space write lock; see zone_enter(). */
5651 	ASSERT(pp->p_as != &kas);
5652 	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
5653 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
5654 		total += seg_swresv(seg);
5655 	}
5656 	return (total);
5657 }
5657
5658 /*
5659 * Systemcall entry point for zone_enter().
5660 *
5661 * The current process is injected into said zone. In the process
5662 * it will change its project membership, privileges, rootdir/cwd,
5663 * zone-wide rctls, and pool association to match those of the zone.
5664 *
5665 * The first zone_enter() called while the zone is in the ZONE_IS_READY
5666 * state will transition it to ZONE_IS_RUNNING. Processes may only
5667 * enter a zone that is "ready" or "running".
5668 */
5669 static int
5670 zone_enter(zoneid_t zoneid)
5671 {
5836 */
5837 mutex_exit(&zonehash_lock);
5838 mutex_exit(&cpu_lock);
5839 if (pool_state == POOL_ENABLED &&
5840 newpool != oldpool)
5841 (void) pool_do_bind(oldpool, P_PID, P_MYID,
5842 POOL_BIND_ALL);
5843 pool_unlock();
5844 zone_rele(zone);
5845 err = EINVAL;
5846 goto out;
5847 }
5848
5849 /*
5850 * a_lock must be held while transferring locked memory and swap
5851 * reservation from the global zone to the non global zone because
5852 * asynchronous faults on the processes' address space can lock
5853 * memory and reserve swap via MCL_FUTURE and MAP_NORESERVE
5854 * segments respectively.
5855 */
5856 AS_LOCK_ENTER(pp->as, &pp->p_as->a_lock, RW_WRITER);
5857 swap = as_swresv();
5858 mutex_enter(&pp->p_lock);
5859 zone_proj0 = zone->zone_zsched->p_task->tk_proj;
5860 /* verify that we do not exceed any task or lwp limits */
5861 mutex_enter(&zone->zone_nlwps_lock);
5862 /* add new lwps to zone and zone's proj0 */
5863 zone_proj0->kpj_nlwps += pp->p_lwpcnt;
5864 zone->zone_nlwps += pp->p_lwpcnt;
5865 /* add 1 task to zone's proj0 */
5866 zone_proj0->kpj_ntasks += 1;
5867
5868 zone_proj0->kpj_nprocs++;
5869 zone->zone_nprocs++;
5870 mutex_exit(&zone->zone_nlwps_lock);
5871
5872 mutex_enter(&zone->zone_mem_lock);
5873 zone->zone_locked_mem += pp->p_locked_mem;
5874 zone_proj0->kpj_data.kpd_locked_mem += pp->p_locked_mem;
5875 zone->zone_max_swap += swap;
5876 mutex_exit(&zone->zone_mem_lock);
5883 mutex_enter(&pp->p_zone->zone_nlwps_lock);
5884 pp->p_zone->zone_nlwps -= pp->p_lwpcnt;
5885 pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt;
5886 pp->p_task->tk_proj->kpj_nprocs--;
5887 pp->p_zone->zone_nprocs--;
5888 mutex_exit(&pp->p_zone->zone_nlwps_lock);
5889
5890 mutex_enter(&pp->p_zone->zone_mem_lock);
5891 pp->p_zone->zone_locked_mem -= pp->p_locked_mem;
5892 pp->p_task->tk_proj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
5893 pp->p_zone->zone_max_swap -= swap;
5894 mutex_exit(&pp->p_zone->zone_mem_lock);
5895
5896 mutex_enter(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
5897 pp->p_task->tk_proj->kpj_data.kpd_crypto_mem -= pp->p_crypto_mem;
5898 mutex_exit(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
5899
5900 pp->p_flag |= SZONETOP;
5901 pp->p_zone = zone;
5902 mutex_exit(&pp->p_lock);
5903 AS_LOCK_EXIT(pp->p_as, &pp->p_as->a_lock);
5904
5905 /*
5906 * Joining the zone cannot fail from now on.
5907 *
5908 * This means that a lot of the following code can be commonized and
5909 * shared with zsched().
5910 */
5911
5912 /*
5913 * If the process contract fmri was inherited, we need to
5914 * flag this so that any contract status will not leak
5915 * extra zone information, svc_fmri in this case
5916 */
5917 if (ctp->conp_svc_ctid != ct->ct_id) {
5918 mutex_enter(&ct->ct_lock);
5919 ctp->conp_svc_zone_enter = ct->ct_id;
5920 mutex_exit(&ct->ct_lock);
5921 }
5922
5923 /*
|
5592
5593 /*
5594 * Return zero if the process has at least one vnode mapped in to its
5595 * address space which shouldn't be allowed to change zones.
5596 *
5597 * Also return zero if the process has any shared mappings which reserve
5598 * swap. This is because the counting for zone.max-swap does not allow swap
5599 * reservation to be shared between zones. zone swap reservation is counted
5600 * on zone->zone_max_swap.
5601 */
5602 static int
5603 as_can_change_zones(void)
5604 {
5605 	proc_t *pp = curproc;
5606 	struct seg *seg;
5607 	struct as *as = pp->p_as;
5608 	vnode_t *vp;
5609
5610 	ASSERT(pp->p_as != &kas);
5611 	AS_LOCK_ENTER(as, RW_READER);
5612 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
5613
5614 		/*
5615 		 * Shared anonymous memory that reserves swap pins the
5616 		 * process to its current zone; see the block comment above.
5617 		 */
5618 		if (seg_can_change_zones(seg) == B_FALSE)
5619 			goto deny;
5620
5621 		/*
5622 		 * A segment without a backing vnode places no restriction
5623 		 * on zone changes, so skip it.
5624 		 */
5625 		vp = NULL;
5626 		if (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)
5627 			continue;
5628
5629 		/* The first vnode that disallows the change decides. */
5630 		if (!vn_can_change_zones(vp))
5631 			goto deny;
5632 	}
5633 	AS_LOCK_EXIT(as);
5634 	return (1);
5635 deny:
5636 	AS_LOCK_EXIT(as);
5637 	return (0);
5638 }
5638
5639 /*
5640 * Count swap reserved by curproc's address space
5641 */
5642 static size_t
5643 as_swresv(void)
5644 {
5645 	proc_t *pp = curproc;
5646 	struct as *as = pp->p_as;
5647 	struct seg *seg;
5648 	size_t total = 0;
5649
5650 	/* Caller must hold the address space write lock; see zone_enter(). */
5651 	ASSERT(pp->p_as != &kas);
5652 	ASSERT(AS_WRITE_HELD(as));
5653 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
5654 		total += seg_swresv(seg);
5655 	}
5656 	return (total);
5657 }
5657
5658 /*
5659 * Systemcall entry point for zone_enter().
5660 *
5661 * The current process is injected into said zone. In the process
5662 * it will change its project membership, privileges, rootdir/cwd,
5663 * zone-wide rctls, and pool association to match those of the zone.
5664 *
5665 * The first zone_enter() called while the zone is in the ZONE_IS_READY
5666 * state will transition it to ZONE_IS_RUNNING. Processes may only
5667 * enter a zone that is "ready" or "running".
5668 */
5669 static int
5670 zone_enter(zoneid_t zoneid)
5671 {
5836 */
5837 mutex_exit(&zonehash_lock);
5838 mutex_exit(&cpu_lock);
5839 if (pool_state == POOL_ENABLED &&
5840 newpool != oldpool)
5841 (void) pool_do_bind(oldpool, P_PID, P_MYID,
5842 POOL_BIND_ALL);
5843 pool_unlock();
5844 zone_rele(zone);
5845 err = EINVAL;
5846 goto out;
5847 }
5848
5849 /*
5850 * a_lock must be held while transferring locked memory and swap
5851 * reservation from the global zone to the non global zone because
5852 * asynchronous faults on the processes' address space can lock
5853 * memory and reserve swap via MCL_FUTURE and MAP_NORESERVE
5854 * segments respectively.
5855 */
5856 AS_LOCK_ENTER(pp->p_as, RW_WRITER);
5857 swap = as_swresv();
5858 mutex_enter(&pp->p_lock);
5859 zone_proj0 = zone->zone_zsched->p_task->tk_proj;
5860 /* verify that we do not exceed any task or lwp limits */
5861 mutex_enter(&zone->zone_nlwps_lock);
5862 /* add new lwps to zone and zone's proj0 */
5863 zone_proj0->kpj_nlwps += pp->p_lwpcnt;
5864 zone->zone_nlwps += pp->p_lwpcnt;
5865 /* add 1 task to zone's proj0 */
5866 zone_proj0->kpj_ntasks += 1;
5867
5868 zone_proj0->kpj_nprocs++;
5869 zone->zone_nprocs++;
5870 mutex_exit(&zone->zone_nlwps_lock);
5871
5872 mutex_enter(&zone->zone_mem_lock);
5873 zone->zone_locked_mem += pp->p_locked_mem;
5874 zone_proj0->kpj_data.kpd_locked_mem += pp->p_locked_mem;
5875 zone->zone_max_swap += swap;
5876 mutex_exit(&zone->zone_mem_lock);
5883 mutex_enter(&pp->p_zone->zone_nlwps_lock);
5884 pp->p_zone->zone_nlwps -= pp->p_lwpcnt;
5885 pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt;
5886 pp->p_task->tk_proj->kpj_nprocs--;
5887 pp->p_zone->zone_nprocs--;
5888 mutex_exit(&pp->p_zone->zone_nlwps_lock);
5889
5890 mutex_enter(&pp->p_zone->zone_mem_lock);
5891 pp->p_zone->zone_locked_mem -= pp->p_locked_mem;
5892 pp->p_task->tk_proj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
5893 pp->p_zone->zone_max_swap -= swap;
5894 mutex_exit(&pp->p_zone->zone_mem_lock);
5895
5896 mutex_enter(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
5897 pp->p_task->tk_proj->kpj_data.kpd_crypto_mem -= pp->p_crypto_mem;
5898 mutex_exit(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
5899
5900 pp->p_flag |= SZONETOP;
5901 pp->p_zone = zone;
5902 mutex_exit(&pp->p_lock);
5903 AS_LOCK_EXIT(pp->p_as);
5904
5905 /*
5906 * Joining the zone cannot fail from now on.
5907 *
5908 * This means that a lot of the following code can be commonized and
5909 * shared with zsched().
5910 */
5911
5912 /*
5913 * If the process contract fmri was inherited, we need to
5914 * flag this so that any contract status will not leak
5915 * extra zone information, svc_fmri in this case
5916 */
5917 if (ctp->conp_svc_ctid != ct->ct_id) {
5918 mutex_enter(&ct->ct_lock);
5919 ctp->conp_svc_zone_enter = ct->ct_id;
5920 mutex_exit(&ct->ct_lock);
5921 }
5922
5923 /*
|