58 #include <sys/dumphdr.h>
59 #include <sys/debug.h>
60 #include <sys/vtrace.h>
61 #include <sys/stack.h>
62 #include <sys/atomic.h>
63 #include <sys/archsystm.h>
64 #include <sys/lgrp.h>
65
66 #include <vm/as.h>
67 #include <vm/seg.h>
68 #include <vm/seg_kp.h>
69 #include <vm/seg_kmem.h>
70 #include <vm/anon.h>
71 #include <vm/page.h>
72 #include <vm/hat.h>
73 #include <sys/bitmap.h>
74
75 /*
76 * Private seg op routines
77 */
78 static void segkp_badop(void);
79 static void segkp_dump(struct seg *seg);
80 static int segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
81 uint_t prot);
82 static int segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
83 static int segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
84 struct page ***page, enum lock_type type,
85 enum seg_rw rw);
86 static void segkp_insert(struct seg *seg, struct segkp_data *kpd);
87 static void segkp_delete(struct seg *seg, struct segkp_data *kpd);
88 static caddr_t segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
89 struct segkp_data **tkpd, struct anon_map *amp);
90 static void segkp_release_internal(struct seg *seg,
91 struct segkp_data *kpd, size_t len);
92 static int segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
93 size_t len, struct segkp_data *kpd, uint_t flags);
94 static int segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
95 size_t len, struct segkp_data *kpd, uint_t flags);
96 static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
97 static int segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
98 static lgrp_mem_policy_info_t *segkp_getpolicy(struct seg *seg,
99 caddr_t addr);
100 static int segkp_capable(struct seg *seg, segcapability_t capability);
101
102 /*
103 * Lock used to protect the hash table(s) and caches.
104 */
105 static kmutex_t segkp_lock;
106
107 /*
108 * The segkp caches
109 */
110 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
111
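/*
 * SEGKP_BADOP(t) casts segkp_badop to a pointer to a function
 * returning type t, so each unsupported entry in segkp_ops below
 * gets a panicking implementation rather than a NULL pointer.
 */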
112 #define SEGKP_BADOP(t) (t(*)())segkp_badop
113
114 /*
115 * When there are fewer than red_minavail bytes left on the stack,
116 * segkp_map_red() will map in the redzone (if called). 5000 seems
117 * to work reasonably well...
118 */
119 long red_minavail = 5000;
120
121 /*
122 * will be set to 1 for 32-bit x86 systems only, in startup.c
123 */
124 int segkp_fromheap = 0;
125 ulong_t *segkp_bitmap;
126
127 /*
128 * If segkp_map_red() is called with the redzone already mapped and
129 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
130 * then the stack situation has become quite serious; if much more stack
131 * is consumed, we have the potential of scrogging the next thread/LWP
132 * structure. To help debug the "can't happen" panics which may
133 * result from this condition, we record hrestime and the calling thread
134 * in red_deep_hires and red_deep_thread respectively.
135 */
136 #define RED_DEEP_THRESHOLD 2000
137
138 hrtime_t red_deep_hires;
139 kthread_t *red_deep_thread;
140
141 uint32_t red_nmapped;
142 uint32_t red_closest = UINT_MAX;
143 uint32_t red_ndoubles;
144
145 pgcnt_t anon_segkp_pages_locked; /* See vm/anon.h */
146 pgcnt_t anon_segkp_pages_resv; /* anon reserved by seg_kp */
147
148 static struct seg_ops segkp_ops = {
149 SEGKP_BADOP(int), /* dup */
150 SEGKP_BADOP(int), /* unmap */
151 SEGKP_BADOP(void), /* free */
152 segkp_fault,
153 SEGKP_BADOP(faultcode_t), /* faulta */
154 SEGKP_BADOP(int), /* setprot */
155 segkp_checkprot,
156 segkp_kluster,
157 SEGKP_BADOP(size_t), /* swapout */
158 SEGKP_BADOP(int), /* sync */
159 SEGKP_BADOP(size_t), /* incore */
160 SEGKP_BADOP(int), /* lockop */
161 SEGKP_BADOP(int), /* getprot */
162 SEGKP_BADOP(u_offset_t), /* getoffset */
163 SEGKP_BADOP(int), /* gettype */
164 SEGKP_BADOP(int), /* getvp */
165 SEGKP_BADOP(int), /* advise */
166 segkp_dump, /* dump */
167 segkp_pagelock, /* pagelock */
168 SEGKP_BADOP(int), /* setpgsz */
169 segkp_getmemid, /* getmemid */
170 segkp_getpolicy, /* getpolicy */
171 segkp_capable, /* capable */
172 seg_inherit_notsup /* inherit */
173 };
174
175
176 static void
177 segkp_badop(void)
178 {
179 panic("segkp_badop");
180 /*NOTREACHED*/
181 }
182
183 static void segkpinit_mem_config(struct seg *);
184
185 static uint32_t segkp_indel;
186
187 /*
188 * Allocate the segment-specific private data struct and fill it in
189 * with the per-kp-segment mutex, anon ptr. array, and hash table.
190 */
191 int
192 segkp_create(struct seg *seg)
193 {
194 struct segkp_segdata *kpsd;
195 size_t np;
196
197 ASSERT(seg != NULL && seg->s_as == &kas);
198 ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
199
200 if (seg->s_size & PAGEOFFSET) {
201 panic("Bad segkp size");
202 /*NOTREACHED*/
743 }
744 }
745
746 /* If locked, release physical memory reservation */
747 if (kpd->kp_flags & KPD_LOCKED) {
748 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
749 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
750 atomic_add_long(&anon_segkp_pages_locked, -pages);
751 page_unresv(pages);
752 }
753
754 vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
755 kmem_free(kpd, sizeof (struct segkp_data));
756 }
757
758 /*
759 * segkp_map_red() will check the current frame pointer against the
760 * stack base. If the amount of stack remaining is questionable
761 * (less than red_minavail), then segkp_map_red() will map in the redzone
762 * and return 1. Otherwise, it will return 0. segkp_map_red() can
763 * _only_ be called when:
764 *
765 * - it is safe to sleep on page_create_va().
766 * - the caller is non-swappable.
767 *
768 * It is up to the caller to remember whether segkp_map_red() successfully
769 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
770 * time. Note that the caller must _remain_ non-swappable until after
771 * calling segkp_unmap_red().
772 *
773 * Currently, this routine is only called from pagefault() (which necessarily
774 * satisfies the above conditions).
775 */
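/*
 * A minimal caller sketch (hypothetical; the real caller is
 * pagefault()), showing the non-swappable contract:
 *
 *	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
 *	mapped_red = segkp_map_red();
 *	... handle the stack fault ...
 *	if (mapped_red)
 *		segkp_unmap_red();
 *
 * The caller must remain non-swappable until segkp_unmap_red()
 * returns.
 */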
776 #if defined(STACK_GROWTH_DOWN)
777 int
778 segkp_map_red(void)
779 {
780 uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
781 #ifndef _LP64
782 caddr_t stkbase;
783 #endif
784
785 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
786
787 /*
788 * Optimize for the common case where we simply return.
789 */
790 if ((curthread->t_red_pp == NULL) &&
791 (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
792 return (0);
793
794 #if defined(_LP64)
795 /*
796 * XXX We probably need something better than this.
797 */
798 panic("kernel stack overflow");
799 /*NOTREACHED*/
800 #else /* _LP64 */
801 if (curthread->t_red_pp == NULL) {
802 page_t *red_pp;
803 struct seg kseg;
804
805 caddr_t red_va = (caddr_t)
806 (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
867 red_deep_hires = hrestime.tv_nsec;
868 red_deep_thread = curthread;
869 }
870
871 /*
872 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
873 */
874 ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
875 return (0);
876 #endif /* _LP64 */
877 }
878
879 void
880 segkp_unmap_red(void)
881 {
882 page_t *pp;
883 caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
884 (uintptr_t)PAGEMASK) - PAGESIZE);
885
886 ASSERT(curthread->t_red_pp != NULL);
887 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
888
889 /*
890 * Because we locked the mapping down, we can't simply rely
891 * on page_destroy() to clean everything up; we need to call
892 * hat_unload() to explicitly unlock the mapping resources.
893 */
894 hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
895
896 pp = curthread->t_red_pp;
897
898 ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
899
900 /*
901 * Need to upgrade the SE_SHARED lock to SE_EXCL.
902 */
903 if (!page_tryupgrade(pp)) {
904 /*
905 * As there is no wait for the upgrade, release the
906 * SE_SHARED lock and wait for SE_EXCL.
907 */
1380 addr = kpd->kp_base;
1381 eaddr = addr + kpd->kp_len;
1382 while (addr < eaddr) {
1383 ASSERT(seg->s_as == &kas);
1384 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1385 if (pfn != PFN_INVALID)
1386 dump_addpage(seg->s_as, addr, pfn);
1387 addr += PAGESIZE;
1388 dump_timeleft = dump_timeout;
1389 }
1390 }
1391 }
1392 }
1393
1394 /*ARGSUSED*/
1395 static int
1396 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1397 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1398 {
1399 return (ENOTSUP);
1400 }
1401
1402 /*ARGSUSED*/
1403 static int
1404 segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
1405 {
1406 return (ENODEV);
1407 }
1408
1409 /*ARGSUSED*/
1410 static lgrp_mem_policy_info_t *
1411 segkp_getpolicy(struct seg *seg, caddr_t addr)
1412 {
1413 return (NULL);
1414 }
1415
1416 /*ARGSUSED*/
1417 static int
1418 segkp_capable(struct seg *seg, segcapability_t capability)
1419 {
1420 return (0);
1421 }
1422
1423 #include <sys/mem_config.h>
1424
1425 /*ARGSUSED*/
1426 static void
1427 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1428 {}
1429
1430 /*
1431 * During memory delete, turn off caches so that pages are not held.
1432 * A better solution may be to unlock the pages while they are
1433 * in the cache so that they may be collected naturally.
1434 */
1435
1436 /*ARGSUSED*/
1437 static int
1438 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1439 {
1440 atomic_inc_32(&segkp_indel);
58 #include <sys/dumphdr.h>
59 #include <sys/debug.h>
60 #include <sys/vtrace.h>
61 #include <sys/stack.h>
62 #include <sys/atomic.h>
63 #include <sys/archsystm.h>
64 #include <sys/lgrp.h>
65
66 #include <vm/as.h>
67 #include <vm/seg.h>
68 #include <vm/seg_kp.h>
69 #include <vm/seg_kmem.h>
70 #include <vm/anon.h>
71 #include <vm/page.h>
72 #include <vm/hat.h>
73 #include <sys/bitmap.h>
74
75 /*
76 * Private seg op routines
77 */
78 static void segkp_dump(struct seg *seg);
79 static int segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
80 uint_t prot);
81 static int segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
82 static int segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
83 struct page ***page, enum lock_type type,
84 enum seg_rw rw);
85 static void segkp_insert(struct seg *seg, struct segkp_data *kpd);
86 static void segkp_delete(struct seg *seg, struct segkp_data *kpd);
87 static caddr_t segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
88 struct segkp_data **tkpd, struct anon_map *amp);
89 static void segkp_release_internal(struct seg *seg,
90 struct segkp_data *kpd, size_t len);
91 static int segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
92 size_t len, struct segkp_data *kpd, uint_t flags);
93 static int segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
94 size_t len, struct segkp_data *kpd, uint_t flags);
95 static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
96
97 /*
98 * Lock used to protect the hash table(s) and caches.
99 */
100 static kmutex_t segkp_lock;
101
102 /*
103 * The segkp caches
104 */
105 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
106
107 /*
108 * When there are fewer than red_minavail bytes left on the stack,
109 * segkp_map_red() will map in the redzone (if called). 5000 seems
110 * to work reasonably well...
111 */
112 long red_minavail = 5000;
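/*
 * Tuning sketch (illustrative only): as a kernel global, the
 * threshold could be raised from /etc/system, e.g. with a
 * hypothetical value:
 *
 *	set red_minavail=0x2000
 */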
113
114 /*
115 * will be set to 1 for 32-bit x86 systems only, in startup.c
116 */
117 int segkp_fromheap = 0;
118 ulong_t *segkp_bitmap;
119
120 /*
121 * If segkp_map_red() is called with the redzone already mapped and
122 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
123 * then the stack situation has become quite serious; if much more stack
124 * is consumed, we have the potential of scrogging the next thread/LWP
125 * structure. To help debug the "can't happen" panics which may
126 * result from this condition, we record hrestime and the calling thread
127 * in red_deep_hires and red_deep_thread respectively.
128 */
129 #define RED_DEEP_THRESHOLD 2000
130
131 hrtime_t red_deep_hires;
132 kthread_t *red_deep_thread;
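/*
 * Post-mortem sketch (assuming an mdb -k session; commands are
 * illustrative): inspect the recorded state with, e.g.:
 *
 *	> red_deep_hires/J
 *	> *red_deep_thread::findstack
 */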
133
134 uint32_t red_nmapped;
135 uint32_t red_closest = UINT_MAX;
136 uint32_t red_ndoubles;
137
138 pgcnt_t anon_segkp_pages_locked; /* See vm/anon.h */
139 pgcnt_t anon_segkp_pages_resv; /* anon reserved by seg_kp */
140
141 static const struct seg_ops segkp_ops = {
142 .fault = segkp_fault,
143 .checkprot = segkp_checkprot,
144 .kluster = segkp_kluster,
145 .dump = segkp_dump,
146 .pagelock = segkp_pagelock,
147 };
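/*
 * With designated initializers, every member not named above is
 * implicitly zero-initialized, so the unsupported seg ops are now
 * NULL pointers rather than the SEGKP_BADOP() panic stubs the old
 * positional form of this table used.
 */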
148
149
150 static void segkpinit_mem_config(struct seg *);
151
152 static uint32_t segkp_indel;
153
154 /*
155 * Allocate the segment-specific private data struct and fill it in
156 * with the per-kp-segment mutex, anon ptr. array, and hash table.
157 */
158 int
159 segkp_create(struct seg *seg)
160 {
161 struct segkp_segdata *kpsd;
162 size_t np;
163
164 ASSERT(seg != NULL && seg->s_as == &kas);
165 ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
166
167 if (seg->s_size & PAGEOFFSET) {
168 panic("Bad segkp size");
169 /*NOTREACHED*/
710 }
711 }
712
713 /* If locked, release physical memory reservation */
714 if (kpd->kp_flags & KPD_LOCKED) {
715 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
716 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
717 atomic_add_long(&anon_segkp_pages_locked, -pages);
718 page_unresv(pages);
719 }
720
721 vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
722 kmem_free(kpd, sizeof (struct segkp_data));
723 }
724
725 /*
726 * segkp_map_red() will check the current frame pointer against the
727 * stack base. If the amount of stack remaining is questionable
728 * (less than red_minavail), then segkp_map_red() will map in the redzone
729 * and return 1. Otherwise, it will return 0. segkp_map_red() can
730 * _only_ be called when it is safe to sleep on page_create_va().
731 *
732 * It is up to the caller to remember whether segkp_map_red() successfully
733 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
734 * time.
735 *
736 * Currently, this routine is only called from pagefault() (which necessarily
737 * satisfies the above conditions).
738 */
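/*
 * A minimal caller sketch (hypothetical; pagefault() is the only
 * caller today):
 *
 *	mapped_red = segkp_map_red();
 *	... handle the stack fault ...
 *	if (mapped_red)
 *		segkp_unmap_red();
 */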
739 #if defined(STACK_GROWTH_DOWN)
740 int
741 segkp_map_red(void)
742 {
743 uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
744 #ifndef _LP64
745 caddr_t stkbase;
746 #endif
747
748 /*
749 * Optimize for the common case where we simply return.
750 */
751 if ((curthread->t_red_pp == NULL) &&
752 (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
753 return (0);
754
755 #if defined(_LP64)
756 /*
757 * XXX We probably need something better than this.
758 */
759 panic("kernel stack overflow");
760 /*NOTREACHED*/
761 #else /* _LP64 */
762 if (curthread->t_red_pp == NULL) {
763 page_t *red_pp;
764 struct seg kseg;
765
766 caddr_t red_va = (caddr_t)
767 (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
828 red_deep_hires = hrestime.tv_nsec;
829 red_deep_thread = curthread;
830 }
831
832 /*
833 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
834 */
835 ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
836 return (0);
837 #endif /* _LP64 */
838 }
839
840 void
841 segkp_unmap_red(void)
842 {
843 page_t *pp;
844 caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
845 (uintptr_t)PAGEMASK) - PAGESIZE);
846
847 ASSERT(curthread->t_red_pp != NULL);
848
849 /*
850 * Because we locked the mapping down, we can't simply rely
851 * on page_destroy() to clean everything up; we need to call
852 * hat_unload() to explicitly unlock the mapping resources.
853 */
854 hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
855
856 pp = curthread->t_red_pp;
857
858 ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
859
860 /*
861 * Need to upgrade the SE_SHARED lock to SE_EXCL.
862 */
863 if (!page_tryupgrade(pp)) {
864 /*
865 * As there is no wait for the upgrade, release the
866 * SE_SHARED lock and wait for SE_EXCL.
867 */
1340 addr = kpd->kp_base;
1341 eaddr = addr + kpd->kp_len;
1342 while (addr < eaddr) {
1343 ASSERT(seg->s_as == &kas);
1344 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1345 if (pfn != PFN_INVALID)
1346 dump_addpage(seg->s_as, addr, pfn);
1347 addr += PAGESIZE;
1348 dump_timeleft = dump_timeout;
1349 }
1350 }
1351 }
1352 }
1353
1354 /*ARGSUSED*/
1355 static int
1356 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1357 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1358 {
1359 return (ENOTSUP);
1360 }
1361
1362 #include <sys/mem_config.h>
1363
1364 /*ARGSUSED*/
1365 static void
1366 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1367 {}
1368
1369 /*
1370 * During memory delete, turn off caches so that pages are not held.
1371 * A better solution may be to unlock the pages while they are
1372 * in the cache so that they may be collected naturally.
1373 */
1374
1375 /*ARGSUSED*/
1376 static int
1377 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1378 {
1379 atomic_inc_32(&segkp_indel);