Print this page
use NULL dump segop as a shorthand for no-op
Instead of forcing every segment driver to implement a dummy function that
does nothing, handle NULL dump segop function pointer as a no-op shorthand.
const-ify make segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL setpagesize segop as a shorthand for ENOTSUP
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENOTSUP, handle NULL setpagesize segop function pointer
as "return ENOTSUP" shorthand.
use NULL getmemid segop as a shorthand for ENODEV
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENODEV, handle NULL getmemid segop function pointer as
"return ENODEV" shorthand.
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle NULL capable segop function pointer as "no capabilities
supported" shorthand.
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL will accomplish the same thing in most
cases. In other cases, keeping the function pointer NULL will result in
proper error code being returned.
use C99 initializers in segment ops structures
remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory. The code is there and in theory it runs when we get *extremely* low
on memory. In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/vm/seg_kpm.c
+++ new/usr/src/uts/common/vm/seg_kpm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License, Version 1.0 only
6 6 * (the "License"). You may not use this file except in compliance
7 7 * with the License.
8 8 *
9 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 * or http://www.opensolaris.org/os/licensing.
11 11 * See the License for the specific language governing permissions
12 12 * and limitations under the License.
13 13 *
14 14 * When distributing Covered Code, include this CDDL HEADER in each
15 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 * If applicable, add the following below this CDDL HEADER, with the
17 17 * fields enclosed by brackets "[]" replaced with your own identifying
18 18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 19 *
20 20 * CDDL HEADER END
21 21 */
22 22 /*
23 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Kernel Physical Mapping (kpm) segment driver (segkpm).
29 29 *
30 30 * This driver delivers along with the hat_kpm* interfaces an alternative
31 31 * mechanism for kernel mappings within the 64-bit Solaris operating system,
32 32 * which allows the mapping of all physical memory into the kernel address
33 33 * space at once. This is feasible in 64 bit kernels, e.g. for Ultrasparc II
34 34 * and beyond processors, since the available VA range is much larger than
35 35 * possible physical memory. Currently all physical memory is supported,
36 36 * that is represented by the list of memory segments (memsegs).
37 37 *
38 38 * Segkpm mappings have also very low overhead and large pages are used
39 39 * (when possible) to minimize the TLB and TSB footprint. It is also
40 40 * extendable for other than Sparc architectures (e.g. AMD64). Main
41 41 * advantage is the avoidance of the TLB-shootdown X-calls, which are
42 42 * normally needed when a kernel (global) mapping has to be removed.
43 43 *
44 44 * First example of a kernel facility that uses the segkpm mapping scheme
45 45 * is seg_map, where it is used as an alternative to hat_memload().
46 46 * See also hat layer for more information about the hat_kpm* routines.
47 47 * The kpm facility can be turned off at boot time (e.g. /etc/system).
48 48 */
49 49
50 50 #include <sys/types.h>
51 51 #include <sys/param.h>
52 52 #include <sys/sysmacros.h>
53 53 #include <sys/systm.h>
54 54 #include <sys/vnode.h>
55 55 #include <sys/cmn_err.h>
56 56 #include <sys/debug.h>
57 57 #include <sys/thread.h>
58 58 #include <sys/cpuvar.h>
59 59 #include <sys/bitmap.h>
60 60 #include <sys/atomic.h>
61 61 #include <sys/lgrp.h>
62 62
63 63 #include <vm/seg_kmem.h>
64 64 #include <vm/seg_kpm.h>
65 65 #include <vm/hat.h>
66 66 #include <vm/as.h>
67 67 #include <vm/seg.h>
68 68 #include <vm/page.h>
69 69
70 70 /*
71 71 * Global kpm controls.
72 72 * See also platform and mmu specific controls.
73 73 *
74 74 * kpm_enable -- global on/off switch for segkpm.
75 75 * . Set by default on 64bit platforms that have kpm support.
76 76 * . Will be disabled from platform layer if not supported.
77 77 * . Can be disabled via /etc/system.
78 78 *
79 79 * kpm_smallpages -- use only regular/system pagesize for kpm mappings.
80 80 * . Can be useful for critical debugging of kpm clients.
81 81 * . Set to zero by default for platforms that support kpm large pages.
82 82 * The use of kpm large pages reduces the footprint of kpm meta data
83 83 * and has all the other advantages of using large pages (e.g TLB
84 84 * miss reduction).
85 85 * . Set by default for platforms that don't support kpm large pages or
86 86 * where large pages cannot be used for other reasons (e.g. there are
87 87 * only few full associative TLB entries available for large pages).
88 88 *
89 89 * segmap_kpm -- separate on/off switch for segmap using segkpm:
90 90 * . Set by default.
91 91 * . Will be disabled when kpm_enable is zero.
92 92 * . Will be disabled when MAXBSIZE != PAGESIZE.
93 93 * . Can be disabled via /etc/system.
94 94 *
↓ open down ↓ |
94 lines elided |
↑ open up ↑ |
95 95 */
int kpm_enable = 1;		/* segkpm on/off; platform layer or /etc/system may clear */
int kpm_smallpages = 0;		/* nonzero: regular/system pagesize only for kpm mappings */
int segmap_kpm = 1;		/* segmap-over-segkpm; disabled when kpm_enable is zero */
99 99
100 100 /*
101 101 * Private seg op routines.
102 102 */
103 103 faultcode_t segkpm_fault(struct hat *hat, struct seg *seg, caddr_t addr,
104 104 size_t len, enum fault_type type, enum seg_rw rw);
105 -static void segkpm_dump(struct seg *);
106 -static void segkpm_badop(void);
107 -static int segkpm_notsup(void);
108 -static int segkpm_capable(struct seg *, segcapability_t);
109 -
110 -#define SEGKPM_BADOP(t) (t(*)())segkpm_badop
111 -#define SEGKPM_NOTSUP (int(*)())segkpm_notsup
112 -
113 -static struct seg_ops segkpm_ops = {
114 - SEGKPM_BADOP(int), /* dup */
115 - SEGKPM_BADOP(int), /* unmap */
116 - SEGKPM_BADOP(void), /* free */
117 - segkpm_fault,
118 - SEGKPM_BADOP(int), /* faulta */
119 - SEGKPM_BADOP(int), /* setprot */
120 - SEGKPM_BADOP(int), /* checkprot */
121 - SEGKPM_BADOP(int), /* kluster */
122 - SEGKPM_BADOP(size_t), /* swapout */
123 - SEGKPM_BADOP(int), /* sync */
124 - SEGKPM_BADOP(size_t), /* incore */
125 - SEGKPM_BADOP(int), /* lockop */
126 - SEGKPM_BADOP(int), /* getprot */
127 - SEGKPM_BADOP(u_offset_t), /* getoffset */
128 - SEGKPM_BADOP(int), /* gettype */
129 - SEGKPM_BADOP(int), /* getvp */
130 - SEGKPM_BADOP(int), /* advise */
131 - segkpm_dump, /* dump */
132 - SEGKPM_NOTSUP, /* pagelock */
133 - SEGKPM_BADOP(int), /* setpgsz */
134 - SEGKPM_BADOP(int), /* getmemid */
135 - SEGKPM_BADOP(lgrp_mem_policy_info_t *), /* getpolicy */
136 - segkpm_capable, /* capable */
137 - seg_inherit_notsup /* inherit */
105 +static int segkpm_pagelock(struct seg *seg, caddr_t addr, size_t len,
106 + struct page ***page, enum lock_type type,
107 + enum seg_rw rw);
108 +
109 +static const struct seg_ops segkpm_ops = {
110 + .fault = segkpm_fault,
111 + .pagelock = segkpm_pagelock,
112 +//#ifndef SEGKPM_SUPPORT
113 +#if 0
114 +#error FIXME: define nop
115 + .dup = nop,
116 + .unmap = nop,
117 + .free = nop,
118 + .faulta = nop,
119 + .setprot = nop,
120 + .checkprot = nop,
121 + .kluster = nop,
122 + .sync = nop,
123 + .incore = nop,
124 + .lockop = nop,
125 + .getprot = nop,
126 + .getoffset = nop,
127 + .gettype = nop,
128 + .getvp = nop,
129 + .advise = nop,
130 + .getpolicy = nop,
131 +#endif
138 132 };
139 133
140 134 /*
141 135 * kpm_pgsz and kpm_pgshft are set by platform layer.
142 136 */
/* All of the following are initialized by the platform layer (see above). */
size_t	kpm_pgsz;	/* kpm page size */
uint_t	kpm_pgshft;	/* kpm page shift */
u_offset_t kpm_pgoff;	/* kpm page offset mask */
uint_t	kpmp2pshft;	/* kpm page to page shift */
pgcnt_t	kpmpnpgs;	/* how many pages per kpm page */
148 142
149 143
150 144 #ifdef SEGKPM_SUPPORT
151 145
152 146 int
153 147 segkpm_create(struct seg *seg, void *argsp)
154 148 {
155 149 struct segkpm_data *skd;
156 150 struct segkpm_crargs *b = (struct segkpm_crargs *)argsp;
157 151 ushort_t *p;
158 152 int i, j;
159 153
160 154 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
161 155 ASSERT(btokpmp(seg->s_size) >= 1 &&
162 156 kpmpageoff((uintptr_t)seg->s_base) == 0 &&
163 157 kpmpageoff((uintptr_t)seg->s_base + seg->s_size) == 0);
164 158
165 159 skd = kmem_zalloc(sizeof (struct segkpm_data), KM_SLEEP);
166 160
167 161 seg->s_data = (void *)skd;
168 162 seg->s_ops = &segkpm_ops;
169 163 skd->skd_prot = b->prot;
170 164
171 165 /*
172 166 * (1) Segkpm virtual addresses are based on physical adresses.
173 167 * From this and in opposite to other segment drivers it is
174 168 * often required to allocate a page first to be able to
175 169 * calculate the final segkpm virtual address.
176 170 * (2) Page allocation is done by calling page_create_va(),
177 171 * one important input argument is a virtual address (also
178 172 * expressed by the "va" in the function name). This function
179 173 * is highly optimized to select the right page for an optimal
180 174 * processor and platform support (e.g. virtual addressed
181 175 * caches (VAC), physical addressed caches, NUMA).
182 176 *
183 177 * Because of (1) the approach is to generate a faked virtual
184 178 * address for calling page_create_va(). In order to exploit
185 179 * the abilities of (2), especially to utilize the cache
186 180 * hierarchy (3) and to avoid VAC alias conflicts (4) the
187 181 * selection has to be done carefully. For each virtual color
188 182 * a separate counter is provided (4). The count values are
189 183 * used for the utilization of all cache lines (3) and are
190 184 * corresponding to the cache bins.
191 185 */
192 186 skd->skd_nvcolors = b->nvcolors;
193 187
194 188 p = skd->skd_va_select =
195 189 kmem_zalloc(NCPU * b->nvcolors * sizeof (ushort_t), KM_SLEEP);
196 190
197 191 for (i = 0; i < NCPU; i++)
198 192 for (j = 0; j < b->nvcolors; j++, p++)
199 193 *p = j;
200 194
201 195 return (0);
202 196 }
203 197
204 198 /*
205 199 * This routine is called via a machine specific fault handling
206 200 * routine.
207 201 */
208 202 /* ARGSUSED */
209 203 faultcode_t
210 204 segkpm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
211 205 enum fault_type type, enum seg_rw rw)
212 206 {
213 207 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
214 208
215 209 switch (type) {
216 210 case F_INVAL:
217 211 return (hat_kpm_fault(hat, addr));
218 212 case F_SOFTLOCK:
219 213 case F_SOFTUNLOCK:
220 214 return (0);
221 215 default:
222 216 return (FC_NOSUPPORT);
223 217 }
224 218 /*NOTREACHED*/
225 219 }
226 220
227 221 #define addr_to_vcolor(addr, vcolors) \
228 222 ((int)(((uintptr_t)(addr) & ((vcolors << PAGESHIFT) - 1)) >> PAGESHIFT))
229 223
230 224 /*
231 225 * Create a virtual address that can be used for invocations of
232 226 * page_create_va. Goal is to utilize the cache hierarchy (round
233 227 * robin bins) and to select the right color for virtual indexed
234 228 * caches. It isn't exact since we also increment the bin counter
235 229 * when the caller uses VOP_GETPAGE and gets a hit in the page
236 230 * cache, but we keep the bins turning for cache distribution
237 231 * (see also segkpm_create block comment).
238 232 */
239 233 caddr_t
240 234 segkpm_create_va(u_offset_t off)
241 235 {
242 236 int vcolor;
243 237 ushort_t *p;
244 238 struct segkpm_data *skd = (struct segkpm_data *)segkpm->s_data;
245 239 int nvcolors = skd->skd_nvcolors;
246 240 caddr_t va;
247 241
248 242 vcolor = (nvcolors > 1) ? addr_to_vcolor(off, nvcolors) : 0;
249 243 p = &skd->skd_va_select[(CPU->cpu_id * nvcolors) + vcolor];
250 244 va = (caddr_t)ptob(*p);
251 245
252 246 atomic_add_16(p, nvcolors);
253 247
254 248 return (va);
255 249 }
256 250
257 251 /*
258 252 * Unload mapping if the instance has an active kpm mapping.
259 253 */
260 254 void
261 255 segkpm_mapout_validkpme(struct kpme *kpme)
262 256 {
263 257 caddr_t vaddr;
264 258 page_t *pp;
265 259
266 260 retry:
267 261 if ((pp = kpme->kpe_page) == NULL) {
268 262 return;
269 263 }
270 264
271 265 if (page_lock(pp, SE_SHARED, (kmutex_t *)NULL, P_RECLAIM) == 0)
272 266 goto retry;
273 267
274 268 /*
275 269 * Check if segkpm mapping is not unloaded in the meantime
276 270 */
↓ open down ↓ |
129 lines elided |
↑ open up ↑ |
277 271 if (kpme->kpe_page == NULL) {
278 272 page_unlock(pp);
279 273 return;
280 274 }
281 275
282 276 vaddr = hat_kpm_page2va(pp, 1);
283 277 hat_kpm_mapout(pp, kpme, vaddr);
284 278 page_unlock(pp);
285 279 }
286 280
287 -static void
288 -segkpm_badop()
289 -{
290 - panic("segkpm_badop");
291 -}
292 -
293 281 #else /* SEGKPM_SUPPORT */
294 282
295 283 /* segkpm stubs */
296 284
/* Stub for kernels without SEGKPM_SUPPORT: creation trivially succeeds. */
/*ARGSUSED*/
int segkpm_create(struct seg *seg, void *argsp)
{
	return (0);
}
299 290
/* Stub for kernels without SEGKPM_SUPPORT: no fault ever to handle. */
/* ARGSUSED */
faultcode_t
segkpm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
	enum fault_type type, enum seg_rw rw)
{
	return (0);
}
307 298
/* Stub for kernels without SEGKPM_SUPPORT: no kpm va can be produced. */
/* ARGSUSED */
caddr_t segkpm_create_va(u_offset_t off)
{
	return (NULL);
}
310 304
/* Stub for kernels without SEGKPM_SUPPORT: nothing mapped, nothing to do. */
/* ARGSUSED */
void segkpm_mapout_validkpme(struct kpme *kpme)
{
}
324 309
325 -/*
326 - * segkpm pages are not dumped, so we just return
327 - */
328 -/*ARGSUSED*/
329 -static void
330 -segkpm_dump(struct seg *seg)
331 -{}
310 +#endif /* SEGKPM_SUPPORT */
332 311
/*
 * Page locking is not supported for kpm segments; always ENOTSUP.
 */
/* ARGSUSED */
static int
segkpm_pagelock(struct seg *seg, caddr_t addr, size_t len,
	struct page ***page, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX