const-ify segment ops structures
There is no reason to keep the segment ops structures writable.
use NULL getmemid segop as a shorthand for ENODEV
Instead of forcing every segment driver to implement a dummy function to
return (hopefully) ENODEV, handle a NULL getmemid segop function pointer as
shorthand for "return ENODEV".
use NULL capable segop as a shorthand for no-capabilities
Instead of forcing every segment driver to implement a dummy "return 0"
function, handle a NULL capable segop function pointer as shorthand for "no
capabilities supported".
segop_getpolicy already checks for a NULL op
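
For reference, that existing check has the same shape the two changes above
adopt; roughly (a simplified sketch, not the verbatim vm/seg.c code):

    static lgrp_mem_policy_info_t *
    segop_getpolicy(struct seg *seg, caddr_t addr)
    {
            /* A NULL op already means "no policy"; just return NULL. */
            if (seg->s_ops->getpolicy == NULL)
                    return (NULL);

            return (seg->s_ops->getpolicy(seg, addr));
    }
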
seg_inherit_notsup is redundant since segop_inherit checks for NULL properly
no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL will accomplish the same thing in most
cases. In other cases, keeping the function pointer NULL will result in
the proper error code being returned.
use C99 initializers in segment ops structures
remove whole-process swapping
Long before Unix supported paging, it used process swapping to reclaim
memory. The code is there and in theory it runs when we get *extremely* low
on memory. In practice, it never runs since the definition of low-on-memory
is antiquated. (XXX: define what antiquated means)
You can check the number of swapout/swapin events with kstats:
$ kstat -p ::vm:swapin ::vm:swapout
--- old/usr/src/uts/common/vm/seg_kp.c
+++ new/usr/src/uts/common/vm/seg_kp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 26 /* All Rights Reserved */
27 27
28 28 /*
29 29 * Portions of this source code were derived from Berkeley 4.3 BSD
30 30 * under license from the Regents of the University of California.
31 31 */
32 32
33 33 /*
34 34 * segkp is a segment driver that administers the allocation and deallocation
35 35 * of pageable variable size chunks of kernel virtual address space. Each
36 36 * allocated resource is page-aligned.
37 37 *
38 38 * The user may specify whether the resource should be initialized to 0,
39 39 * include a redzone, or locked in memory.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/thread.h>
45 45 #include <sys/param.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/buf.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vnode.h>
52 52 #include <sys/cmn_err.h>
53 53 #include <sys/swap.h>
54 54 #include <sys/tuneable.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/vmem.h>
57 57 #include <sys/cred.h>
58 58 #include <sys/dumphdr.h>
59 59 #include <sys/debug.h>
60 60 #include <sys/vtrace.h>
61 61 #include <sys/stack.h>
62 62 #include <sys/atomic.h>
63 63 #include <sys/archsystm.h>
64 64 #include <sys/lgrp.h>
65 65
66 66 #include <vm/as.h>
67 67 #include <vm/seg.h>
68 68 #include <vm/seg_kp.h>
69 69 #include <vm/seg_kmem.h>
70 70 #include <vm/anon.h>
71 71 #include <vm/page.h>
72 72 #include <vm/hat.h>
73 73 #include <sys/bitmap.h>
74 74
75 75 /*
76 76 * Private seg op routines
77 77 */
78 -static void segkp_badop(void);
79 78 static void segkp_dump(struct seg *seg);
80 79 static int segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
81 80 uint_t prot);
82 81 static int segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
83 82 static int segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
84 83 struct page ***page, enum lock_type type,
85 84 enum seg_rw rw);
86 85 static void segkp_insert(struct seg *seg, struct segkp_data *kpd);
87 86 static void segkp_delete(struct seg *seg, struct segkp_data *kpd);
88 87 static caddr_t segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
89 88 struct segkp_data **tkpd, struct anon_map *amp);
90 89 static void segkp_release_internal(struct seg *seg,
91 90 struct segkp_data *kpd, size_t len);
92 91 static int segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
93 92 size_t len, struct segkp_data *kpd, uint_t flags);
94 93 static int segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
95 94 size_t len, struct segkp_data *kpd, uint_t flags);
96 95 static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
97 -static int segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
98 -static lgrp_mem_policy_info_t *segkp_getpolicy(struct seg *seg,
99 - caddr_t addr);
100 -static int segkp_capable(struct seg *seg, segcapability_t capability);
101 96
102 97 /*
103 98 * Lock used to protect the hash table(s) and caches.
104 99 */
105 100 static kmutex_t segkp_lock;
106 101
107 102 /*
108 103 * The segkp caches
109 104 */
110 105 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
111 106
112 -#define SEGKP_BADOP(t) (t(*)())segkp_badop
113 -
114 107 /*
115 108 * When there are fewer than red_minavail bytes left on the stack,
116 109 * segkp_map_red() will map in the redzone (if called). 5000 seems
117 110 * to work reasonably well...
118 111 */
119 112 long red_minavail = 5000;
120 113
121 114 /*
122 115 * will be set to 1 for 32 bit x86 systems only, in startup.c
123 116 */
124 117 int segkp_fromheap = 0;
125 118 ulong_t *segkp_bitmap;
126 119
127 120 /*
128 121 * If segkp_map_red() is called with the redzone already mapped and
129 122 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
130 123 * then the stack situation has become quite serious; if much more stack
131 124 * is consumed, we have the potential of scrogging the next thread/LWP
132 125 * structure. To help debug the "can't happen" panics which may
133 126 * result from this condition, we record hrestime and the calling thread
134 127 * in red_deep_hires and red_deep_thread respectively.
135 128 */
136 129 #define RED_DEEP_THRESHOLD 2000
137 130
138 131 hrtime_t red_deep_hires;
139 132 kthread_t *red_deep_thread;
140 133
141 134 uint32_t red_nmapped;
142 135 uint32_t red_closest = UINT_MAX;
143 136 uint32_t red_ndoubles;
144 137
145 138 pgcnt_t anon_segkp_pages_locked; /* See vm/anon.h */
146 139 pgcnt_t anon_segkp_pages_resv; /* anon reserved by seg_kp */
147 140
148 -static struct seg_ops segkp_ops = {
149 - SEGKP_BADOP(int), /* dup */
150 - SEGKP_BADOP(int), /* unmap */
151 - SEGKP_BADOP(void), /* free */
152 - segkp_fault,
153 - SEGKP_BADOP(faultcode_t), /* faulta */
154 - SEGKP_BADOP(int), /* setprot */
155 - segkp_checkprot,
156 - segkp_kluster,
157 - SEGKP_BADOP(size_t), /* swapout */
158 - SEGKP_BADOP(int), /* sync */
159 - SEGKP_BADOP(size_t), /* incore */
160 - SEGKP_BADOP(int), /* lockop */
161 - SEGKP_BADOP(int), /* getprot */
162 - SEGKP_BADOP(u_offset_t), /* getoffset */
163 - SEGKP_BADOP(int), /* gettype */
164 - SEGKP_BADOP(int), /* getvp */
165 - SEGKP_BADOP(int), /* advise */
166 - segkp_dump, /* dump */
167 - segkp_pagelock, /* pagelock */
168 - SEGKP_BADOP(int), /* setpgsz */
169 - segkp_getmemid, /* getmemid */
170 - segkp_getpolicy, /* getpolicy */
171 - segkp_capable, /* capable */
172 - seg_inherit_notsup /* inherit */
141 +static const struct seg_ops segkp_ops = {
142 + .fault = segkp_fault,
143 + .checkprot = segkp_checkprot,
144 + .kluster = segkp_kluster,
145 + .dump = segkp_dump,
146 + .pagelock = segkp_pagelock,
173 147 };
174 148
175 149
176 -static void
177 -segkp_badop(void)
178 -{
179 - panic("segkp_badop");
180 - /*NOTREACHED*/
181 -}
182 -
183 150 static void segkpinit_mem_config(struct seg *);
184 151
185 152 static uint32_t segkp_indel;
186 153
187 154 /*
188 155 * Allocate the segment specific private data struct and fill it in
189 156 * with the per kp segment mutex, anon ptr. array and hash table.
190 157 */
191 158 int
192 159 segkp_create(struct seg *seg)
193 160 {
194 161 struct segkp_segdata *kpsd;
195 162 size_t np;
196 163
197 164 ASSERT(seg != NULL && seg->s_as == &kas);
198 165 ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
199 166
200 167 if (seg->s_size & PAGEOFFSET) {
201 168 panic("Bad segkp size");
202 169 /*NOTREACHED*/
203 170 }
204 171
205 172 kpsd = kmem_zalloc(sizeof (struct segkp_segdata), KM_SLEEP);
206 173
207 174 /*
208 175 * Allocate the virtual memory for segkp and initialize it
209 176 */
210 177 if (segkp_fromheap) {
211 178 np = btop(kvseg.s_size);
212 179 segkp_bitmap = kmem_zalloc(BT_SIZEOFMAP(np), KM_SLEEP);
213 180 kpsd->kpsd_arena = vmem_create("segkp", NULL, 0, PAGESIZE,
214 181 vmem_alloc, vmem_free, heap_arena, 5 * PAGESIZE, VM_SLEEP);
215 182 } else {
216 183 segkp_bitmap = NULL;
217 184 np = btop(seg->s_size);
218 185 kpsd->kpsd_arena = vmem_create("segkp", seg->s_base,
219 186 seg->s_size, PAGESIZE, NULL, NULL, NULL, 5 * PAGESIZE,
220 187 VM_SLEEP);
221 188 }
222 189
223 190 kpsd->kpsd_anon = anon_create(np, ANON_SLEEP | ANON_ALLOC_FORCE);
224 191
225 192 kpsd->kpsd_hash = kmem_zalloc(SEGKP_HASHSZ * sizeof (struct segkp *),
226 193 KM_SLEEP);
227 194 seg->s_data = (void *)kpsd;
228 195 seg->s_ops = &segkp_ops;
229 196 segkpinit_mem_config(seg);
230 197 return (0);
231 198 }
232 199
233 200
234 201 /*
235 202 * Find a free 'freelist' and initialize it with the appropriate attributes
236 203 */
237 204 void *
238 205 segkp_cache_init(struct seg *seg, int maxsize, size_t len, uint_t flags)
239 206 {
240 207 int i;
241 208
242 209 if ((flags & KPD_NO_ANON) && !(flags & KPD_LOCKED))
243 210 return ((void *)-1);
244 211
245 212 mutex_enter(&segkp_lock);
246 213 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
247 214 if (segkp_cache[i].kpf_inuse)
248 215 continue;
249 216 segkp_cache[i].kpf_inuse = 1;
250 217 segkp_cache[i].kpf_max = maxsize;
251 218 segkp_cache[i].kpf_flags = flags;
252 219 segkp_cache[i].kpf_seg = seg;
253 220 segkp_cache[i].kpf_len = len;
254 221 mutex_exit(&segkp_lock);
255 222 return ((void *)(uintptr_t)i);
256 223 }
257 224 mutex_exit(&segkp_lock);
258 225 return ((void *)-1);
259 226 }
260 227
261 228 /*
262 229 * Free all the cache resources.
263 230 */
264 231 void
265 232 segkp_cache_free(void)
266 233 {
267 234 struct segkp_data *kpd;
268 235 struct seg *seg;
269 236 int i;
270 237
271 238 mutex_enter(&segkp_lock);
272 239 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
273 240 if (!segkp_cache[i].kpf_inuse)
274 241 continue;
275 242 /*
276 243 * Disconnect the freelist and process each element
277 244 */
278 245 kpd = segkp_cache[i].kpf_list;
279 246 seg = segkp_cache[i].kpf_seg;
280 247 segkp_cache[i].kpf_list = NULL;
281 248 segkp_cache[i].kpf_count = 0;
282 249 mutex_exit(&segkp_lock);
283 250
284 251 while (kpd != NULL) {
285 252 struct segkp_data *next;
286 253
287 254 next = kpd->kp_next;
288 255 segkp_release_internal(seg, kpd, kpd->kp_len);
289 256 kpd = next;
290 257 }
291 258 mutex_enter(&segkp_lock);
292 259 }
293 260 mutex_exit(&segkp_lock);
294 261 }
295 262
296 263 /*
297 264 * There are 2 entries into segkp_get_internal. The first includes a cookie
298 265 * used to access a pool of cached segkp resources. The second does not
299 266 * use the cache.
300 267 */
301 268 caddr_t
302 269 segkp_get(struct seg *seg, size_t len, uint_t flags)
303 270 {
304 271 struct segkp_data *kpd = NULL;
305 272
306 273 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
307 274 kpd->kp_cookie = -1;
308 275 return (stom(kpd->kp_base, flags));
309 276 }
310 277 return (NULL);
311 278 }
312 279
313 280 /*
314 281 * Return a 'cached' segkp address
315 282 */
316 283 caddr_t
317 284 segkp_cache_get(void *cookie)
318 285 {
319 286 struct segkp_cache *freelist = NULL;
320 287 struct segkp_data *kpd = NULL;
321 288 int index = (int)(uintptr_t)cookie;
322 289 struct seg *seg;
323 290 size_t len;
324 291 uint_t flags;
325 292
326 293 if (index < 0 || index >= SEGKP_MAX_CACHE)
327 294 return (NULL);
328 295 freelist = &segkp_cache[index];
329 296
330 297 mutex_enter(&segkp_lock);
331 298 seg = freelist->kpf_seg;
332 299 flags = freelist->kpf_flags;
333 300 if (freelist->kpf_list != NULL) {
334 301 kpd = freelist->kpf_list;
335 302 freelist->kpf_list = kpd->kp_next;
336 303 freelist->kpf_count--;
337 304 mutex_exit(&segkp_lock);
338 305 kpd->kp_next = NULL;
339 306 segkp_insert(seg, kpd);
340 307 return (stom(kpd->kp_base, flags));
341 308 }
342 309 len = freelist->kpf_len;
343 310 mutex_exit(&segkp_lock);
344 311 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
345 312 kpd->kp_cookie = index;
346 313 return (stom(kpd->kp_base, flags));
347 314 }
348 315 return (NULL);
349 316 }
350 317
351 318 caddr_t
352 319 segkp_get_withanonmap(
353 320 struct seg *seg,
354 321 size_t len,
355 322 uint_t flags,
356 323 struct anon_map *amp)
357 324 {
358 325 struct segkp_data *kpd = NULL;
359 326
360 327 ASSERT(amp != NULL);
361 328 flags |= KPD_HASAMP;
362 329 if (segkp_get_internal(seg, len, flags, &kpd, amp) != NULL) {
363 330 kpd->kp_cookie = -1;
364 331 return (stom(kpd->kp_base, flags));
365 332 }
366 333 return (NULL);
367 334 }
368 335
369 336 /*
370 337 * This does the real work of segkp allocation.
371 338 * Return to client base addr. len must be page-aligned. A null value is
372 339 * returned if there are no more vm resources (e.g. pages, swap). The len
373 340 * and base recorded in the private data structure include the redzone
374 341 * and the redzone length (if applicable). If the user requests a redzone
375 342 * either the first or last page is left unmapped depending whether stacks
376 343 * grow to low or high memory.
377 344 *
378 345 * The client may also specify a no-wait flag. If that is set then the
379 346 * request will choose a non-blocking path when requesting resources.
380 347 * The default is to make the client wait.
381 348 */
382 349 static caddr_t
383 350 segkp_get_internal(
384 351 struct seg *seg,
385 352 size_t len,
386 353 uint_t flags,
387 354 struct segkp_data **tkpd,
388 355 struct anon_map *amp)
389 356 {
390 357 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
391 358 struct segkp_data *kpd;
392 359 caddr_t vbase = NULL; /* always first virtual, may not be mapped */
393 360 pgcnt_t np = 0; /* number of pages in the resource */
394 361 pgcnt_t segkpindex;
395 362 long i;
396 363 caddr_t va;
397 364 pgcnt_t pages = 0;
398 365 ulong_t anon_idx = 0;
399 366 int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
400 367 caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;
401 368
402 369 if (len & PAGEOFFSET) {
403 370 panic("segkp_get: len is not page-aligned");
404 371 /*NOTREACHED*/
405 372 }
406 373
407 374 ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));
408 375
409 376 /* Only allow KPD_NO_ANON if we are going to lock it down */
410 377 if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
411 378 return (NULL);
412 379
413 380 if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
414 381 return (NULL);
415 382 /*
416 383 * Fix up the len to reflect the REDZONE if applicable
417 384 */
418 385 if (flags & KPD_HASREDZONE)
419 386 len += PAGESIZE;
420 387 np = btop(len);
421 388
422 389 vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
423 390 if (vbase == NULL) {
424 391 kmem_free(kpd, sizeof (struct segkp_data));
425 392 return (NULL);
426 393 }
427 394
428 395 /* If locking, reserve physical memory */
429 396 if (flags & KPD_LOCKED) {
430 397 pages = btop(SEGKP_MAPLEN(len, flags));
431 398 if (page_resv(pages, kmflag) == 0) {
432 399 vmem_free(SEGKP_VMEM(seg), vbase, len);
433 400 kmem_free(kpd, sizeof (struct segkp_data));
434 401 return (NULL);
435 402 }
436 403 if ((flags & KPD_NO_ANON) == 0)
437 404 atomic_add_long(&anon_segkp_pages_locked, pages);
438 405 }
439 406
440 407 /*
441 408 * Reserve sufficient swap space for this vm resource. We'll
442 409 * actually allocate it in the loop below, but reserving it
443 410 * here allows us to back out more gracefully than if we
444 411 * had an allocation failure in the body of the loop.
445 412 *
446 413 * Note that we don't need swap space for the red zone page.
447 414 */
448 415 if (amp != NULL) {
449 416 /*
450 417 * The swap reservation has been done, if required, and the
451 418 * anon_hdr is separate.
452 419 */
453 420 anon_idx = 0;
454 421 kpd->kp_anon_idx = anon_idx;
455 422 kpd->kp_anon = amp->ahp;
456 423
457 424 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
458 425 kpd, vbase, len, flags, 1);
459 426
460 427 } else if ((flags & KPD_NO_ANON) == 0) {
461 428 if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
462 429 if (flags & KPD_LOCKED) {
463 430 atomic_add_long(&anon_segkp_pages_locked,
464 431 -pages);
465 432 page_unresv(pages);
466 433 }
467 434 vmem_free(SEGKP_VMEM(seg), vbase, len);
468 435 kmem_free(kpd, sizeof (struct segkp_data));
469 436 return (NULL);
470 437 }
471 438 atomic_add_long(&anon_segkp_pages_resv,
472 439 btop(SEGKP_MAPLEN(len, flags)));
473 440 anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
474 441 kpd->kp_anon_idx = anon_idx;
475 442 kpd->kp_anon = kpsd->kpsd_anon;
476 443
477 444 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
478 445 kpd, vbase, len, flags, 1);
479 446 } else {
480 447 kpd->kp_anon = NULL;
481 448 kpd->kp_anon_idx = 0;
482 449 }
483 450
484 451 /*
485 452 * Allocate page and anon resources for the virtual address range
486 453 * except the redzone
487 454 */
488 455 if (segkp_fromheap)
489 456 segkpindex = btop((uintptr_t)(vbase - kvseg.s_base));
490 457 for (i = 0, va = vbase; i < np; i++, va += PAGESIZE) {
491 458 page_t *pl[2];
492 459 struct vnode *vp;
493 460 anoff_t off;
494 461 int err;
495 462 page_t *pp = NULL;
496 463
497 464 /*
498 465 * Mark this page to be a segkp page in the bitmap.
499 466 */
500 467 if (segkp_fromheap) {
501 468 BT_ATOMIC_SET(segkp_bitmap, segkpindex);
502 469 segkpindex++;
503 470 }
504 471
505 472 /*
506 473 * If this page is the red zone page, we don't need swap
507 474 * space for it. Note that we skip over the code that
508 475 * establishes MMU mappings, so that the page remains
509 476 * invalid.
510 477 */
511 478 if ((flags & KPD_HASREDZONE) && KPD_REDZONE(kpd) == i)
512 479 continue;
513 480
514 481 if (kpd->kp_anon != NULL) {
515 482 struct anon *ap;
516 483
517 484 ASSERT(anon_get_ptr(kpd->kp_anon, anon_idx + i)
518 485 == NULL);
519 486 /*
520 487 * Determine the "vp" and "off" of the anon slot.
521 488 */
522 489 ap = anon_alloc(NULL, 0);
523 490 if (amp != NULL)
524 491 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
525 492 (void) anon_set_ptr(kpd->kp_anon, anon_idx + i,
526 493 ap, ANON_SLEEP);
527 494 if (amp != NULL)
528 495 ANON_LOCK_EXIT(&->a_rwlock);
529 496 swap_xlate(ap, &vp, &off);
530 497
531 498 /*
532 499 * Create a page with the specified identity. The
533 500 * page is returned with the "shared" lock held.
534 501 */
535 502 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE,
536 503 NULL, pl, PAGESIZE, seg, va, S_CREATE,
537 504 kcred, NULL);
538 505 if (err) {
539 506 /*
540 507 * XXX - This should not fail.
541 508 */
542 509 panic("segkp_get: no pages");
543 510 /*NOTREACHED*/
544 511 }
545 512 pp = pl[0];
546 513 } else {
547 514 ASSERT(page_exists(&kvp,
548 515 (u_offset_t)(uintptr_t)va) == NULL);
549 516
550 517 if ((pp = page_create_va(&kvp,
551 518 (u_offset_t)(uintptr_t)va, PAGESIZE,
552 519 (flags & KPD_NOWAIT ? 0 : PG_WAIT) | PG_EXCL |
553 520 PG_NORELOC, seg, va)) == NULL) {
554 521 /*
555 522 * Legitimize resource; then destroy it.
556 523 * Easier than trying to unwind here.
557 524 */
558 525 kpd->kp_flags = flags;
559 526 kpd->kp_base = vbase;
560 527 kpd->kp_len = len;
561 528 segkp_release_internal(seg, kpd, va - vbase);
562 529 return (NULL);
563 530 }
564 531 page_io_unlock(pp);
565 532 }
566 533
567 534 if (flags & KPD_ZERO)
568 535 pagezero(pp, 0, PAGESIZE);
569 536
570 537 /*
571 538 * Load and lock an MMU translation for the page.
572 539 */
573 540 hat_memload(seg->s_as->a_hat, va, pp, (PROT_READ|PROT_WRITE),
574 541 ((flags & KPD_LOCKED) ? HAT_LOAD_LOCK : HAT_LOAD));
575 542
576 543 /*
577 544 * Now, release lock on the page.
578 545 */
579 546 if (flags & KPD_LOCKED) {
580 547 /*
581 548 * Indicate to page_retire framework that this
582 549 * page can only be retired when it is freed.
583 550 */
584 551 PP_SETRAF(pp);
585 552 page_downgrade(pp);
586 553 } else
587 554 page_unlock(pp);
588 555 }
589 556
590 557 kpd->kp_flags = flags;
591 558 kpd->kp_base = vbase;
592 559 kpd->kp_len = len;
593 560 segkp_insert(seg, kpd);
594 561 *tkpd = kpd;
595 562 return (stom(kpd->kp_base, flags));
596 563 }
597 564
598 565 /*
599 566 * Release the resource to cache if the pool (designated by the cookie)
600 567 * has less than the maximum allowable. If inserted in cache,
601 568 * segkp_delete ensures the element is taken off of the active list.
602 569 */
603 570 void
604 571 segkp_release(struct seg *seg, caddr_t vaddr)
605 572 {
606 573 struct segkp_cache *freelist;
607 574 struct segkp_data *kpd = NULL;
608 575
609 576 if ((kpd = segkp_find(seg, vaddr)) == NULL) {
610 577 panic("segkp_release: null kpd");
611 578 /*NOTREACHED*/
612 579 }
613 580
614 581 if (kpd->kp_cookie != -1) {
615 582 freelist = &segkp_cache[kpd->kp_cookie];
616 583 mutex_enter(&segkp_lock);
617 584 if (!segkp_indel && freelist->kpf_count < freelist->kpf_max) {
618 585 segkp_delete(seg, kpd);
619 586 kpd->kp_next = freelist->kpf_list;
620 587 freelist->kpf_list = kpd;
621 588 freelist->kpf_count++;
622 589 mutex_exit(&segkp_lock);
623 590 return;
624 591 } else {
625 592 mutex_exit(&segkp_lock);
626 593 kpd->kp_cookie = -1;
627 594 }
628 595 }
629 596 segkp_release_internal(seg, kpd, kpd->kp_len);
630 597 }
631 598
632 599 /*
633 600 * Free the entire resource. segkp_unlock gets called with the start of the
634 601 * mapped portion of the resource. The length is the size of the mapped
635 602 * portion
636 603 */
637 604 static void
638 605 segkp_release_internal(struct seg *seg, struct segkp_data *kpd, size_t len)
639 606 {
640 607 caddr_t va;
641 608 long i;
642 609 long redzone;
643 610 size_t np;
644 611 page_t *pp;
645 612 struct vnode *vp;
646 613 anoff_t off;
647 614 struct anon *ap;
648 615 pgcnt_t segkpindex;
649 616
650 617 ASSERT(kpd != NULL);
651 618 ASSERT((kpd->kp_flags & KPD_HASAMP) == 0 || kpd->kp_cookie == -1);
652 619 np = btop(len);
653 620
654 621 /* Remove from active hash list */
655 622 if (kpd->kp_cookie == -1) {
656 623 mutex_enter(&segkp_lock);
657 624 segkp_delete(seg, kpd);
658 625 mutex_exit(&segkp_lock);
659 626 }
660 627
661 628 /*
662 629 * Precompute redzone page index.
663 630 */
664 631 redzone = -1;
665 632 if (kpd->kp_flags & KPD_HASREDZONE)
666 633 redzone = KPD_REDZONE(kpd);
667 634
668 635
669 636 va = kpd->kp_base;
670 637
671 638 hat_unload(seg->s_as->a_hat, va, (np << PAGESHIFT),
672 639 ((kpd->kp_flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
673 640 /*
674 641 * Free up those anon resources that are quiescent.
675 642 */
676 643 if (segkp_fromheap)
677 644 segkpindex = btop((uintptr_t)(va - kvseg.s_base));
678 645 for (i = 0; i < np; i++, va += PAGESIZE) {
679 646
680 647 /*
681 648 * Clear the bit for this page from the bitmap.
682 649 */
683 650 if (segkp_fromheap) {
684 651 BT_ATOMIC_CLEAR(segkp_bitmap, segkpindex);
685 652 segkpindex++;
686 653 }
687 654
688 655 if (i == redzone)
689 656 continue;
690 657 if (kpd->kp_anon) {
691 658 /*
692 659 * Free up anon resources and destroy the
693 660 * associated pages.
694 661 *
695 662 * Release the lock if there is one. Have to get the
696 663 * page to do this, unfortunately.
697 664 */
698 665 if (kpd->kp_flags & KPD_LOCKED) {
699 666 ap = anon_get_ptr(kpd->kp_anon,
700 667 kpd->kp_anon_idx + i);
701 668 swap_xlate(ap, &vp, &off);
702 669 /* Find the shared-locked page. */
703 670 pp = page_find(vp, (u_offset_t)off);
704 671 if (pp == NULL) {
705 672 panic("segkp_release: "
706 673 "kp_anon: no page to unlock ");
707 674 /*NOTREACHED*/
708 675 }
709 676 if (PP_ISRAF(pp))
710 677 PP_CLRRAF(pp);
711 678
712 679 page_unlock(pp);
713 680 }
714 681 if ((kpd->kp_flags & KPD_HASAMP) == 0) {
715 682 anon_free(kpd->kp_anon, kpd->kp_anon_idx + i,
716 683 PAGESIZE);
717 684 anon_unresv_zone(PAGESIZE, NULL);
718 685 atomic_dec_ulong(&anon_segkp_pages_resv);
719 686 }
720 687 TRACE_5(TR_FAC_VM,
721 688 TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
722 689 kpd, va, PAGESIZE, 0, 0);
723 690 } else {
724 691 if (kpd->kp_flags & KPD_LOCKED) {
725 692 pp = page_find(&kvp, (u_offset_t)(uintptr_t)va);
726 693 if (pp == NULL) {
727 694 panic("segkp_release: "
728 695 "no page to unlock");
729 696 /*NOTREACHED*/
730 697 }
731 698 if (PP_ISRAF(pp))
732 699 PP_CLRRAF(pp);
733 700 /*
734 701 * We should just upgrade the lock here
735 702 * but there is no upgrade that waits.
736 703 */
737 704 page_unlock(pp);
738 705 }
739 706 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)va,
740 707 SE_EXCL);
741 708 if (pp != NULL)
742 709 page_destroy(pp, 0);
743 710 }
744 711 }
745 712
746 713 /* If locked, release physical memory reservation */
747 714 if (kpd->kp_flags & KPD_LOCKED) {
748 715 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
749 716 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
750 717 atomic_add_long(&anon_segkp_pages_locked, -pages);
751 718 page_unresv(pages);
752 719 }
753 720
754 721 vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
755 722 kmem_free(kpd, sizeof (struct segkp_data));
756 723 }
757 724
758 725 /*
759 726 * segkp_map_red() will check the current frame pointer against the
760 727 * stack base. If the amount of stack remaining is questionable
761 728 * (less than red_minavail), then segkp_map_red() will map in the redzone
762 729 * and return 1. Otherwise, it will return 0. segkp_map_red() can
763 - * _only_ be called when:
764 - *
765 - * - it is safe to sleep on page_create_va().
766 - * - the caller is non-swappable.
730 + * _only_ be called when it is safe to sleep on page_create_va().
767 731 *
768 732 * It is up to the caller to remember whether segkp_map_red() successfully
769 733 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
770 - * time. Note that the caller must _remain_ non-swappable until after
771 - * calling segkp_unmap_red().
734 + * time.
772 735 *
773 736 * Currently, this routine is only called from pagefault() (which necessarily
774 737 * satisfies the above conditions).
775 738 */
776 739 #if defined(STACK_GROWTH_DOWN)
777 740 int
778 741 segkp_map_red(void)
779 742 {
780 743 uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
781 744 #ifndef _LP64
782 745 caddr_t stkbase;
783 746 #endif
784 747
785 - ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
786 -
787 748 /*
788 749 * Optimize for the common case where we simply return.
789 750 */
790 751 if ((curthread->t_red_pp == NULL) &&
791 752 (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
792 753 return (0);
793 754
794 755 #if defined(_LP64)
795 756 /*
796 757 * XXX We probably need something better than this.
797 758 */
798 759 panic("kernel stack overflow");
799 760 /*NOTREACHED*/
800 761 #else /* _LP64 */
801 762 if (curthread->t_red_pp == NULL) {
802 763 page_t *red_pp;
803 764 struct seg kseg;
804 765
805 766 caddr_t red_va = (caddr_t)
806 767 (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
807 768 PAGESIZE);
808 769
809 770 ASSERT(page_exists(&kvp, (u_offset_t)(uintptr_t)red_va) ==
810 771 NULL);
811 772
812 773 /*
813 774 * Allocate the physical for the red page.
814 775 */
815 776 /*
816 777 * No PG_NORELOC here to avoid waits. Unlikely to get
817 778 * a relocate happening in the short time the page exists
818 779 * and it will be OK anyway.
819 780 */
820 781
821 782 kseg.s_as = &kas;
822 783 red_pp = page_create_va(&kvp, (u_offset_t)(uintptr_t)red_va,
823 784 PAGESIZE, PG_WAIT | PG_EXCL, &kseg, red_va);
824 785 ASSERT(red_pp != NULL);
825 786
826 787 /*
827 788 * So we now have a page to jam into the redzone...
828 789 */
829 790 page_io_unlock(red_pp);
830 791
831 792 hat_memload(kas.a_hat, red_va, red_pp,
832 793 (PROT_READ|PROT_WRITE), HAT_LOAD_LOCK);
833 794 page_downgrade(red_pp);
834 795
835 796 /*
836 797 * The page is left SE_SHARED locked so we can hold on to
837 798 * the page_t pointer.
838 799 */
839 800 curthread->t_red_pp = red_pp;
840 801
841 802 atomic_inc_32(&red_nmapped);
842 803 while (fp - (uintptr_t)curthread->t_stkbase < red_closest) {
843 804 (void) atomic_cas_32(&red_closest, red_closest,
844 805 (uint32_t)(fp - (uintptr_t)curthread->t_stkbase));
845 806 }
846 807 return (1);
847 808 }
848 809
849 810 stkbase = (caddr_t)(((uintptr_t)curthread->t_stkbase &
850 811 (uintptr_t)PAGEMASK) - PAGESIZE);
851 812
852 813 atomic_inc_32(&red_ndoubles);
853 814
854 815 if (fp - (uintptr_t)stkbase < RED_DEEP_THRESHOLD) {
855 816 /*
856 817 * Oh boy. We're already deep within the mapped-in
857 818 * redzone page, and the caller is trying to prepare
858 819 * for a deep stack run. We're running without a
859 820 * redzone right now: if the caller plows off the
860 821 * end of the stack, it'll plow another thread or
861 822 * LWP structure. That situation could result in
862 823 * a very hard-to-debug panic, so, in the spirit of
863 824 * recording the name of one's killer in one's own
864 825 * blood, we're going to record hrestime and the calling
865 826 * thread.
866 827 */
867 828 red_deep_hires = hrestime.tv_nsec;
868 829 red_deep_thread = curthread;
869 830 }
870 831
871 832 /*
872 833 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
873 834 */
874 835 ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
875 836 return (0);
876 837 #endif /* _LP64 */
877 838 }
878 839
879 840 void
880 841 segkp_unmap_red(void)
881 842 {
882 843 page_t *pp;
883 844 caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
884 845 (uintptr_t)PAGEMASK) - PAGESIZE);
885 846
886 847 ASSERT(curthread->t_red_pp != NULL);
887 - ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
888 848
889 849 /*
890 850 * Because we locked the mapping down, we can't simply rely
891 851 * on page_destroy() to clean everything up; we need to call
892 852 * hat_unload() to explicitly unlock the mapping resources.
893 853 */
894 854 hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
895 855
896 856 pp = curthread->t_red_pp;
897 857
898 858 ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
899 859
900 860 /*
901 861 * Need to upgrade the SE_SHARED lock to SE_EXCL.
902 862 */
903 863 if (!page_tryupgrade(pp)) {
904 864 /*
905 865 * As there is no wait for upgrade, release the
906 866 * SE_SHARED lock and wait for SE_EXCL.
907 867 */
908 868 page_unlock(pp);
909 869 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)red_va, SE_EXCL);
910 870 /* pp may be NULL here, hence the test below */
911 871 }
912 872
913 873 /*
914 874 * Destroy the page, with dontfree set to zero (i.e. free it).
915 875 */
916 876 if (pp != NULL)
917 877 page_destroy(pp, 0);
918 878 curthread->t_red_pp = NULL;
919 879 }
920 880 #else
921 881 #error Red stacks only supported with downwards stack growth.
922 882 #endif
923 883
924 884 /*
925 885 * Handle a fault on an address corresponding to one of the
926 886 * resources in the segkp segment.
927 887 */
928 888 faultcode_t
929 889 segkp_fault(
930 890 struct hat *hat,
931 891 struct seg *seg,
932 892 caddr_t vaddr,
933 893 size_t len,
934 894 enum fault_type type,
935 895 enum seg_rw rw)
936 896 {
937 897 struct segkp_data *kpd = NULL;
938 898 int err;
939 899
940 900 ASSERT(seg->s_as == &kas && RW_READ_HELD(&seg->s_as->a_lock));
941 901
942 902 /*
943 903 * Sanity checks.
944 904 */
945 905 if (type == F_PROT) {
946 906 panic("segkp_fault: unexpected F_PROT fault");
947 907 /*NOTREACHED*/
948 908 }
949 909
950 910 if ((kpd = segkp_find(seg, vaddr)) == NULL)
951 911 return (FC_NOMAP);
952 912
953 913 mutex_enter(&kpd->kp_lock);
954 914
955 915 if (type == F_SOFTLOCK) {
956 916 ASSERT(!(kpd->kp_flags & KPD_LOCKED));
957 917 /*
958 918 * The F_SOFTLOCK case has more stringent
959 919 * range requirements: the given range must exactly coincide
960 920 * with the resource's mapped portion. Note reference to
961 921 * redzone is handled since vaddr would not equal base
962 922 */
963 923 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
964 924 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
965 925 mutex_exit(&kpd->kp_lock);
966 926 return (FC_MAKE_ERR(EFAULT));
967 927 }
968 928
969 929 if ((err = segkp_load(hat, seg, vaddr, len, kpd, KPD_LOCKED))) {
970 930 mutex_exit(&kpd->kp_lock);
971 931 return (FC_MAKE_ERR(err));
972 932 }
973 933 kpd->kp_flags |= KPD_LOCKED;
974 934 mutex_exit(&kpd->kp_lock);
975 935 return (0);
976 936 }
977 937
978 938 if (type == F_INVAL) {
979 939 ASSERT(!(kpd->kp_flags & KPD_NO_ANON));
980 940
981 941 /*
982 942 * Check if we touched the redzone. Somewhat optimistic
983 943 * here if we are touching the redzone of our own stack
984 944 * since we wouldn't have a stack to get this far...
985 945 */
986 946 if ((kpd->kp_flags & KPD_HASREDZONE) &&
987 947 btop((uintptr_t)(vaddr - kpd->kp_base)) == KPD_REDZONE(kpd))
988 948 panic("segkp_fault: accessing redzone");
989 949
990 950 /*
991 951 * This fault may occur while the page is being F_SOFTLOCK'ed.
992 952 * Return since a 2nd segkp_load is unnecessary and also would
993 953 * result in the page being locked twice and eventually
994 954 * hang the thread_reaper thread.
995 955 */
996 956 if (kpd->kp_flags & KPD_LOCKED) {
997 957 mutex_exit(&kpd->kp_lock);
998 958 return (0);
999 959 }
1000 960
1001 961 err = segkp_load(hat, seg, vaddr, len, kpd, kpd->kp_flags);
1002 962 mutex_exit(&kpd->kp_lock);
1003 963 return (err ? FC_MAKE_ERR(err) : 0);
1004 964 }
1005 965
1006 966 if (type == F_SOFTUNLOCK) {
1007 967 uint_t flags;
1008 968
1009 969 /*
1010 970 * Make sure the addr is LOCKED and it has anon backing
1011 971 * before unlocking
1012 972 */
1013 973 if ((kpd->kp_flags & (KPD_LOCKED|KPD_NO_ANON)) != KPD_LOCKED) {
1014 974 panic("segkp_fault: bad unlock");
1015 975 /*NOTREACHED*/
1016 976 }
1017 977
1018 978 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
1019 979 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
1020 980 panic("segkp_fault: bad range");
1021 981 /*NOTREACHED*/
1022 982 }
1023 983
1024 984 if (rw == S_WRITE)
1025 985 flags = kpd->kp_flags | KPD_WRITEDIRTY;
1026 986 else
1027 987 flags = kpd->kp_flags;
1028 988 err = segkp_unlock(hat, seg, vaddr, len, kpd, flags);
1029 989 kpd->kp_flags &= ~KPD_LOCKED;
1030 990 mutex_exit(&kpd->kp_lock);
1031 991 return (err ? FC_MAKE_ERR(err) : 0);
1032 992 }
1033 993 mutex_exit(&kpd->kp_lock);
1034 994 panic("segkp_fault: bogus fault type: %d\n", type);
1035 995 /*NOTREACHED*/
1036 996 }
1037 997
1038 998 /*
1039 999 * Check that the given protections suffice over the range specified by
1040 1000 * vaddr and len. For this segment type, the only issue is whether or
1041 1001 * not the range lies completely within the mapped part of an allocated
1042 1002 * resource.
1043 1003 */
1044 1004 /* ARGSUSED */
1045 1005 static int
1046 1006 segkp_checkprot(struct seg *seg, caddr_t vaddr, size_t len, uint_t prot)
1047 1007 {
1048 1008 struct segkp_data *kpd = NULL;
1049 1009 caddr_t mbase;
1050 1010 size_t mlen;
1051 1011
1052 1012 if ((kpd = segkp_find(seg, vaddr)) == NULL)
1053 1013 return (EACCES);
1054 1014
1055 1015 mutex_enter(&kpd->kp_lock);
1056 1016 mbase = stom(kpd->kp_base, kpd->kp_flags);
1057 1017 mlen = SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags);
1058 1018 if (len > mlen || vaddr < mbase ||
1059 1019 ((vaddr + len) > (mbase + mlen))) {
1060 1020 mutex_exit(&kpd->kp_lock);
1061 1021 return (EACCES);
1062 1022 }
1063 1023 mutex_exit(&kpd->kp_lock);
1064 1024 return (0);
1065 1025 }
1066 1026
1067 1027
1068 1028 /*
1069 1029 * Check to see if it makes sense to do kluster/read ahead to
1070 1030 * addr + delta relative to the mapping at addr. We assume here
1071 1031 * that delta is a signed PAGESIZE'd multiple (which can be negative).
1072 1032 *
1073 1033 * For seg_u we always "approve" of this action from our standpoint.
1074 1034 */
1075 1035 /*ARGSUSED*/
1076 1036 static int
1077 1037 segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
1078 1038 {
1079 1039 return (0);
1080 1040 }
1081 1041
1082 1042 /*
1083 1043 * Load and possibly lock intra-slot resources in the range given by
1084 1044 * vaddr and len.
1085 1045 */
1086 1046 static int
1087 1047 segkp_load(
1088 1048 struct hat *hat,
1089 1049 struct seg *seg,
1090 1050 caddr_t vaddr,
1091 1051 size_t len,
1092 1052 struct segkp_data *kpd,
1093 1053 uint_t flags)
1094 1054 {
1095 1055 caddr_t va;
1096 1056 caddr_t vlim;
1097 1057 ulong_t i;
1098 1058 uint_t lock;
1099 1059
1100 1060 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1101 1061
1102 1062 len = P2ROUNDUP(len, PAGESIZE);
1103 1063
1104 1064 /* If locking, reserve physical memory */
1105 1065 if (flags & KPD_LOCKED) {
1106 1066 pgcnt_t pages = btop(len);
1107 1067 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1108 1068 atomic_add_long(&anon_segkp_pages_locked, pages);
1109 1069 (void) page_resv(pages, KM_SLEEP);
1110 1070 }
1111 1071
1112 1072 /*
1113 1073 * Loop through the pages in the given range.
1114 1074 */
1115 1075 va = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
1116 1076 vaddr = va;
1117 1077 vlim = va + len;
1118 1078 lock = flags & KPD_LOCKED;
1119 1079 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1120 1080 for (; va < vlim; va += PAGESIZE, i++) {
1121 1081 page_t *pl[2]; /* second element NULL terminator */
1122 1082 struct vnode *vp;
1123 1083 anoff_t off;
1124 1084 int err;
1125 1085 struct anon *ap;
1126 1086
1127 1087 /*
1128 1088 * Summon the page. If it's not resident, arrange
1129 1089 * for synchronous i/o to pull it in.
1130 1090 */
1131 1091 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1132 1092 swap_xlate(ap, &vp, &off);
1133 1093
1134 1094 /*
1135 1095 * The returned page list will have exactly one entry,
1136 1096 * which is returned to us already kept.
1137 1097 */
1138 1098 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE, NULL,
1139 1099 pl, PAGESIZE, seg, va, S_READ, kcred, NULL);
1140 1100
1141 1101 if (err) {
1142 1102 /*
1143 1103 * Back out of what we've done so far.
1144 1104 */
1145 1105 (void) segkp_unlock(hat, seg, vaddr,
1146 1106 (va - vaddr), kpd, flags);
1147 1107 return (err);
1148 1108 }
1149 1109
1150 1110 /*
1151 1111 * Load an MMU translation for the page.
1152 1112 */
1153 1113 hat_memload(hat, va, pl[0], (PROT_READ|PROT_WRITE),
1154 1114 lock ? HAT_LOAD_LOCK : HAT_LOAD);
1155 1115
1156 1116 if (!lock) {
1157 1117 /*
1158 1118 * Now, release "shared" lock on the page.
1159 1119 */
1160 1120 page_unlock(pl[0]);
1161 1121 }
1162 1122 }
1163 1123 return (0);
1164 1124 }
1165 1125
1166 1126 /*
1167 1127 * At the very least unload the mmu-translations and unlock the range if locked
1168 1128 * Can be called with the following flag value KPD_WRITEDIRTY which specifies
1169 1129 * any dirty pages should be written to disk.
1170 1130 */
1171 1131 static int
1172 1132 segkp_unlock(
1173 1133 struct hat *hat,
1174 1134 struct seg *seg,
1175 1135 caddr_t vaddr,
1176 1136 size_t len,
1177 1137 struct segkp_data *kpd,
1178 1138 uint_t flags)
1179 1139 {
1180 1140 caddr_t va;
1181 1141 caddr_t vlim;
1182 1142 ulong_t i;
1183 1143 struct page *pp;
1184 1144 struct vnode *vp;
1185 1145 anoff_t off;
1186 1146 struct anon *ap;
1187 1147
1188 1148 #ifdef lint
1189 1149 seg = seg;
1190 1150 #endif /* lint */
1191 1151
1192 1152 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1193 1153
1194 1154 /*
1195 1155 * Loop through the pages in the given range. It is assumed
1196 1156 * segkp_unlock is called with page aligned base
1197 1157 */
1198 1158 va = vaddr;
1199 1159 vlim = va + len;
1200 1160 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1201 1161 hat_unload(hat, va, len,
1202 1162 ((flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
1203 1163 for (; va < vlim; va += PAGESIZE, i++) {
1204 1164 /*
1205 1165 * Find the page associated with this part of the
1206 1166 * slot, tracking it down through its associated swap
1207 1167 * space.
1208 1168 */
1209 1169 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1210 1170 swap_xlate(ap, &vp, &off);
1211 1171
1212 1172 if (flags & KPD_LOCKED) {
1213 1173 if ((pp = page_find(vp, off)) == NULL) {
1214 1174 if (flags & KPD_LOCKED) {
1215 1175 panic("segkp_softunlock: missing page");
1216 1176 /*NOTREACHED*/
1217 1177 }
1218 1178 }
1219 1179 } else {
1220 1180 /*
1221 1181 * Nothing to do if the slot is not locked and the
1222 1182 * page doesn't exist.
1223 1183 */
1224 1184 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL)
1225 1185 continue;
1226 1186 }
1227 1187
1228 1188 /*
1229 1189 * If the page doesn't have any translations, is
1230 1190 * dirty and not being shared, then push it out
1231 1191 * asynchronously and avoid waiting for the
1232 1192 * pageout daemon to do it for us.
1233 1193 *
1234 1194 * XXX - Do we really need to get the "exclusive"
1235 1195 * lock via an upgrade?
1236 1196 */
1237 1197 if ((flags & KPD_WRITEDIRTY) && !hat_page_is_mapped(pp) &&
1238 1198 hat_ismod(pp) && page_tryupgrade(pp)) {
1239 1199 /*
1240 1200 * Hold the vnode before releasing the page lock to
1241 1201 * prevent it from being freed and re-used by some
1242 1202 * other thread.
1243 1203 */
1244 1204 VN_HOLD(vp);
1245 1205 page_unlock(pp);
1246 1206
1247 1207 /*
1248 1208 * Want most powerful credentials we can get so
1249 1209 * use kcred.
1250 1210 */
1251 1211 (void) VOP_PUTPAGE(vp, (offset_t)off, PAGESIZE,
1252 1212 B_ASYNC | B_FREE, kcred, NULL);
1253 1213 VN_RELE(vp);
1254 1214 } else {
1255 1215 page_unlock(pp);
1256 1216 }
1257 1217 }
1258 1218
1259 1219 /* If unlocking, release physical memory */
1260 1220 if (flags & KPD_LOCKED) {
1261 1221 pgcnt_t pages = btopr(len);
1262 1222 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1263 1223 atomic_add_long(&anon_segkp_pages_locked, -pages);
1264 1224 page_unresv(pages);
1265 1225 }
1266 1226 return (0);
1267 1227 }
1268 1228
1269 1229 /*
1270 1230 * Insert the kpd in the hash table.
1271 1231 */
1272 1232 static void
1273 1233 segkp_insert(struct seg *seg, struct segkp_data *kpd)
1274 1234 {
1275 1235 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1276 1236 int index;
1277 1237
1278 1238 /*
1279 1239 * Insert the kpd based on the address that will be returned
1280 1240 * via segkp_release.
1281 1241 */
1282 1242 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1283 1243 mutex_enter(&segkp_lock);
1284 1244 kpd->kp_next = kpsd->kpsd_hash[index];
1285 1245 kpsd->kpsd_hash[index] = kpd;
1286 1246 mutex_exit(&segkp_lock);
1287 1247 }
1288 1248
1289 1249 /*
1290 1250 * Remove kpd from the hash table.
1291 1251 */
1292 1252 static void
1293 1253 segkp_delete(struct seg *seg, struct segkp_data *kpd)
1294 1254 {
1295 1255 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1296 1256 struct segkp_data **kpp;
1297 1257 int index;
1298 1258
1299 1259 ASSERT(MUTEX_HELD(&segkp_lock));
1300 1260
1301 1261 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1302 1262 for (kpp = &kpsd->kpsd_hash[index];
1303 1263 *kpp != NULL; kpp = &((*kpp)->kp_next)) {
1304 1264 if (*kpp == kpd) {
1305 1265 *kpp = kpd->kp_next;
1306 1266 return;
1307 1267 }
1308 1268 }
1309 1269 panic("segkp_delete: unable to find element to delete");
1310 1270 /*NOTREACHED*/
1311 1271 }
1312 1272
1313 1273 /*
1314 1274 * Find the kpd associated with a vaddr.
1315 1275 *
1316 1276 * Most of the callers of segkp_find will pass the vaddr that
1317 1277 * hashes to the desired index, but there are cases where
1318 1278 * this is not true in which case we have to (potentially) scan
1319 1279 * the whole table looking for it. This should be very rare
1320 1280 * (e.g. a segkp_fault(F_INVAL) on an address somewhere in the
1321 1281 * middle of the segkp_data region).
1322 1282 */
1323 1283 static struct segkp_data *
1324 1284 segkp_find(struct seg *seg, caddr_t vaddr)
1325 1285 {
1326 1286 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1327 1287 struct segkp_data *kpd;
1328 1288 int i;
1329 1289 int stop;
1330 1290
1331 1291 i = stop = SEGKP_HASH(vaddr);
1332 1292 mutex_enter(&segkp_lock);
1333 1293 do {
1334 1294 for (kpd = kpsd->kpsd_hash[i]; kpd != NULL;
1335 1295 kpd = kpd->kp_next) {
1336 1296 if (vaddr >= kpd->kp_base &&
1337 1297 vaddr < kpd->kp_base + kpd->kp_len) {
1338 1298 mutex_exit(&segkp_lock);
1339 1299 return (kpd);
1340 1300 }
1341 1301 }
1342 1302 if (--i < 0)
1343 1303 i = SEGKP_HASHSZ - 1; /* Wrap */
1344 1304 } while (i != stop);
1345 1305 mutex_exit(&segkp_lock);
1346 1306 return (NULL); /* Not found */
1347 1307 }
1348 1308
1349 1309 /*
1350 1310 * returns size of swappable area.
1351 1311 */
1352 1312 size_t
1353 1313 swapsize(caddr_t v)
1354 1314 {
1355 1315 struct segkp_data *kpd;
1356 1316
1357 1317 if ((kpd = segkp_find(segkp, v)) != NULL)
1358 1318 return (SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
1359 1319 else
1360 1320 return (NULL);
1361 1321 }
1362 1322
1363 1323 /*
1364 1324 * Dump out all the active segkp pages
1365 1325 */
1366 1326 static void
1367 1327 segkp_dump(struct seg *seg)
1368 1328 {
1369 1329 int i;
1370 1330 struct segkp_data *kpd;
1371 1331 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1372 1332
1373 1333 for (i = 0; i < SEGKP_HASHSZ; i++) {
1374 1334 for (kpd = kpsd->kpsd_hash[i];
1375 1335 kpd != NULL; kpd = kpd->kp_next) {
1376 1336 pfn_t pfn;
1377 1337 caddr_t addr;
1378 1338 caddr_t eaddr;
1379 1339
1380 1340 addr = kpd->kp_base;
1381 1341 eaddr = addr + kpd->kp_len;
1382 1342 while (addr < eaddr) {
1383 1343 ASSERT(seg->s_as == &kas);
1384 1344 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1385 1345 if (pfn != PFN_INVALID)
1386 1346 dump_addpage(seg->s_as, addr, pfn);
1387 1347 addr += PAGESIZE;
1388 1348 dump_timeleft = dump_timeout;
1389 1349 }
1390 1350 }
1391 1351 }
1392 1352 }
1393 1353
1394 1354 /*ARGSUSED*/
1395 1355 static int
1396 1356 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1397 1357 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1398 1358 {
1399 1359 return (ENOTSUP);
1400 -}
1401 -
1402 -/*ARGSUSED*/
1403 -static int
1404 -segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
1405 -{
1406 - return (ENODEV);
1407 -}
1408 -
1409 -/*ARGSUSED*/
1410 -static lgrp_mem_policy_info_t *
1411 -segkp_getpolicy(struct seg *seg, caddr_t addr)
1412 -{
1413 - return (NULL);
1414 -}
1415 -
1416 -/*ARGSUSED*/
1417 -static int
1418 -segkp_capable(struct seg *seg, segcapability_t capability)
1419 -{
1420 - return (0);
1421 1360 }
1422 1361
1423 1362 #include <sys/mem_config.h>
1424 1363
1425 1364 /*ARGSUSED*/
1426 1365 static void
1427 1366 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1428 1367 {}
1429 1368
1430 1369 /*
1431 1370 * During memory delete, turn off caches so that pages are not held.
1432 1371 * A better solution may be to unlock the pages while they are
1433 1372 * in the cache so that they may be collected naturally.
1434 1373 */
1435 1374
1436 1375 /*ARGSUSED*/
1437 1376 static int
1438 1377 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1439 1378 {
1440 1379 atomic_inc_32(&segkp_indel);
1441 1380 segkp_cache_free();
1442 1381 return (0);
1443 1382 }
1444 1383
1445 1384 /*ARGSUSED*/
1446 1385 static void
1447 1386 segkp_mem_config_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
1448 1387 {
1449 1388 atomic_dec_32(&segkp_indel);
1450 1389 }
1451 1390
1452 1391 static kphysm_setup_vector_t segkp_mem_config_vec = {
1453 1392 KPHYSM_SETUP_VECTOR_VERSION,
1454 1393 segkp_mem_config_post_add,
1455 1394 segkp_mem_config_pre_del,
1456 1395 segkp_mem_config_post_del,
1457 1396 };
1458 1397
1459 1398 static void
1460 1399 segkpinit_mem_config(struct seg *seg)
1461 1400 {
1462 1401 int ret;
1463 1402
1464 1403 ret = kphysm_setup_func_register(&segkp_mem_config_vec, (void *)seg);
1465 1404 ASSERT(ret == 0);
1466 1405 }