no need for bad-op segment op functions
The segment drivers have a number of bad-op functions that simply panic.
Keeping the function pointer NULL accomplishes the same thing in most
cases; in the remaining cases, a NULL function pointer results in the
proper error code being returned.
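
As a rough sketch of the idea (not code from this webrev; the wrapper name
segop_sync and the EINVAL choice are invented for illustration), leaving an
unimplemented member of struct seg_ops NULL means a blind call through it
panics just as a bad-op stub did, while a dispatch site that prefers an
error over a panic can test the pointer first:

#include <sys/errno.h>
#include <vm/seg.h>

/*
 * Hypothetical guarded dispatch for one op.  A NULL .sync member panics
 * naturally if called blindly (the same effect as a stub like
 * segkp_badop()); checking it here returns a proper error code instead.
 */
static int
segop_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	if (seg->s_ops->sync == NULL)
		return (EINVAL);
	return ((*seg->s_ops->sync)(seg, addr, len, attr, flags));
}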
--- old/usr/src/uts/common/vm/seg_kp.c
+++ new/usr/src/uts/common/vm/seg_kp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 26 /* All Rights Reserved */
27 27
28 28 /*
29 29 * Portions of this source code were derived from Berkeley 4.3 BSD
30 30 * under license from the Regents of the University of California.
31 31 */
32 32
33 33 /*
34 34 * segkp is a segment driver that administers the allocation and deallocation
35 35 * of pageable variable size chunks of kernel virtual address space. Each
36 36 * allocated resource is page-aligned.
37 37 *
38 38 * The user may specify whether the resource should be initialized to 0,
39 39 * include a redzone, or locked in memory.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/thread.h>
45 45 #include <sys/param.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/buf.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vnode.h>
52 52 #include <sys/cmn_err.h>
53 53 #include <sys/swap.h>
54 54 #include <sys/tuneable.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/vmem.h>
57 57 #include <sys/cred.h>
58 58 #include <sys/dumphdr.h>
59 59 #include <sys/debug.h>
60 60 #include <sys/vtrace.h>
61 61 #include <sys/stack.h>
62 62 #include <sys/atomic.h>
63 63 #include <sys/archsystm.h>
64 64 #include <sys/lgrp.h>
65 65
66 66 #include <vm/as.h>
67 67 #include <vm/seg.h>
68 68 #include <vm/seg_kp.h>
69 69 #include <vm/seg_kmem.h>
70 70 #include <vm/anon.h>
71 71 #include <vm/page.h>
72 72 #include <vm/hat.h>
73 73 #include <sys/bitmap.h>
74 74
75 75 /*
76 76 * Private seg op routines
77 77 */
78 -static void segkp_badop(void);
79 78 static void segkp_dump(struct seg *seg);
80 79 static int segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
81 80 uint_t prot);
82 81 static int segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
83 82 static int segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
84 83 struct page ***page, enum lock_type type,
85 84 enum seg_rw rw);
86 85 static void segkp_insert(struct seg *seg, struct segkp_data *kpd);
87 86 static void segkp_delete(struct seg *seg, struct segkp_data *kpd);
88 87 static caddr_t segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
89 88 struct segkp_data **tkpd, struct anon_map *amp);
90 89 static void segkp_release_internal(struct seg *seg,
91 90 struct segkp_data *kpd, size_t len);
92 91 static int segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
93 92 size_t len, struct segkp_data *kpd, uint_t flags);
94 93 static int segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
95 94 size_t len, struct segkp_data *kpd, uint_t flags);
96 95 static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
97 96 static int segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
98 97 static lgrp_mem_policy_info_t *segkp_getpolicy(struct seg *seg,
99 98 caddr_t addr);
100 99 static int segkp_capable(struct seg *seg, segcapability_t capability);
101 100
102 101 /*
103 102 * Lock used to protect the hash table(s) and caches.
104 103 */
105 104 static kmutex_t segkp_lock;
106 105
107 106 /*
108 107 * The segkp caches
109 108 */
110 109 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
111 110
112 -#define SEGKP_BADOP(t) (t(*)())segkp_badop
113 -
114 111 /*
115 112 * When there are fewer than red_minavail bytes left on the stack,
116 113 * segkp_map_red() will map in the redzone (if called). 5000 seems
117 114 * to work reasonably well...
118 115 */
119 116 long red_minavail = 5000;
120 117
121 118 /*
122 119 * will be set to 1 for 32 bit x86 systems only, in startup.c
123 120 */
124 121 int segkp_fromheap = 0;
125 122 ulong_t *segkp_bitmap;
126 123
127 124 /*
128 125 * If segkp_map_red() is called with the redzone already mapped and
129 126 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
130 127 * then the stack situation has become quite serious; if much more stack
131 128 * is consumed, we have the potential of scrogging the next thread/LWP
132 129 * structure. To help debug the "can't happen" panics which may
133 130 * result from this condition, we record hrestime and the calling thread
134 131 * in red_deep_hires and red_deep_thread respectively.
135 132 */
136 133 #define RED_DEEP_THRESHOLD 2000
137 134
138 135 hrtime_t red_deep_hires;
139 136 kthread_t *red_deep_thread;
140 137
141 138 uint32_t red_nmapped;
142 139 uint32_t red_closest = UINT_MAX;
143 140 uint32_t red_ndoubles;
144 141
145 142 pgcnt_t anon_segkp_pages_locked; /* See vm/anon.h */
146 143 pgcnt_t anon_segkp_pages_resv; /* anon reserved by seg_kp */
147 144
148 145 static struct seg_ops segkp_ops = {
149 - .dup = SEGKP_BADOP(int),
150 - .unmap = SEGKP_BADOP(int),
151 - .free = SEGKP_BADOP(void),
152 146 .fault = segkp_fault,
153 - .faulta = SEGKP_BADOP(faultcode_t),
154 - .setprot = SEGKP_BADOP(int),
155 147 .checkprot = segkp_checkprot,
156 148 .kluster = segkp_kluster,
157 - .sync = SEGKP_BADOP(int),
158 - .incore = SEGKP_BADOP(size_t),
159 - .lockop = SEGKP_BADOP(int),
160 - .getprot = SEGKP_BADOP(int),
161 - .getoffset = SEGKP_BADOP(u_offset_t),
162 - .gettype = SEGKP_BADOP(int),
163 - .getvp = SEGKP_BADOP(int),
164 - .advise = SEGKP_BADOP(int),
165 149 .dump = segkp_dump,
166 150 .pagelock = segkp_pagelock,
167 - .setpagesize = SEGKP_BADOP(int),
168 151 .getmemid = segkp_getmemid,
169 152 .getpolicy = segkp_getpolicy,
170 153 .capable = segkp_capable,
171 154 .inherit = seg_inherit_notsup,
172 155 };
173 156
174 -
175 -static void
176 -segkp_badop(void)
177 -{
178 - panic("segkp_badop");
179 - /*NOTREACHED*/
180 -}
181 157
182 158 static void segkpinit_mem_config(struct seg *);
183 159
184 160 static uint32_t segkp_indel;
185 161
186 162 /*
187 163 * Allocate the segment specific private data struct and fill it in
188 164 * with the per kp segment mutex, anon ptr. array and hash table.
189 165 */
190 166 int
191 167 segkp_create(struct seg *seg)
192 168 {
193 169 struct segkp_segdata *kpsd;
194 170 size_t np;
195 171
196 172 ASSERT(seg != NULL && seg->s_as == &kas);
197 173 ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
198 174
199 175 if (seg->s_size & PAGEOFFSET) {
200 176 panic("Bad segkp size");
201 177 /*NOTREACHED*/
202 178 }
203 179
204 180 kpsd = kmem_zalloc(sizeof (struct segkp_segdata), KM_SLEEP);
205 181
206 182 /*
207 183 * Allocate the virtual memory for segkp and initialize it
208 184 */
209 185 if (segkp_fromheap) {
210 186 np = btop(kvseg.s_size);
211 187 segkp_bitmap = kmem_zalloc(BT_SIZEOFMAP(np), KM_SLEEP);
212 188 kpsd->kpsd_arena = vmem_create("segkp", NULL, 0, PAGESIZE,
213 189 vmem_alloc, vmem_free, heap_arena, 5 * PAGESIZE, VM_SLEEP);
214 190 } else {
215 191 segkp_bitmap = NULL;
216 192 np = btop(seg->s_size);
217 193 kpsd->kpsd_arena = vmem_create("segkp", seg->s_base,
218 194 seg->s_size, PAGESIZE, NULL, NULL, NULL, 5 * PAGESIZE,
219 195 VM_SLEEP);
220 196 }
221 197
222 198 kpsd->kpsd_anon = anon_create(np, ANON_SLEEP | ANON_ALLOC_FORCE);
223 199
224 200 kpsd->kpsd_hash = kmem_zalloc(SEGKP_HASHSZ * sizeof (struct segkp *),
225 201 KM_SLEEP);
226 202 seg->s_data = (void *)kpsd;
227 203 seg->s_ops = &segkp_ops;
228 204 segkpinit_mem_config(seg);
229 205 return (0);
230 206 }
231 207
232 208
233 209 /*
234 210 * Find a free 'freelist' and initialize it with the appropriate attributes
235 211 */
236 212 void *
237 213 segkp_cache_init(struct seg *seg, int maxsize, size_t len, uint_t flags)
238 214 {
239 215 int i;
240 216
241 217 if ((flags & KPD_NO_ANON) && !(flags & KPD_LOCKED))
242 218 return ((void *)-1);
243 219
244 220 mutex_enter(&segkp_lock);
245 221 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
246 222 if (segkp_cache[i].kpf_inuse)
247 223 continue;
248 224 segkp_cache[i].kpf_inuse = 1;
249 225 segkp_cache[i].kpf_max = maxsize;
250 226 segkp_cache[i].kpf_flags = flags;
251 227 segkp_cache[i].kpf_seg = seg;
252 228 segkp_cache[i].kpf_len = len;
253 229 mutex_exit(&segkp_lock);
254 230 return ((void *)(uintptr_t)i);
255 231 }
256 232 mutex_exit(&segkp_lock);
257 233 return ((void *)-1);
258 234 }
259 235
260 236 /*
261 237 * Free all the cache resources.
262 238 */
263 239 void
264 240 segkp_cache_free(void)
265 241 {
266 242 struct segkp_data *kpd;
267 243 struct seg *seg;
268 244 int i;
269 245
270 246 mutex_enter(&segkp_lock);
271 247 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
272 248 if (!segkp_cache[i].kpf_inuse)
273 249 continue;
274 250 /*
275 251 * Disconnect the freelist and process each element
276 252 */
277 253 kpd = segkp_cache[i].kpf_list;
278 254 seg = segkp_cache[i].kpf_seg;
279 255 segkp_cache[i].kpf_list = NULL;
280 256 segkp_cache[i].kpf_count = 0;
281 257 mutex_exit(&segkp_lock);
282 258
283 259 while (kpd != NULL) {
284 260 struct segkp_data *next;
285 261
286 262 next = kpd->kp_next;
287 263 segkp_release_internal(seg, kpd, kpd->kp_len);
288 264 kpd = next;
289 265 }
290 266 mutex_enter(&segkp_lock);
291 267 }
292 268 mutex_exit(&segkp_lock);
293 269 }
294 270
295 271 /*
296 272 * There are two entry points into segkp_get_internal. The first includes
297 273 * a cookie used to access a pool of cached segkp resources. The second
298 274 * does not use the cache.
299 275 */
300 276 caddr_t
301 277 segkp_get(struct seg *seg, size_t len, uint_t flags)
302 278 {
303 279 struct segkp_data *kpd = NULL;
304 280
305 281 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
306 282 kpd->kp_cookie = -1;
307 283 return (stom(kpd->kp_base, flags));
308 284 }
309 285 return (NULL);
310 286 }
311 287
312 288 /*
313 289 * Return a 'cached' segkp address
314 290 */
315 291 caddr_t
316 292 segkp_cache_get(void *cookie)
317 293 {
318 294 struct segkp_cache *freelist = NULL;
319 295 struct segkp_data *kpd = NULL;
320 296 int index = (int)(uintptr_t)cookie;
321 297 struct seg *seg;
322 298 size_t len;
323 299 uint_t flags;
324 300
325 301 if (index < 0 || index >= SEGKP_MAX_CACHE)
326 302 return (NULL);
327 303 freelist = &segkp_cache[index];
328 304
329 305 mutex_enter(&segkp_lock);
330 306 seg = freelist->kpf_seg;
331 307 flags = freelist->kpf_flags;
332 308 if (freelist->kpf_list != NULL) {
333 309 kpd = freelist->kpf_list;
334 310 freelist->kpf_list = kpd->kp_next;
335 311 freelist->kpf_count--;
336 312 mutex_exit(&segkp_lock);
337 313 kpd->kp_next = NULL;
338 314 segkp_insert(seg, kpd);
339 315 return (stom(kpd->kp_base, flags));
340 316 }
341 317 len = freelist->kpf_len;
342 318 mutex_exit(&segkp_lock);
343 319 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
344 320 kpd->kp_cookie = index;
345 321 return (stom(kpd->kp_base, flags));
346 322 }
347 323 return (NULL);
348 324 }
349 325
350 326 caddr_t
351 327 segkp_get_withanonmap(
352 328 struct seg *seg,
353 329 size_t len,
354 330 uint_t flags,
355 331 struct anon_map *amp)
356 332 {
357 333 struct segkp_data *kpd = NULL;
358 334
359 335 ASSERT(amp != NULL);
360 336 flags |= KPD_HASAMP;
361 337 if (segkp_get_internal(seg, len, flags, &kpd, amp) != NULL) {
362 338 kpd->kp_cookie = -1;
363 339 return (stom(kpd->kp_base, flags));
364 340 }
365 341 return (NULL);
366 342 }
367 343
368 344 /*
369 345 * This does the real work of segkp allocation.
370 346 * Return to client base addr. len must be page-aligned. A null value is
371 347 * returned if there are no more vm resources (e.g. pages, swap). The len
372 348 * and base recorded in the private data structure include the redzone
373 349 * and the redzone length (if applicable). If the user requests a redzone
374 350 * either the first or last page is left unmapped, depending on whether
375 351 * stacks grow toward low or high memory.
376 352 *
377 353 * The client may also specify a no-wait flag. If that is set then the
378 354 * request will choose a non-blocking path when requesting resources.
379 355 * The default is to make the client wait.
380 356 */
381 357 static caddr_t
382 358 segkp_get_internal(
383 359 struct seg *seg,
384 360 size_t len,
385 361 uint_t flags,
386 362 struct segkp_data **tkpd,
387 363 struct anon_map *amp)
388 364 {
389 365 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
390 366 struct segkp_data *kpd;
391 367 caddr_t vbase = NULL; /* always first virtual, may not be mapped */
392 368 pgcnt_t np = 0; /* number of pages in the resource */
393 369 pgcnt_t segkpindex;
394 370 long i;
395 371 caddr_t va;
396 372 pgcnt_t pages = 0;
397 373 ulong_t anon_idx = 0;
398 374 int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
399 375 caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;
400 376
401 377 if (len & PAGEOFFSET) {
402 378 panic("segkp_get: len is not page-aligned");
403 379 /*NOTREACHED*/
404 380 }
405 381
406 382 ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));
407 383
408 384 /* Only allow KPD_NO_ANON if we are going to lock it down */
409 385 if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
410 386 return (NULL);
411 387
412 388 if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
413 389 return (NULL);
414 390 /*
415 391 * Fix up the len to reflect the REDZONE if applicable
416 392 */
417 393 if (flags & KPD_HASREDZONE)
418 394 len += PAGESIZE;
419 395 np = btop(len);
420 396
421 397 vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
422 398 if (vbase == NULL) {
423 399 kmem_free(kpd, sizeof (struct segkp_data));
424 400 return (NULL);
425 401 }
426 402
427 403 /* If locking, reserve physical memory */
428 404 if (flags & KPD_LOCKED) {
429 405 pages = btop(SEGKP_MAPLEN(len, flags));
430 406 if (page_resv(pages, kmflag) == 0) {
431 407 vmem_free(SEGKP_VMEM(seg), vbase, len);
432 408 kmem_free(kpd, sizeof (struct segkp_data));
433 409 return (NULL);
434 410 }
435 411 if ((flags & KPD_NO_ANON) == 0)
436 412 atomic_add_long(&anon_segkp_pages_locked, pages);
437 413 }
438 414
439 415 /*
440 416 * Reserve sufficient swap space for this vm resource. We'll
441 417 * actually allocate it in the loop below, but reserving it
442 418 * here allows us to back out more gracefully than if we
443 419 * had an allocation failure in the body of the loop.
444 420 *
445 421 * Note that we don't need swap space for the red zone page.
446 422 */
447 423 if (amp != NULL) {
448 424 /*
449 425 * The swap reservation has been done, if required, and the
450 426 * anon_hdr is separate.
451 427 */
452 428 anon_idx = 0;
453 429 kpd->kp_anon_idx = anon_idx;
454 430 kpd->kp_anon = amp->ahp;
455 431
456 432 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
457 433 kpd, vbase, len, flags, 1);
458 434
459 435 } else if ((flags & KPD_NO_ANON) == 0) {
460 436 if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
461 437 if (flags & KPD_LOCKED) {
462 438 atomic_add_long(&anon_segkp_pages_locked,
463 439 -pages);
464 440 page_unresv(pages);
465 441 }
466 442 vmem_free(SEGKP_VMEM(seg), vbase, len);
467 443 kmem_free(kpd, sizeof (struct segkp_data));
468 444 return (NULL);
469 445 }
470 446 atomic_add_long(&anon_segkp_pages_resv,
471 447 btop(SEGKP_MAPLEN(len, flags)));
472 448 anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
473 449 kpd->kp_anon_idx = anon_idx;
474 450 kpd->kp_anon = kpsd->kpsd_anon;
475 451
476 452 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
477 453 kpd, vbase, len, flags, 1);
478 454 } else {
479 455 kpd->kp_anon = NULL;
480 456 kpd->kp_anon_idx = 0;
481 457 }
482 458
483 459 /*
484 460 * Allocate page and anon resources for the virtual address range
485 461 * except the redzone
486 462 */
487 463 if (segkp_fromheap)
488 464 segkpindex = btop((uintptr_t)(vbase - kvseg.s_base));
489 465 for (i = 0, va = vbase; i < np; i++, va += PAGESIZE) {
490 466 page_t *pl[2];
491 467 struct vnode *vp;
492 468 anoff_t off;
493 469 int err;
494 470 page_t *pp = NULL;
495 471
496 472 /*
497 473 * Mark this page to be a segkp page in the bitmap.
498 474 */
499 475 if (segkp_fromheap) {
500 476 BT_ATOMIC_SET(segkp_bitmap, segkpindex);
501 477 segkpindex++;
502 478 }
503 479
504 480 /*
505 481 * If this page is the red zone page, we don't need swap
506 482 * space for it. Note that we skip over the code that
507 483 * establishes MMU mappings, so that the page remains
508 484 * invalid.
509 485 */
510 486 if ((flags & KPD_HASREDZONE) && KPD_REDZONE(kpd) == i)
511 487 continue;
512 488
513 489 if (kpd->kp_anon != NULL) {
514 490 struct anon *ap;
515 491
516 492 ASSERT(anon_get_ptr(kpd->kp_anon, anon_idx + i)
517 493 == NULL);
518 494 /*
519 495 * Determine the "vp" and "off" of the anon slot.
520 496 */
521 497 ap = anon_alloc(NULL, 0);
522 498 if (amp != NULL)
523 499 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
524 500 (void) anon_set_ptr(kpd->kp_anon, anon_idx + i,
525 501 ap, ANON_SLEEP);
526 502 if (amp != NULL)
527 503 ANON_LOCK_EXIT(&amp->a_rwlock);
528 504 swap_xlate(ap, &vp, &off);
529 505
530 506 /*
531 507 * Create a page with the specified identity. The
532 508 * page is returned with the "shared" lock held.
533 509 */
534 510 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE,
535 511 NULL, pl, PAGESIZE, seg, va, S_CREATE,
536 512 kcred, NULL);
537 513 if (err) {
538 514 /*
539 515 * XXX - This should not fail.
540 516 */
541 517 panic("segkp_get: no pages");
542 518 /*NOTREACHED*/
543 519 }
544 520 pp = pl[0];
545 521 } else {
546 522 ASSERT(page_exists(&kvp,
547 523 (u_offset_t)(uintptr_t)va) == NULL);
548 524
549 525 if ((pp = page_create_va(&kvp,
550 526 (u_offset_t)(uintptr_t)va, PAGESIZE,
551 527 (flags & KPD_NOWAIT ? 0 : PG_WAIT) | PG_EXCL |
552 528 PG_NORELOC, seg, va)) == NULL) {
553 529 /*
554 530 * Legitimize resource; then destroy it.
555 531 * Easier than trying to unwind here.
556 532 */
557 533 kpd->kp_flags = flags;
558 534 kpd->kp_base = vbase;
559 535 kpd->kp_len = len;
560 536 segkp_release_internal(seg, kpd, va - vbase);
561 537 return (NULL);
562 538 }
563 539 page_io_unlock(pp);
564 540 }
565 541
566 542 if (flags & KPD_ZERO)
567 543 pagezero(pp, 0, PAGESIZE);
568 544
569 545 /*
570 546 * Load and lock an MMU translation for the page.
571 547 */
572 548 hat_memload(seg->s_as->a_hat, va, pp, (PROT_READ|PROT_WRITE),
573 549 ((flags & KPD_LOCKED) ? HAT_LOAD_LOCK : HAT_LOAD));
574 550
575 551 /*
576 552 * Now, release lock on the page.
577 553 */
578 554 if (flags & KPD_LOCKED) {
579 555 /*
580 556 * Indicate to page_retire framework that this
581 557 * page can only be retired when it is freed.
582 558 */
583 559 PP_SETRAF(pp);
584 560 page_downgrade(pp);
585 561 } else
586 562 page_unlock(pp);
587 563 }
588 564
589 565 kpd->kp_flags = flags;
590 566 kpd->kp_base = vbase;
591 567 kpd->kp_len = len;
592 568 segkp_insert(seg, kpd);
593 569 *tkpd = kpd;
594 570 return (stom(kpd->kp_base, flags));
595 571 }
596 572
597 573 /*
598 574 * Release the resource to the cache if the pool (designated by the cookie)
599 575 * has less than the maximum allowable. If inserted in cache,
600 576 * segkp_delete ensures the element is taken off of the active list.
601 577 */
602 578 void
603 579 segkp_release(struct seg *seg, caddr_t vaddr)
604 580 {
605 581 struct segkp_cache *freelist;
606 582 struct segkp_data *kpd = NULL;
607 583
608 584 if ((kpd = segkp_find(seg, vaddr)) == NULL) {
609 585 panic("segkp_release: null kpd");
610 586 /*NOTREACHED*/
611 587 }
612 588
613 589 if (kpd->kp_cookie != -1) {
614 590 freelist = &segkp_cache[kpd->kp_cookie];
615 591 mutex_enter(&segkp_lock);
616 592 if (!segkp_indel && freelist->kpf_count < freelist->kpf_max) {
617 593 segkp_delete(seg, kpd);
618 594 kpd->kp_next = freelist->kpf_list;
619 595 freelist->kpf_list = kpd;
620 596 freelist->kpf_count++;
621 597 mutex_exit(&segkp_lock);
622 598 return;
623 599 } else {
624 600 mutex_exit(&segkp_lock);
625 601 kpd->kp_cookie = -1;
626 602 }
627 603 }
628 604 segkp_release_internal(seg, kpd, kpd->kp_len);
629 605 }
630 606
631 607 /*
632 608 * Free the entire resource. segkp_unlock gets called with the start of the
633 609 * mapped portion of the resource. The length is the size of the mapped
634 610 * portion.
635 611 */
636 612 static void
637 613 segkp_release_internal(struct seg *seg, struct segkp_data *kpd, size_t len)
638 614 {
639 615 caddr_t va;
640 616 long i;
641 617 long redzone;
642 618 size_t np;
643 619 page_t *pp;
644 620 struct vnode *vp;
645 621 anoff_t off;
646 622 struct anon *ap;
647 623 pgcnt_t segkpindex;
648 624
649 625 ASSERT(kpd != NULL);
650 626 ASSERT((kpd->kp_flags & KPD_HASAMP) == 0 || kpd->kp_cookie == -1);
651 627 np = btop(len);
652 628
653 629 /* Remove from active hash list */
654 630 if (kpd->kp_cookie == -1) {
655 631 mutex_enter(&segkp_lock);
656 632 segkp_delete(seg, kpd);
657 633 mutex_exit(&segkp_lock);
658 634 }
659 635
660 636 /*
661 637 * Precompute redzone page index.
662 638 */
663 639 redzone = -1;
664 640 if (kpd->kp_flags & KPD_HASREDZONE)
665 641 redzone = KPD_REDZONE(kpd);
666 642
667 643
668 644 va = kpd->kp_base;
669 645
670 646 hat_unload(seg->s_as->a_hat, va, (np << PAGESHIFT),
671 647 ((kpd->kp_flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
672 648 /*
673 649 * Free up those anon resources that are quiescent.
674 650 */
675 651 if (segkp_fromheap)
676 652 segkpindex = btop((uintptr_t)(va - kvseg.s_base));
677 653 for (i = 0; i < np; i++, va += PAGESIZE) {
678 654
679 655 /*
680 656 * Clear the bit for this page from the bitmap.
681 657 */
682 658 if (segkp_fromheap) {
683 659 BT_ATOMIC_CLEAR(segkp_bitmap, segkpindex);
684 660 segkpindex++;
685 661 }
686 662
687 663 if (i == redzone)
688 664 continue;
689 665 if (kpd->kp_anon) {
690 666 /*
691 667 * Free up anon resources and destroy the
692 668 * associated pages.
693 669 *
694 670 * Release the lock if there is one. Have to get the
695 671 * page to do this, unfortunately.
696 672 */
697 673 if (kpd->kp_flags & KPD_LOCKED) {
698 674 ap = anon_get_ptr(kpd->kp_anon,
699 675 kpd->kp_anon_idx + i);
700 676 swap_xlate(ap, &vp, &off);
701 677 /* Find the shared-locked page. */
702 678 pp = page_find(vp, (u_offset_t)off);
703 679 if (pp == NULL) {
704 680 panic("segkp_release: "
705 681 "kp_anon: no page to unlock ");
706 682 /*NOTREACHED*/
707 683 }
708 684 if (PP_ISRAF(pp))
709 685 PP_CLRRAF(pp);
710 686
711 687 page_unlock(pp);
712 688 }
713 689 if ((kpd->kp_flags & KPD_HASAMP) == 0) {
714 690 anon_free(kpd->kp_anon, kpd->kp_anon_idx + i,
715 691 PAGESIZE);
716 692 anon_unresv_zone(PAGESIZE, NULL);
717 693 atomic_dec_ulong(&anon_segkp_pages_resv);
718 694 }
719 695 TRACE_5(TR_FAC_VM,
720 696 TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
721 697 kpd, va, PAGESIZE, 0, 0);
722 698 } else {
723 699 if (kpd->kp_flags & KPD_LOCKED) {
724 700 pp = page_find(&kvp, (u_offset_t)(uintptr_t)va);
725 701 if (pp == NULL) {
726 702 panic("segkp_release: "
727 703 "no page to unlock");
728 704 /*NOTREACHED*/
729 705 }
730 706 if (PP_ISRAF(pp))
731 707 PP_CLRRAF(pp);
732 708 /*
733 709 * We should just upgrade the lock here
734 710 * but there is no upgrade that waits.
735 711 */
736 712 page_unlock(pp);
737 713 }
738 714 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)va,
739 715 SE_EXCL);
740 716 if (pp != NULL)
741 717 page_destroy(pp, 0);
742 718 }
743 719 }
744 720
745 721 /* If locked, release physical memory reservation */
746 722 if (kpd->kp_flags & KPD_LOCKED) {
747 723 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
748 724 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
749 725 atomic_add_long(&anon_segkp_pages_locked, -pages);
750 726 page_unresv(pages);
751 727 }
752 728
753 729 vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
754 730 kmem_free(kpd, sizeof (struct segkp_data));
755 731 }
756 732
757 733 /*
758 734 * segkp_map_red() will check the current frame pointer against the
759 735 * stack base. If the amount of stack remaining is questionable
760 736 * (less than red_minavail), then segkp_map_red() will map in the redzone
761 737 * and return 1. Otherwise, it will return 0. segkp_map_red() can
762 738 * _only_ be called when it is safe to sleep on page_create_va().
763 739 *
764 740 * It is up to the caller to remember whether segkp_map_red() successfully
765 741 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
766 742 * time.
767 743 *
768 744 * Currently, this routine is only called from pagefault() (which necessarily
769 745 * satisfies the above conditions).
770 746 */
771 747 #if defined(STACK_GROWTH_DOWN)
772 748 int
773 749 segkp_map_red(void)
774 750 {
775 751 uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
776 752 #ifndef _LP64
777 753 caddr_t stkbase;
778 754 #endif
779 755
780 756 /*
781 757 * Optimize for the common case where we simply return.
782 758 */
783 759 if ((curthread->t_red_pp == NULL) &&
784 760 (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
785 761 return (0);
786 762
787 763 #if defined(_LP64)
788 764 /*
789 765 * XXX We probably need something better than this.
790 766 */
791 767 panic("kernel stack overflow");
792 768 /*NOTREACHED*/
793 769 #else /* _LP64 */
794 770 if (curthread->t_red_pp == NULL) {
795 771 page_t *red_pp;
796 772 struct seg kseg;
797 773
798 774 caddr_t red_va = (caddr_t)
799 775 (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
800 776 PAGESIZE);
801 777
802 778 ASSERT(page_exists(&kvp, (u_offset_t)(uintptr_t)red_va) ==
803 779 NULL);
804 780
805 781 /*
806 782 * Allocate the physical page for the red page.
807 783 */
808 784 /*
809 785 * No PG_NORELOC here to avoid waits. Unlikely to get
810 786 * a relocate happening in the short time the page exists
811 787 * and it will be OK anyway.
812 788 */
813 789
814 790 kseg.s_as = &kas;
815 791 red_pp = page_create_va(&kvp, (u_offset_t)(uintptr_t)red_va,
816 792 PAGESIZE, PG_WAIT | PG_EXCL, &kseg, red_va);
817 793 ASSERT(red_pp != NULL);
818 794
819 795 /*
820 796 * So we now have a page to jam into the redzone...
821 797 */
822 798 page_io_unlock(red_pp);
823 799
824 800 hat_memload(kas.a_hat, red_va, red_pp,
825 801 (PROT_READ|PROT_WRITE), HAT_LOAD_LOCK);
826 802 page_downgrade(red_pp);
827 803
828 804 /*
829 805 * The page is left SE_SHARED locked so we can hold on to
830 806 * the page_t pointer.
831 807 */
832 808 curthread->t_red_pp = red_pp;
833 809
834 810 atomic_inc_32(&red_nmapped);
835 811 while (fp - (uintptr_t)curthread->t_stkbase < red_closest) {
836 812 (void) atomic_cas_32(&red_closest, red_closest,
837 813 (uint32_t)(fp - (uintptr_t)curthread->t_stkbase));
838 814 }
839 815 return (1);
840 816 }
841 817
842 818 stkbase = (caddr_t)(((uintptr_t)curthread->t_stkbase &
843 819 (uintptr_t)PAGEMASK) - PAGESIZE);
844 820
845 821 atomic_inc_32(&red_ndoubles);
846 822
847 823 if (fp - (uintptr_t)stkbase < RED_DEEP_THRESHOLD) {
848 824 /*
849 825 * Oh boy. We're already deep within the mapped-in
850 826 * redzone page, and the caller is trying to prepare
851 827 * for a deep stack run. We're running without a
852 828 * redzone right now: if the caller plows off the
853 829 * end of the stack, it'll plow another thread or
854 830 * LWP structure. That situation could result in
855 831 * a very hard-to-debug panic, so, in the spirit of
856 832 * recording the name of one's killer in one's own
857 833 * blood, we're going to record hrestime and the calling
858 834 * thread.
859 835 */
860 836 red_deep_hires = hrestime.tv_nsec;
861 837 red_deep_thread = curthread;
862 838 }
863 839
864 840 /*
865 841 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
866 842 */
867 843 ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
868 844 return (0);
869 845 #endif /* _LP64 */
870 846 }
871 847
872 848 void
873 849 segkp_unmap_red(void)
874 850 {
875 851 page_t *pp;
876 852 caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
877 853 (uintptr_t)PAGEMASK) - PAGESIZE);
878 854
879 855 ASSERT(curthread->t_red_pp != NULL);
880 856
881 857 /*
882 858 * Because we locked the mapping down, we can't simply rely
883 859 * on page_destroy() to clean everything up; we need to call
884 860 * hat_unload() to explicitly unlock the mapping resources.
885 861 */
886 862 hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
887 863
888 864 pp = curthread->t_red_pp;
889 865
890 866 ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
891 867
892 868 /*
893 869 * Need to upgrade the SE_SHARED lock to SE_EXCL.
894 870 */
895 871 if (!page_tryupgrade(pp)) {
896 872 /*
897 873 * As there is no wait for upgrade, release the
898 874 * SE_SHARED lock and wait for SE_EXCL.
899 875 */
900 876 page_unlock(pp);
901 877 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)red_va, SE_EXCL);
902 878 /* pp may be NULL here, hence the test below */
903 879 }
904 880
905 881 /*
906 882 * Destroy the page, with dontfree set to zero (i.e. free it).
907 883 */
908 884 if (pp != NULL)
909 885 page_destroy(pp, 0);
910 886 curthread->t_red_pp = NULL;
911 887 }
912 888 #else
913 889 #error Red stacks only supported with downwards stack growth.
914 890 #endif
915 891
916 892 /*
917 893 * Handle a fault on an address corresponding to one of the
918 894 * resources in the segkp segment.
919 895 */
920 896 faultcode_t
921 897 segkp_fault(
922 898 struct hat *hat,
923 899 struct seg *seg,
924 900 caddr_t vaddr,
925 901 size_t len,
926 902 enum fault_type type,
927 903 enum seg_rw rw)
928 904 {
929 905 struct segkp_data *kpd = NULL;
930 906 int err;
931 907
932 908 ASSERT(seg->s_as == &kas && RW_READ_HELD(&seg->s_as->a_lock));
933 909
934 910 /*
935 911 * Sanity checks.
936 912 */
937 913 if (type == F_PROT) {
938 914 panic("segkp_fault: unexpected F_PROT fault");
939 915 /*NOTREACHED*/
940 916 }
941 917
942 918 if ((kpd = segkp_find(seg, vaddr)) == NULL)
943 919 return (FC_NOMAP);
944 920
945 921 mutex_enter(&kpd->kp_lock);
946 922
947 923 if (type == F_SOFTLOCK) {
948 924 ASSERT(!(kpd->kp_flags & KPD_LOCKED));
949 925 /*
950 926 * The F_SOFTLOCK case has more stringent
951 927 * range requirements: the given range must exactly coincide
952 928 * with the resource's mapped portion. Note that a reference to the
953 929 * redzone is handled, since vaddr would not equal base.
954 930 */
955 931 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
956 932 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
957 933 mutex_exit(&kpd->kp_lock);
958 934 return (FC_MAKE_ERR(EFAULT));
959 935 }
960 936
961 937 if ((err = segkp_load(hat, seg, vaddr, len, kpd, KPD_LOCKED))) {
962 938 mutex_exit(&kpd->kp_lock);
963 939 return (FC_MAKE_ERR(err));
964 940 }
965 941 kpd->kp_flags |= KPD_LOCKED;
966 942 mutex_exit(&kpd->kp_lock);
967 943 return (0);
968 944 }
969 945
970 946 if (type == F_INVAL) {
971 947 ASSERT(!(kpd->kp_flags & KPD_NO_ANON));
972 948
973 949 /*
974 950 * Check if we touched the redzone. Somewhat optimistic
975 951 * here if we are touching the redzone of our own stack
976 952 * since we wouldn't have a stack to get this far...
977 953 */
978 954 if ((kpd->kp_flags & KPD_HASREDZONE) &&
979 955 btop((uintptr_t)(vaddr - kpd->kp_base)) == KPD_REDZONE(kpd))
980 956 panic("segkp_fault: accessing redzone");
981 957
982 958 /*
983 959 * This fault may occur while the page is being F_SOFTLOCK'ed.
984 960 * Return since a 2nd segkp_load is unnecessary and also would
985 961 * result in the page being locked twice and eventually
986 962 * hang the thread_reaper thread.
987 963 */
988 964 if (kpd->kp_flags & KPD_LOCKED) {
989 965 mutex_exit(&kpd->kp_lock);
990 966 return (0);
991 967 }
992 968
993 969 err = segkp_load(hat, seg, vaddr, len, kpd, kpd->kp_flags);
994 970 mutex_exit(&kpd->kp_lock);
995 971 return (err ? FC_MAKE_ERR(err) : 0);
996 972 }
997 973
998 974 if (type == F_SOFTUNLOCK) {
999 975 uint_t flags;
1000 976
1001 977 /*
1002 978 * Make sure the addr is LOCKED and it has anon backing
1003 979 * before unlocking
1004 980 */
1005 981 if ((kpd->kp_flags & (KPD_LOCKED|KPD_NO_ANON)) != KPD_LOCKED) {
1006 982 panic("segkp_fault: bad unlock");
1007 983 /*NOTREACHED*/
1008 984 }
1009 985
1010 986 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
1011 987 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
1012 988 panic("segkp_fault: bad range");
1013 989 /*NOTREACHED*/
1014 990 }
1015 991
1016 992 if (rw == S_WRITE)
1017 993 flags = kpd->kp_flags | KPD_WRITEDIRTY;
1018 994 else
1019 995 flags = kpd->kp_flags;
1020 996 err = segkp_unlock(hat, seg, vaddr, len, kpd, flags);
1021 997 kpd->kp_flags &= ~KPD_LOCKED;
1022 998 mutex_exit(&kpd->kp_lock);
1023 999 return (err ? FC_MAKE_ERR(err) : 0);
1024 1000 }
1025 1001 mutex_exit(&kpd->kp_lock);
1026 1002 panic("segkp_fault: bogus fault type: %d\n", type);
1027 1003 /*NOTREACHED*/
1028 1004 }
1029 1005
1030 1006 /*
1031 1007 * Check that the given protections suffice over the range specified by
1032 1008 * vaddr and len. For this segment type, the only issue is whether or
1033 1009 * not the range lies completely within the mapped part of an allocated
1034 1010 * resource.
1035 1011 */
1036 1012 /* ARGSUSED */
1037 1013 static int
1038 1014 segkp_checkprot(struct seg *seg, caddr_t vaddr, size_t len, uint_t prot)
1039 1015 {
1040 1016 struct segkp_data *kpd = NULL;
1041 1017 caddr_t mbase;
1042 1018 size_t mlen;
1043 1019
1044 1020 if ((kpd = segkp_find(seg, vaddr)) == NULL)
1045 1021 return (EACCES);
1046 1022
1047 1023 mutex_enter(&kpd->kp_lock);
1048 1024 mbase = stom(kpd->kp_base, kpd->kp_flags);
1049 1025 mlen = SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags);
1050 1026 if (len > mlen || vaddr < mbase ||
1051 1027 ((vaddr + len) > (mbase + mlen))) {
1052 1028 mutex_exit(&kpd->kp_lock);
1053 1029 return (EACCES);
1054 1030 }
1055 1031 mutex_exit(&kpd->kp_lock);
1056 1032 return (0);
1057 1033 }
1058 1034
1059 1035
1060 1036 /*
1061 1037 * Check to see if it makes sense to do kluster/read ahead to
1062 1038 * addr + delta relative to the mapping at addr. We assume here
1063 1039 * that delta is a signed PAGESIZE'd multiple (which can be negative).
1064 1040 *
1065 1041 * For seg_u we always "approve" of this action from our standpoint.
1066 1042 */
1067 1043 /*ARGSUSED*/
1068 1044 static int
1069 1045 segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
1070 1046 {
1071 1047 return (0);
1072 1048 }
1073 1049
1074 1050 /*
1075 1051 * Load and possibly lock intra-slot resources in the range given by
1076 1052 * vaddr and len.
1077 1053 */
1078 1054 static int
1079 1055 segkp_load(
1080 1056 struct hat *hat,
1081 1057 struct seg *seg,
1082 1058 caddr_t vaddr,
1083 1059 size_t len,
1084 1060 struct segkp_data *kpd,
1085 1061 uint_t flags)
1086 1062 {
1087 1063 caddr_t va;
1088 1064 caddr_t vlim;
1089 1065 ulong_t i;
1090 1066 uint_t lock;
1091 1067
1092 1068 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1093 1069
1094 1070 len = P2ROUNDUP(len, PAGESIZE);
1095 1071
1096 1072 /* If locking, reserve physical memory */
1097 1073 if (flags & KPD_LOCKED) {
1098 1074 pgcnt_t pages = btop(len);
1099 1075 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1100 1076 atomic_add_long(&anon_segkp_pages_locked, pages);
1101 1077 (void) page_resv(pages, KM_SLEEP);
1102 1078 }
1103 1079
1104 1080 /*
1105 1081 * Loop through the pages in the given range.
1106 1082 */
1107 1083 va = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
1108 1084 vaddr = va;
1109 1085 vlim = va + len;
1110 1086 lock = flags & KPD_LOCKED;
1111 1087 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1112 1088 for (; va < vlim; va += PAGESIZE, i++) {
1113 1089 page_t *pl[2]; /* second element NULL terminator */
1114 1090 struct vnode *vp;
1115 1091 anoff_t off;
1116 1092 int err;
1117 1093 struct anon *ap;
1118 1094
1119 1095 /*
1120 1096 * Summon the page. If it's not resident, arrange
1121 1097 * for synchronous i/o to pull it in.
1122 1098 */
1123 1099 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1124 1100 swap_xlate(ap, &vp, &off);
1125 1101
1126 1102 /*
1127 1103 * The returned page list will have exactly one entry,
1128 1104 * which is returned to us already kept.
1129 1105 */
1130 1106 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE, NULL,
1131 1107 pl, PAGESIZE, seg, va, S_READ, kcred, NULL);
1132 1108
1133 1109 if (err) {
1134 1110 /*
1135 1111 * Back out of what we've done so far.
1136 1112 */
1137 1113 (void) segkp_unlock(hat, seg, vaddr,
1138 1114 (va - vaddr), kpd, flags);
1139 1115 return (err);
1140 1116 }
1141 1117
1142 1118 /*
1143 1119 * Load an MMU translation for the page.
1144 1120 */
1145 1121 hat_memload(hat, va, pl[0], (PROT_READ|PROT_WRITE),
1146 1122 lock ? HAT_LOAD_LOCK : HAT_LOAD);
1147 1123
1148 1124 if (!lock) {
1149 1125 /*
1150 1126 * Now, release "shared" lock on the page.
1151 1127 */
1152 1128 page_unlock(pl[0]);
1153 1129 }
1154 1130 }
1155 1131 return (0);
1156 1132 }
1157 1133
1158 1134 /*
1159 1135 * At the very least, unload the MMU translations and unlock the range
1160 1136 * if locked. Can be called with the flag value KPD_WRITEDIRTY, which
1161 1137 * specifies that any dirty pages should be written to disk.
1162 1138 */
1163 1139 static int
1164 1140 segkp_unlock(
1165 1141 struct hat *hat,
1166 1142 struct seg *seg,
1167 1143 caddr_t vaddr,
1168 1144 size_t len,
1169 1145 struct segkp_data *kpd,
1170 1146 uint_t flags)
1171 1147 {
1172 1148 caddr_t va;
1173 1149 caddr_t vlim;
1174 1150 ulong_t i;
1175 1151 struct page *pp;
1176 1152 struct vnode *vp;
1177 1153 anoff_t off;
1178 1154 struct anon *ap;
1179 1155
1180 1156 #ifdef lint
1181 1157 seg = seg;
1182 1158 #endif /* lint */
1183 1159
1184 1160 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1185 1161
1186 1162 /*
1187 1163 * Loop through the pages in the given range. It is assumed
1188 1164 * segkp_unlock is called with page aligned base
1189 1165 * segkp_unlock is called with a page-aligned base.
1190 1166 va = vaddr;
1191 1167 vlim = va + len;
1192 1168 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1193 1169 hat_unload(hat, va, len,
1194 1170 ((flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
1195 1171 for (; va < vlim; va += PAGESIZE, i++) {
1196 1172 /*
1197 1173 * Find the page associated with this part of the
1198 1174 * slot, tracking it down through its associated swap
1199 1175 * space.
1200 1176 */
1201 1177 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1202 1178 swap_xlate(ap, &vp, &off);
1203 1179
1204 1180 if (flags & KPD_LOCKED) {
1205 1181 if ((pp = page_find(vp, off)) == NULL) {
1206 1182 if (flags & KPD_LOCKED) {
1207 1183 panic("segkp_softunlock: missing page");
1208 1184 /*NOTREACHED*/
1209 1185 }
1210 1186 }
1211 1187 } else {
1212 1188 /*
1213 1189 * Nothing to do if the slot is not locked and the
1214 1190 * page doesn't exist.
1215 1191 */
1216 1192 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL)
1217 1193 continue;
1218 1194 }
1219 1195
1220 1196 /*
1221 1197 * If the page doesn't have any translations, is
1222 1198 * dirty and not being shared, then push it out
1223 1199 * asynchronously and avoid waiting for the
1224 1200 * pageout daemon to do it for us.
1225 1201 *
1226 1202 * XXX - Do we really need to get the "exclusive"
1227 1203 * lock via an upgrade?
1228 1204 */
1229 1205 if ((flags & KPD_WRITEDIRTY) && !hat_page_is_mapped(pp) &&
1230 1206 hat_ismod(pp) && page_tryupgrade(pp)) {
1231 1207 /*
1232 1208 * Hold the vnode before releasing the page lock to
1233 1209 * prevent it from being freed and re-used by some
1234 1210 * other thread.
1235 1211 */
1236 1212 VN_HOLD(vp);
1237 1213 page_unlock(pp);
1238 1214
1239 1215 /*
1240 1216 * Want most powerful credentials we can get so
1241 1217 * use kcred.
1242 1218 */
1243 1219 (void) VOP_PUTPAGE(vp, (offset_t)off, PAGESIZE,
1244 1220 B_ASYNC | B_FREE, kcred, NULL);
1245 1221 VN_RELE(vp);
1246 1222 } else {
1247 1223 page_unlock(pp);
1248 1224 }
1249 1225 }
1250 1226
1251 1227 /* If unlocking, release physical memory */
1252 1228 if (flags & KPD_LOCKED) {
1253 1229 pgcnt_t pages = btopr(len);
1254 1230 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1255 1231 atomic_add_long(&anon_segkp_pages_locked, -pages);
1256 1232 page_unresv(pages);
1257 1233 }
1258 1234 return (0);
1259 1235 }
1260 1236
1261 1237 /*
1262 1238 * Insert the kpd in the hash table.
1263 1239 */
1264 1240 static void
1265 1241 segkp_insert(struct seg *seg, struct segkp_data *kpd)
1266 1242 {
1267 1243 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1268 1244 int index;
1269 1245
1270 1246 /*
1271 1247 * Insert the kpd based on the address that will be returned
1272 1248 * via segkp_release.
1273 1249 */
1274 1250 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1275 1251 mutex_enter(&segkp_lock);
1276 1252 kpd->kp_next = kpsd->kpsd_hash[index];
1277 1253 kpsd->kpsd_hash[index] = kpd;
1278 1254 mutex_exit(&segkp_lock);
1279 1255 }
1280 1256
1281 1257 /*
1282 1258 * Remove kpd from the hash table.
1283 1259 */
1284 1260 static void
1285 1261 segkp_delete(struct seg *seg, struct segkp_data *kpd)
1286 1262 {
1287 1263 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1288 1264 struct segkp_data **kpp;
1289 1265 int index;
1290 1266
1291 1267 ASSERT(MUTEX_HELD(&segkp_lock));
1292 1268
1293 1269 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1294 1270 for (kpp = &kpsd->kpsd_hash[index];
1295 1271 *kpp != NULL; kpp = &((*kpp)->kp_next)) {
1296 1272 if (*kpp == kpd) {
1297 1273 *kpp = kpd->kp_next;
1298 1274 return;
1299 1275 }
1300 1276 }
1301 1277 panic("segkp_delete: unable to find element to delete");
1302 1278 /*NOTREACHED*/
1303 1279 }
1304 1280
1305 1281 /*
1306 1282 * Find the kpd associated with a vaddr.
1307 1283 *
1308 1284 * Most of the callers of segkp_find will pass the vaddr that
1309 1285 * hashes to the desired index, but there are cases where
1310 1286 * this is not true in which case we have to (potentially) scan
1311 1287 * the whole table looking for it. This should be very rare
1312 1288 * (e.g. a segkp_fault(F_INVAL) on an address somewhere in the
1313 1289 * middle of the segkp_data region).
1314 1290 */
1315 1291 static struct segkp_data *
1316 1292 segkp_find(struct seg *seg, caddr_t vaddr)
1317 1293 {
1318 1294 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1319 1295 struct segkp_data *kpd;
1320 1296 int i;
1321 1297 int stop;
1322 1298
1323 1299 i = stop = SEGKP_HASH(vaddr);
1324 1300 mutex_enter(&segkp_lock);
1325 1301 do {
1326 1302 for (kpd = kpsd->kpsd_hash[i]; kpd != NULL;
1327 1303 kpd = kpd->kp_next) {
1328 1304 if (vaddr >= kpd->kp_base &&
1329 1305 vaddr < kpd->kp_base + kpd->kp_len) {
1330 1306 mutex_exit(&segkp_lock);
1331 1307 return (kpd);
1332 1308 }
1333 1309 }
1334 1310 if (--i < 0)
1335 1311 i = SEGKP_HASHSZ - 1; /* Wrap */
1336 1312 } while (i != stop);
1337 1313 mutex_exit(&segkp_lock);
1338 1314 return (NULL); /* Not found */
1339 1315 }
1340 1316
1341 1317 /*
1342 1318 * returns size of swappable area.
1343 1319 */
1344 1320 size_t
1345 1321 swapsize(caddr_t v)
1346 1322 {
1347 1323 struct segkp_data *kpd;
1348 1324
1349 1325 if ((kpd = segkp_find(segkp, v)) != NULL)
1350 1326 return (SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
1351 1327 else
1352 1328 return (NULL);
1353 1329 }
1354 1330
1355 1331 /*
1356 1332 * Dump out all the active segkp pages
1357 1333 */
1358 1334 static void
1359 1335 segkp_dump(struct seg *seg)
1360 1336 {
1361 1337 int i;
1362 1338 struct segkp_data *kpd;
1363 1339 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1364 1340
1365 1341 for (i = 0; i < SEGKP_HASHSZ; i++) {
1366 1342 for (kpd = kpsd->kpsd_hash[i];
1367 1343 kpd != NULL; kpd = kpd->kp_next) {
1368 1344 pfn_t pfn;
1369 1345 caddr_t addr;
1370 1346 caddr_t eaddr;
1371 1347
1372 1348 addr = kpd->kp_base;
1373 1349 eaddr = addr + kpd->kp_len;
1374 1350 while (addr < eaddr) {
1375 1351 ASSERT(seg->s_as == &kas);
1376 1352 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1377 1353 if (pfn != PFN_INVALID)
1378 1354 dump_addpage(seg->s_as, addr, pfn);
1379 1355 addr += PAGESIZE;
1380 1356 dump_timeleft = dump_timeout;
1381 1357 }
1382 1358 }
1383 1359 }
1384 1360 }
1385 1361
1386 1362 /*ARGSUSED*/
1387 1363 static int
1388 1364 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1389 1365 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1390 1366 {
1391 1367 return (ENOTSUP);
1392 1368 }
1393 1369
1394 1370 /*ARGSUSED*/
1395 1371 static int
1396 1372 segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
1397 1373 {
1398 1374 return (ENODEV);
1399 1375 }
1400 1376
1401 1377 /*ARGSUSED*/
1402 1378 static lgrp_mem_policy_info_t *
1403 1379 segkp_getpolicy(struct seg *seg, caddr_t addr)
1404 1380 {
1405 1381 return (NULL);
1406 1382 }
1407 1383
1408 1384 /*ARGSUSED*/
1409 1385 static int
1410 1386 segkp_capable(struct seg *seg, segcapability_t capability)
1411 1387 {
1412 1388 return (0);
1413 1389 }
1414 1390
1415 1391 #include <sys/mem_config.h>
1416 1392
1417 1393 /*ARGSUSED*/
1418 1394 static void
1419 1395 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1420 1396 {}
1421 1397
1422 1398 /*
1423 1399 * During memory delete, turn off caches so that pages are not held.
1424 1400 * A better solution may be to unlock the pages while they are
1425 1401 * in the cache so that they may be collected naturally.
1426 1402 */
1427 1403
1428 1404 /*ARGSUSED*/
1429 1405 static int
1430 1406 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1431 1407 {
1432 1408 atomic_inc_32(&segkp_indel);
1433 1409 segkp_cache_free();
1434 1410 return (0);
1435 1411 }
1436 1412
1437 1413 /*ARGSUSED*/
1438 1414 static void
1439 1415 segkp_mem_config_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
1440 1416 {
1441 1417 atomic_dec_32(&segkp_indel);
1442 1418 }
1443 1419
1444 1420 static kphysm_setup_vector_t segkp_mem_config_vec = {
1445 1421 KPHYSM_SETUP_VECTOR_VERSION,
1446 1422 segkp_mem_config_post_add,
1447 1423 segkp_mem_config_pre_del,
1448 1424 segkp_mem_config_post_del,
1449 1425 };
1450 1426
1451 1427 static void
1452 1428 segkpinit_mem_config(struct seg *seg)
1453 1429 {
1454 1430 int ret;
1455 1431
1456 1432 ret = kphysm_setup_func_register(&segkp_mem_config_vec, (void *)seg);
1457 1433 ASSERT(ret == 0);
1458 1434 }
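
For context on the cache entry points in this file (segkp_cache_init(),
segkp_cache_get(), segkp_release()), a minimal caller sketch follows. It is
not taken from this webrev; the cache size of 24 and the use of DEFAULTSTKSZ
are illustrative, loosely modeled on kernel-stack allocation. Note that
KPD_NO_ANON is only accepted together with KPD_LOCKED, and that
segkp_cache_init() returns (void *)-1, not NULL, on failure.

	/* Hypothetical: set up a freelist of locked, redzoned stacks. */
	void *cookie = segkp_cache_init(segkp, 24, DEFAULTSTKSZ,
	    KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED);

	if (cookie != (void *)-1) {
		/* Reuses a cached resource, or allocates a fresh one. */
		caddr_t stk = segkp_cache_get(cookie);

		/* ... use the resource ... */

		/* Cached again if below kpf_max, otherwise freed. */
		segkp_release(segkp, stk);
	}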
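
Similarly, the segkp_map_red()/segkp_unmap_red() contract documented above
leaves it to the caller to remember whether the redzone was mapped and to
unmap it later. A hypothetical caller fragment, modeled on the pagefault()
path the comment mentions (it must be safe to sleep on page_create_va()):

	int mapped_red = segkp_map_red();

	/* ... handle the fault, possibly running deep into the stack ... */

	if (mapped_red)
		segkp_unmap_red();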