6154 const-ify segment ops structures
--- old/usr/src/uts/common/vm/vm_as.c
+++ new/usr/src/uts/common/vm/vm_as.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2015, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 /*
31 31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 32 * The Regents of the University of California
33 33 * All Rights Reserved
34 34 *
35 35 * University Acknowledgment- Portions of this document are derived from
36 36 * software developed by the University of California, Berkeley, and its
37 37 * contributors.
38 38 */
39 39
40 40 /*
41 41 * VM - address spaces.
42 42 */
43 43
44 44 #include <sys/types.h>
45 45 #include <sys/t_lock.h>
46 46 #include <sys/param.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/mman.h>
50 50 #include <sys/sysmacros.h>
51 51 #include <sys/cpuvar.h>
52 52 #include <sys/sysinfo.h>
53 53 #include <sys/kmem.h>
54 54 #include <sys/vnode.h>
55 55 #include <sys/vmsystm.h>
56 56 #include <sys/cmn_err.h>
57 57 #include <sys/debug.h>
58 58 #include <sys/tnf_probe.h>
59 59 #include <sys/vtrace.h>
60 60
61 61 #include <vm/hat.h>
62 62 #include <vm/xhat.h>
63 63 #include <vm/as.h>
64 64 #include <vm/seg.h>
65 65 #include <vm/seg_vn.h>
66 66 #include <vm/seg_dev.h>
67 67 #include <vm/seg_kmem.h>
68 68 #include <vm/seg_map.h>
69 69 #include <vm/seg_spt.h>
70 70 #include <vm/page.h>
71 71
72 72 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
73 73
74 74 static struct kmem_cache *as_cache;
75 75
76 76 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
77 77 static void as_clearwatchprot(struct as *, caddr_t, size_t);
78 78 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
79 79
80 80
81 81 /*
82 82 * Verifying the segment lists is very time-consuming; it may not be
83 83 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
84 84 */
85 85 #ifdef DEBUG
86 86 #define VERIFY_SEGLIST
87 87 int do_as_verify = 0;
88 88 #endif
89 89
90 90 /*
91 91 * Allocate a new callback data structure entry and fill in the events of
92 92 * interest, the address range of interest, and the callback argument.
93 93 * Link the entry on the as->a_callbacks list. A callback entry for the
94 94 * entire address space may be specified with vaddr = 0 and size = -1.
95 95 *
96 96 * CALLER'S RESPONSIBILITY: If not calling from within the process context for
97 97 * the specified as, the caller must guarantee persistence of the specified as
98 98 * for the duration of this function (e.g. pages being locked within the as
99 99 * will guarantee persistence).
100 100 */
101 101 int
102 102 as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
103 103 caddr_t vaddr, size_t size, int sleepflag)
104 104 {
105 105 struct as_callback *current_head, *cb;
106 106 caddr_t saddr;
107 107 size_t rsize;
108 108
109 109 /* callback function and an event are mandatory */
110 110 if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
111 111 return (EINVAL);
112 112
113 113 /* Adding a callback after as_free has been called is not allowed */
114 114 if (as == &kas)
115 115 return (ENOMEM);
116 116
117 117 /*
118 118 * vaddr = 0 and size = -1 is used to indicate that the callback range
119 119 * is the entire address space so no rounding is done in that case.
120 120 */
121 121 if (size != -1) {
122 122 saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
123 123 rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
124 124 (size_t)saddr;
125 125 /* check for wraparound */
126 126 if (saddr + rsize < saddr)
127 127 return (ENOMEM);
128 128 } else {
129 129 if (vaddr != 0)
130 130 return (EINVAL);
131 131 saddr = vaddr;
132 132 rsize = size;
133 133 }
134 134
135 135 /* Allocate and initialize a callback entry */
136 136 cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
137 137 if (cb == NULL)
138 138 return (EAGAIN);
139 139
140 140 cb->ascb_func = cb_func;
141 141 cb->ascb_arg = arg;
142 142 cb->ascb_events = events;
143 143 cb->ascb_saddr = saddr;
144 144 cb->ascb_len = rsize;
145 145
146 146 /* Add the entry to the list */
147 147 mutex_enter(&as->a_contents);
148 148 current_head = as->a_callbacks;
149 149 as->a_callbacks = cb;
150 150 cb->ascb_next = current_head;
151 151
152 152 /*
153 153 * The call to this function may lose in a race with
154 154 * a pertinent event - eg. a thread does long term memory locking
155 155 * but before the callback is added another thread executes as_unmap.
156 156 * A broadcast here resolves that.
157 157 */
158 158 if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
159 159 AS_CLRUNMAPWAIT(as);
160 160 cv_broadcast(&as->a_cv);
161 161 }
162 162
163 163 mutex_exit(&as->a_contents);
164 164 return (0);
165 165 }
166 166
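A minimal registration sketch (reviewer annotation, not part of this change): a driver that holds long-term page locks registers for unmap notification, using a driver-private pointer as the arg cookie. The callback name my_unmap_cb and the handle dp are hypothetical; the signature matches how ascb_func is invoked in as_execute_callback() below.

    /*
     * Hypothetical driver callback; 'arg' is the cookie given at
     * registration time.  Per the contract in as_execute_callback(),
     * the callback deletes itself once it is safe to proceed.
     */
    static void
    my_unmap_cb(struct as *as, void *arg, uint_t events)
    {
            /* ... drop the driver's long-term page locks ... */
            (void) as_delete_callback(as, arg);
    }

    /* Register for unmap events on [va, va + len): */
    error = as_add_callback(as, my_unmap_cb, dp, AS_UNMAP_EVENT,
        va, len, KM_SLEEP);
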
167 167 /*
168 168 * Search the callback list for an entry which pertains to arg.
169 169 *
170 170 * This is called from within the client upon completion of the callback.
171 171 * RETURN VALUES:
172 172 * AS_CALLBACK_DELETED (callback entry found and deleted)
173 173 * AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
174 174 * AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
175 175 * entry will be made in as_do_callbacks)
176 176 *
177 177 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
178 178 * set, it indicates that as_do_callbacks is processing this entry. The
179 179 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
180 180 * to unblock as_do_callbacks, in case it is blocked.
181 181 *
182 182 * CALLER'S RESPONSIBILITY: If not calling from within the process context for
183 183 * the specified as, the caller must guarantee persistence of the specified as
184 184 * for the duration of this function (e.g. pages being locked within the as
185 185 * will guarantee persistence).
186 186 */
187 187 uint_t
188 188 as_delete_callback(struct as *as, void *arg)
189 189 {
190 190 struct as_callback **prevcb = &as->a_callbacks;
191 191 struct as_callback *cb;
192 192 uint_t rc = AS_CALLBACK_NOTFOUND;
193 193
194 194 mutex_enter(&as->a_contents);
195 195 for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
196 196 if (cb->ascb_arg != arg)
197 197 continue;
198 198
199 199 /*
200 200 * If the events indicate AS_CALLBACK_CALLED, just clear
201 201 * AS_ALL_EVENT in the events field and wakeup the thread
202 202 * that may be waiting in as_do_callbacks. as_do_callbacks
203 203 * will take care of removing this entry from the list. In
204 204 * that case, return AS_CALLBACK_DELETE_DEFERRED. Otherwise
205 205 * (AS_CALLBACK_CALLED not set), just remove it from the
206 206 * list, return the memory and return AS_CALLBACK_DELETED.
207 207 */
208 208 if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
209 209 /* leave AS_CALLBACK_CALLED */
210 210 cb->ascb_events &= ~AS_ALL_EVENT;
211 211 rc = AS_CALLBACK_DELETE_DEFERRED;
212 212 cv_broadcast(&as->a_cv);
213 213 } else {
214 214 *prevcb = cb->ascb_next;
215 215 kmem_free(cb, sizeof (struct as_callback));
216 216 rc = AS_CALLBACK_DELETED;
217 217 }
218 218 break;
219 219 }
220 220 mutex_exit(&as->a_contents);
221 221 return (rc);
222 222 }
223 223
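The three return values imply a teardown pattern like the following sketch (dp is the same hypothetical cookie as in the registration sketch above):

    switch (as_delete_callback(as, dp)) {
    case AS_CALLBACK_DELETED:       /* entry found and freed here */
    case AS_CALLBACK_NOTFOUND:      /* never registered, or already gone */
            break;
    case AS_CALLBACK_DELETE_DEFERRED:
            /*
             * The callback is mid-flight; as_do_callbacks() will free
             * the entry.  The cookie must stay valid until it does.
             */
            break;
    }
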
224 224 /*
225 225 * Searches the as callback list for a matching entry.
226 226 * Returns a pointer to the first matching callback, or NULL if
227 227 * nothing is found.
228 228 * This function never sleeps, so it is ok to call it with locks
229 229 * held other than the (required) a_contents mutex.
230 230 *
231 231 * See also comment on as_do_callbacks below.
232 232 */
233 233 static struct as_callback *
234 234 as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
235 235 size_t event_len)
236 236 {
237 237 struct as_callback *cb;
238 238
239 239 ASSERT(MUTEX_HELD(&as->a_contents));
240 240 for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
241 241 /*
242 242 * If the callback has not already been called, then
243 243 * check if events or address range pertains. An event_len
244 244 * of zero means do an unconditional callback.
245 245 */
246 246 if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
247 247 ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
248 248 (event_addr + event_len < cb->ascb_saddr) ||
249 249 (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
250 250 continue;
251 251 }
252 252 break;
253 253 }
254 254 return (cb);
255 255 }
256 256
257 257 /*
258 258 * Executes a given callback and removes it from the callback list for
259 259 * this address space.
260 260 * This function may sleep so the caller must drop all locks except
261 261 * a_contents before calling this func.
262 262 *
263 263 * See also comments on as_do_callbacks below.
264 264 */
265 265 static void
266 266 as_execute_callback(struct as *as, struct as_callback *cb,
267 267 uint_t events)
268 268 {
269 269 struct as_callback **prevcb;
270 270 void *cb_arg;
271 271
272 272 ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
273 273 cb->ascb_events |= AS_CALLBACK_CALLED;
274 274 mutex_exit(&as->a_contents);
275 275 (*cb->ascb_func)(as, cb->ascb_arg, events);
276 276 mutex_enter(&as->a_contents);
277 277 /*
278 278 * the callback function is required to delete the callback
279 279 * when the callback function determines it is OK for
280 280 * this thread to continue. as_delete_callback will clear
281 281 * the AS_ALL_EVENT in the events field when it is deleted.
282 282 * If the callback function called as_delete_callback,
283 283 * events will already be cleared and there will be no blocking.
284 284 */
285 285 while ((cb->ascb_events & events) != 0) {
286 286 cv_wait(&as->a_cv, &as->a_contents);
287 287 }
288 288 /*
289 289 * This entry needs to be taken off the list. Normally, the
290 290 * callback func itself does that, but unfortunately the list
291 291 * may have changed while the callback was running because the
292 292 * a_contents mutex was dropped and someone else other than the
293 293 * callback func itself could have called as_delete_callback,
294 294 * so we have to search to find this entry again. The entry
295 295 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
296 296 */
297 297 cb_arg = cb->ascb_arg;
298 298 prevcb = &as->a_callbacks;
299 299 for (cb = as->a_callbacks; cb != NULL;
300 300 prevcb = &cb->ascb_next, cb = *prevcb) {
301 301 if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
302 302 (cb_arg != cb->ascb_arg)) {
303 303 continue;
304 304 }
305 305 *prevcb = cb->ascb_next;
306 306 kmem_free(cb, sizeof (struct as_callback));
307 307 break;
308 308 }
309 309 }
310 310
311 311 /*
312 312 * Check the callback list for a matching event and intersection of
313 313 * address range. If there is a match, invoke the callback. Skip an entry if:
314 314 * - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
315 315 * - the event is not of interest
316 316 * - the address range is not of interest
317 317 *
318 318 * An event_len of zero indicates a request for an unconditional callback
319 319 * (regardless of event), only the AS_CALLBACK_CALLED is checked. The
320 320 * a_contents lock must be dropped before a callback, so only one callback
321 321 * can be done before returning. Return -1 (true) if a callback was
322 322 * executed and removed from the list, else return 0 (false).
323 323 *
324 324 * The logically separate parts, i.e. finding a matching callback and
325 325 * executing a given callback have been separated into two functions
326 326 * so that they can be called with different sets of locks held beyond
327 327 * the always-required a_contents. as_find_callback does not sleep so
328 328 * it is ok to call it if more locks than a_contents (i.e. the a_lock
329 329 * rwlock) are held. as_execute_callback on the other hand may sleep
330 330 * so all locks beyond a_contents must be dropped by the caller if one
331 331 * does not want to end up comatose.
332 332 */
333 333 static int
334 334 as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
335 335 size_t event_len)
336 336 {
337 337 struct as_callback *cb;
338 338
339 339 if ((cb = as_find_callback(as, events, event_addr, event_len))) {
340 340 as_execute_callback(as, cb, events);
341 341 return (-1);
342 342 }
343 343 return (0);
344 344 }
345 345
346 346 /*
347 347 * Search for the segment containing addr. If a segment containing addr
348 348 * exists, that segment is returned. If no such segment exists, and
349 349 * the list spans addresses greater than addr, then the first segment
350 350 * whose base is greater than addr is returned; otherwise, NULL is
351 351 * returned unless tail is true, in which case the last element of the
352 352 * list is returned.
353 353 *
354 354 * a_seglast is used to cache the last found segment for repeated
355 355 * searches to the same addr (which happens frequently).
356 356 */
357 357 struct seg *
358 358 as_findseg(struct as *as, caddr_t addr, int tail)
359 359 {
360 360 struct seg *seg = as->a_seglast;
361 361 avl_index_t where;
362 362
363 363 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
364 364
365 365 if (seg != NULL &&
366 366 seg->s_base <= addr &&
367 367 addr < seg->s_base + seg->s_size)
368 368 return (seg);
369 369
370 370 seg = avl_find(&as->a_segtree, &addr, &where);
371 371 if (seg != NULL)
372 372 return (as->a_seglast = seg);
373 373
374 374 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
375 375 if (seg == NULL && tail)
376 376 seg = avl_last(&as->a_segtree);
377 377 return (as->a_seglast = seg);
378 378 }
379 379
380 380 #ifdef VERIFY_SEGLIST
381 381 /*
382 382 * verify that the linked list is coherent
383 383 */
384 384 static void
385 385 as_verify(struct as *as)
386 386 {
387 387 struct seg *seg, *seglast, *p, *n;
388 388 uint_t nsegs = 0;
389 389
390 390 if (do_as_verify == 0)
391 391 return;
392 392
393 393 seglast = as->a_seglast;
394 394
395 395 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
396 396 ASSERT(seg->s_as == as);
397 397 p = AS_SEGPREV(as, seg);
398 398 n = AS_SEGNEXT(as, seg);
399 399 ASSERT(p == NULL || p->s_as == as);
400 400 ASSERT(p == NULL || p->s_base < seg->s_base);
401 401 ASSERT(n == NULL || n->s_base > seg->s_base);
402 402 ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
403 403 if (seg == seglast)
404 404 seglast = NULL;
405 405 nsegs++;
406 406 }
407 407 ASSERT(seglast == NULL);
408 408 ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
409 409 }
410 410 #endif /* VERIFY_SEGLIST */
411 411
412 412 /*
413 413 * Add a new segment to the address space. The avl_find()
414 414 * may be expensive so we attempt to use last segment accessed
415 415 * in as_gap() as an insertion point.
416 416 */
417 417 int
418 418 as_addseg(struct as *as, struct seg *newseg)
419 419 {
420 420 struct seg *seg;
421 421 caddr_t addr;
422 422 caddr_t eaddr;
423 423 avl_index_t where;
424 424
425 425 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
426 426
427 427 as->a_updatedir = 1; /* inform /proc */
428 428 gethrestime(&as->a_updatetime);
429 429
430 430 if (as->a_lastgaphl != NULL) {
431 431 struct seg *hseg = NULL;
432 432 struct seg *lseg = NULL;
433 433
434 434 if (as->a_lastgaphl->s_base > newseg->s_base) {
435 435 hseg = as->a_lastgaphl;
436 436 lseg = AVL_PREV(&as->a_segtree, hseg);
437 437 } else {
438 438 lseg = as->a_lastgaphl;
439 439 hseg = AVL_NEXT(&as->a_segtree, lseg);
440 440 }
441 441
442 442 if (hseg && lseg && lseg->s_base < newseg->s_base &&
443 443 hseg->s_base > newseg->s_base) {
444 444 avl_insert_here(&as->a_segtree, newseg, lseg,
445 445 AVL_AFTER);
446 446 as->a_lastgaphl = NULL;
447 447 as->a_seglast = newseg;
448 448 return (0);
449 449 }
450 450 as->a_lastgaphl = NULL;
451 451 }
452 452
453 453 addr = newseg->s_base;
454 454 eaddr = addr + newseg->s_size;
455 455 again:
456 456
457 457 seg = avl_find(&as->a_segtree, &addr, &where);
458 458
459 459 if (seg == NULL)
460 460 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
461 461
462 462 if (seg == NULL)
463 463 seg = avl_last(&as->a_segtree);
464 464
465 465 if (seg != NULL) {
466 466 caddr_t base = seg->s_base;
[ 466 lines elided ]
467 467
468 468 /*
469 469 * If top of seg is below the requested address, then
470 470 * the insertion point is at the end of the linked list,
471 471 * and seg points to the tail of the list. Otherwise,
472 472 * the insertion point is immediately before seg.
473 473 */
474 474 if (base + seg->s_size > addr) {
475 475 if (addr >= base || eaddr > base) {
476 476 #ifdef __sparc
477 - extern struct seg_ops segnf_ops;
477 + extern const struct seg_ops segnf_ops;
478 478
479 479 /*
480 480 * no-fault segs must disappear if overlaid.
481 481 * XXX need new segment type so
482 482 * we don't have to check s_ops
483 483 */
484 484 if (seg->s_ops == &segnf_ops) {
485 485 seg_unmap(seg);
486 486 goto again;
487 487 }
488 488 #endif
489 489 return (-1); /* overlapping segment */
490 490 }
491 491 }
492 492 }
493 493 as->a_seglast = newseg;
494 494 avl_insert(&as->a_segtree, newseg, where);
495 495
496 496 #ifdef VERIFY_SEGLIST
497 497 as_verify(as);
498 498 #endif
499 499 return (0);
500 500 }
501 501
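The segnf_ops hunk above shows the shape of the whole change: extern declarations of segment ops tables gain const, which in turn requires the definitions to be const. A hedged sketch of the definition side (the designated initializers and the ops named are illustrative only; the real table has many more entries):

    /* before: a writable ops table, one per segment driver */
    struct seg_ops segnf_ops = { /* ... */ };

    /*
     * after: immutable for the life of the kernel, placed in
     * read-only data by the compiler
     */
    const struct seg_ops segnf_ops = {
            .fault  = segnf_fault,
            /* ... remaining ops ... */
    };

Pointer comparisons such as the s_ops == &segnf_ops check above are unaffected; assuming s_ops itself becomes a pointer-to-const, writes through it now fail at compile time instead of silently patching a live ops table.
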
502 502 struct seg *
503 503 as_removeseg(struct as *as, struct seg *seg)
504 504 {
505 505 avl_tree_t *t;
506 506
507 507 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
508 508
509 509 as->a_updatedir = 1; /* inform /proc */
510 510 gethrestime(&as->a_updatetime);
511 511
512 512 if (seg == NULL)
513 513 return (NULL);
514 514
515 515 t = &as->a_segtree;
516 516 if (as->a_seglast == seg)
517 517 as->a_seglast = NULL;
518 518 as->a_lastgaphl = NULL;
519 519
520 520 /*
521 521 * if this segment is at an address higher than
522 522 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
523 523 */
524 524 if (as->a_lastgap &&
525 525 (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
526 526 as->a_lastgap = AVL_NEXT(t, seg);
527 527
528 528 /*
529 529 * remove the segment from the seg tree
530 530 */
531 531 avl_remove(t, seg);
532 532
533 533 #ifdef VERIFY_SEGLIST
534 534 as_verify(as);
535 535 #endif
536 536 return (seg);
537 537 }
538 538
539 539 /*
540 540 * Find a segment containing addr.
541 541 */
542 542 struct seg *
543 543 as_segat(struct as *as, caddr_t addr)
544 544 {
545 545 struct seg *seg = as->a_seglast;
546 546
547 547 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
548 548
549 549 if (seg != NULL && seg->s_base <= addr &&
550 550 addr < seg->s_base + seg->s_size)
551 551 return (seg);
552 552
553 553 seg = avl_find(&as->a_segtree, &addr, NULL);
554 554 return (seg);
555 555 }
556 556
557 557 /*
558 558 * Serialize all searches for holes in an address space to
559 559 * prevent two or more threads from allocating the same virtual
560 560 * address range. The address space must not be "read/write"
561 561 * locked by the caller since we may block.
562 562 */
563 563 void
564 564 as_rangelock(struct as *as)
565 565 {
566 566 mutex_enter(&as->a_contents);
567 567 while (AS_ISCLAIMGAP(as))
568 568 cv_wait(&as->a_cv, &as->a_contents);
569 569 AS_SETCLAIMGAP(as);
570 570 mutex_exit(&as->a_contents);
571 571 }
572 572
573 573 /*
574 574 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
575 575 */
576 576 void
577 577 as_rangeunlock(struct as *as)
578 578 {
579 579 mutex_enter(&as->a_contents);
580 580 AS_CLRCLAIMGAP(as);
581 581 cv_signal(&as->a_cv);
582 582 mutex_exit(&as->a_contents);
583 583 }
584 584
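As the comments say, the range lock only serializes hole-finding and is distinct from a_lock. The usual pairing in mapping paths looks roughly like this sketch (the map_addr() call and its argument list are quoted from memory of the mmap path and may differ):

    as_rangelock(as);               /* serialize gap claims */
    map_addr(&addr, len, off, vacalign, flags);     /* pick a hole */
    if (addr == NULL) {
            as_rangeunlock(as);
            return (ENOMEM);
    }
    error = as_map(as, addr, len, segvn_create, &crargs);
    as_rangeunlock(as);
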
585 585 /*
586 586 * compare segments (or just an address) by segment address range
587 587 */
588 588 static int
589 589 as_segcompar(const void *x, const void *y)
590 590 {
591 591 struct seg *a = (struct seg *)x;
592 592 struct seg *b = (struct seg *)y;
593 593
594 594 if (a->s_base < b->s_base)
595 595 return (-1);
596 596 if (a->s_base >= b->s_base + b->s_size)
597 597 return (1);
598 598 return (0);
599 599 }
600 600
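This comparator is also what lets as_segat() and as_findseg() probe the tree with a plain address: avl_find() is handed &addr, and the cast to struct seg * is safe only because the comparator reads nothing but s_base, which sits at offset 0 of struct seg (an invariant of seg.h that this sketch assumes):

    /* Look up by bare address, as as_segat() does above: */
    caddr_t addr = ...;             /* address of interest */
    struct seg *seg = avl_find(&as->a_segtree, &addr, NULL);
    /* ((struct seg *)&addr)->s_base aliases addr itself */
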
601 601
602 602 void
603 603 as_avlinit(struct as *as)
604 604 {
605 605 avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
606 606 offsetof(struct seg, s_tree));
607 607 avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
608 608 offsetof(struct watched_page, wp_link));
609 609 }
610 610
611 611 /*ARGSUSED*/
612 612 static int
613 613 as_constructor(void *buf, void *cdrarg, int kmflags)
614 614 {
615 615 struct as *as = buf;
616 616
617 617 mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
618 618 cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
619 619 rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
620 620 as_avlinit(as);
621 621 return (0);
622 622 }
623 623
624 624 /*ARGSUSED1*/
625 625 static void
626 626 as_destructor(void *buf, void *cdrarg)
627 627 {
628 628 struct as *as = buf;
629 629
630 630 avl_destroy(&as->a_segtree);
631 631 mutex_destroy(&as->a_contents);
632 632 cv_destroy(&as->a_cv);
633 633 rw_destroy(&as->a_lock);
634 634 }
635 635
636 636 void
637 637 as_init(void)
638 638 {
639 639 as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
640 640 as_constructor, as_destructor, NULL, NULL, NULL, 0);
641 641 }
642 642
643 643 /*
644 644 * Allocate and initialize an address space data structure.
645 645 * We call hat_alloc to allow any machine dependent
646 646 * information in the hat structure to be initialized.
647 647 */
648 648 struct as *
649 649 as_alloc(void)
650 650 {
651 651 struct as *as;
652 652
653 653 as = kmem_cache_alloc(as_cache, KM_SLEEP);
654 654
655 655 as->a_flags = 0;
656 656 as->a_vbits = 0;
657 657 as->a_hrm = NULL;
658 658 as->a_seglast = NULL;
659 659 as->a_size = 0;
660 660 as->a_resvsize = 0;
661 661 as->a_updatedir = 0;
662 662 gethrestime(&as->a_updatetime);
663 663 as->a_objectdir = NULL;
664 664 as->a_sizedir = 0;
665 665 as->a_userlimit = (caddr_t)USERLIMIT;
666 666 as->a_lastgap = NULL;
667 667 as->a_lastgaphl = NULL;
668 668 as->a_callbacks = NULL;
669 669
670 670 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
671 671 as->a_hat = hat_alloc(as); /* create hat for default system mmu */
672 672 AS_LOCK_EXIT(as, &as->a_lock);
673 673
674 674 as->a_xhat = NULL;
675 675
676 676 return (as);
677 677 }
678 678
679 679 /*
680 680 * Free an address space data structure.
681 681 * Need to free the hat first and then
682 682 * all the segments on this as and finally
683 683 * the space for the as struct itself.
684 684 */
685 685 void
686 686 as_free(struct as *as)
687 687 {
688 688 struct hat *hat = as->a_hat;
689 689 struct seg *seg, *next;
690 690 int called = 0;
691 691
692 692 top:
693 693 /*
694 694 * Invoke ALL callbacks. as_do_callbacks will do one callback
695 695 * per call, and not return (-1) until the callback has completed.
696 696 * When as_do_callbacks returns zero, all callbacks have completed.
697 697 */
698 698 mutex_enter(&as->a_contents);
699 699 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
700 700 ;
701 701
702 702 /* This will prevent new XHATs from attaching to as */
703 703 if (!called)
704 704 AS_SETBUSY(as);
705 705 mutex_exit(&as->a_contents);
706 706 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
707 707
708 708 if (!called) {
709 709 called = 1;
710 710 hat_free_start(hat);
711 711 if (as->a_xhat != NULL)
712 712 xhat_free_start_all(as);
713 713 }
714 714 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
715 715 int err;
716 716
717 717 next = AS_SEGNEXT(as, seg);
718 718 retry:
719 719 err = segop_unmap(seg, seg->s_base, seg->s_size);
720 720 if (err == EAGAIN) {
721 721 mutex_enter(&as->a_contents);
722 722 if (as->a_callbacks) {
723 723 AS_LOCK_EXIT(as, &as->a_lock);
724 724 } else if (!AS_ISNOUNMAPWAIT(as)) {
725 725 /*
726 726 * Memory is currently locked. Wait for a
727 727 * cv_signal that it has been unlocked, then
728 728 * try the operation again.
729 729 */
730 730 if (AS_ISUNMAPWAIT(as) == 0)
731 731 cv_broadcast(&as->a_cv);
732 732 AS_SETUNMAPWAIT(as);
733 733 AS_LOCK_EXIT(as, &as->a_lock);
734 734 while (AS_ISUNMAPWAIT(as))
735 735 cv_wait(&as->a_cv, &as->a_contents);
736 736 } else {
737 737 /*
738 738 * We may have raced with
739 739 * segvn_reclaim()/segspt_reclaim(). In this
740 740 * case clean nounmapwait flag and retry since
741 741 * softlockcnt in this segment may be already
742 742 * 0. We don't drop as writer lock so our
743 743 * number of retries without sleeping should
744 744 * be very small. See segvn_reclaim() for
745 745 * more comments.
746 746 */
747 747 AS_CLRNOUNMAPWAIT(as);
748 748 mutex_exit(&as->a_contents);
749 749 goto retry;
750 750 }
751 751 mutex_exit(&as->a_contents);
752 752 goto top;
753 753 } else {
754 754 /*
755 755 * We do not expect any other error return at this
756 756 * time. This is similar to an ASSERT in seg_unmap()
757 757 */
758 758 ASSERT(err == 0);
759 759 }
760 760 }
761 761 hat_free_end(hat);
762 762 if (as->a_xhat != NULL)
763 763 xhat_free_end_all(as);
764 764 AS_LOCK_EXIT(as, &as->a_lock);
765 765
766 766 /* /proc stuff */
767 767 ASSERT(avl_numnodes(&as->a_wpage) == 0);
768 768 if (as->a_objectdir) {
769 769 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
770 770 as->a_objectdir = NULL;
771 771 as->a_sizedir = 0;
772 772 }
773 773
774 774 /*
775 775 * Free the struct as back to kmem. Assert it has no segments.
776 776 */
777 777 ASSERT(avl_numnodes(&as->a_segtree) == 0);
778 778 kmem_cache_free(as_cache, as);
779 779 }
780 780
781 781 int
782 782 as_dup(struct as *as, struct proc *forkedproc)
783 783 {
784 784 struct as *newas;
785 785 struct seg *seg, *newseg;
786 786 size_t purgesize = 0;
787 787 int error;
788 788
789 789 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
790 790 as_clearwatch(as);
791 791 newas = as_alloc();
792 792 newas->a_userlimit = as->a_userlimit;
793 793 newas->a_proc = forkedproc;
794 794
795 795 AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
796 796
797 797 /* This will prevent new XHATs from attaching */
798 798 mutex_enter(&as->a_contents);
799 799 AS_SETBUSY(as);
800 800 mutex_exit(&as->a_contents);
801 801 mutex_enter(&newas->a_contents);
802 802 AS_SETBUSY(newas);
803 803 mutex_exit(&newas->a_contents);
804 804
805 805 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
806 806
807 807 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
808 808
809 809 if (seg->s_flags & S_PURGE) {
810 810 purgesize += seg->s_size;
811 811 continue;
812 812 }
813 813
814 814 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
815 815 if (newseg == NULL) {
816 816 AS_LOCK_EXIT(newas, &newas->a_lock);
817 817 as_setwatch(as);
818 818 mutex_enter(&as->a_contents);
819 819 AS_CLRBUSY(as);
820 820 mutex_exit(&as->a_contents);
821 821 AS_LOCK_EXIT(as, &as->a_lock);
822 822 as_free(newas);
823 823 return (-1);
824 824 }
825 825 if ((error = segop_dup(seg, newseg)) != 0) {
826 826 /*
827 827 * We call seg_free() on the new seg
828 828 * because the segment is not set up
829 829 * completely; i.e. it has no ops.
830 830 */
831 831 as_setwatch(as);
832 832 mutex_enter(&as->a_contents);
833 833 AS_CLRBUSY(as);
834 834 mutex_exit(&as->a_contents);
835 835 AS_LOCK_EXIT(as, &as->a_lock);
836 836 seg_free(newseg);
837 837 AS_LOCK_EXIT(newas, &newas->a_lock);
838 838 as_free(newas);
839 839 return (error);
840 840 }
841 841 newas->a_size += seg->s_size;
842 842 }
843 843 newas->a_resvsize = as->a_resvsize - purgesize;
844 844
845 845 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
846 846 if (as->a_xhat != NULL)
847 847 error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);
848 848
849 849 mutex_enter(&newas->a_contents);
850 850 AS_CLRBUSY(newas);
851 851 mutex_exit(&newas->a_contents);
852 852 AS_LOCK_EXIT(newas, &newas->a_lock);
853 853
854 854 as_setwatch(as);
855 855 mutex_enter(&as->a_contents);
856 856 AS_CLRBUSY(as);
857 857 mutex_exit(&as->a_contents);
858 858 AS_LOCK_EXIT(as, &as->a_lock);
859 859 if (error != 0) {
860 860 as_free(newas);
861 861 return (error);
862 862 }
863 863 forkedproc->p_as = newas;
864 864 return (0);
865 865 }
866 866
867 867 /*
868 868 * Handle a ``fault'' at addr for size bytes.
869 869 */
870 870 faultcode_t
871 871 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
872 872 enum fault_type type, enum seg_rw rw)
873 873 {
874 874 struct seg *seg;
[ 387 lines elided ]
875 875 caddr_t raddr; /* rounded down addr */
876 876 size_t rsize; /* rounded up size */
877 877 size_t ssize;
878 878 faultcode_t res = 0;
879 879 caddr_t addrsav;
880 880 struct seg *segsav;
881 881 int as_lock_held;
882 882 klwp_t *lwp = ttolwp(curthread);
883 883 int is_xhat = 0;
884 884 int holding_wpage = 0;
885 - extern struct seg_ops segdev_ops;
886 -
887 -
888 885
889 886 if (as->a_hat != hat) {
890 887 /* This must be an XHAT then */
891 888 is_xhat = 1;
892 889
893 890 if ((type != F_INVAL) || (as == &kas))
894 891 return (FC_NOSUPPORT);
895 892 }
896 893
897 894 retry:
898 895 if (!is_xhat) {
899 896 /*
900 897 * Indicate that the lwp is not to be stopped while waiting
901 898 * for a pagefault. This is to avoid deadlock while debugging
902 899 * a process via /proc over NFS (in particular).
903 900 */
904 901 if (lwp != NULL)
905 902 lwp->lwp_nostop++;
906 903
907 904 /*
908 905 * same length must be used when we softlock and softunlock.
909 906 * We don't support softunlocking lengths less than
910 907 * the original length when there is largepage support.
911 908 * See seg_dev.c for more comments.
912 909 */
913 910 switch (type) {
914 911
915 912 case F_SOFTLOCK:
916 913 CPU_STATS_ADD_K(vm, softlock, 1);
917 914 break;
918 915
919 916 case F_SOFTUNLOCK:
920 917 break;
921 918
922 919 case F_PROT:
923 920 CPU_STATS_ADD_K(vm, prot_fault, 1);
924 921 break;
925 922
926 923 case F_INVAL:
927 924 CPU_STATS_ENTER_K();
928 925 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
929 926 if (as == &kas)
930 927 CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
931 928 CPU_STATS_EXIT_K();
932 929 break;
933 930 }
934 931 }
935 932
936 933 /* Kernel probe */
937 934 TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
938 935 tnf_opaque, address, addr,
939 936 tnf_fault_type, fault_type, type,
940 937 tnf_seg_access, access, rw);
941 938
942 939 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
943 940 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
944 941 (size_t)raddr;
945 942
946 943 /*
947 944 * XXX -- Don't grab the as lock for segkmap. We should grab it for
948 945 * correctness, but then we could be stuck holding this lock for
949 946 * a LONG time if the fault needs to be resolved on a slow
950 947 * filesystem, and then no-one will be able to exec new commands,
951 948 * as exec'ing requires the write lock on the as.
952 949 */
953 950 if (as == &kas && segkmap && segkmap->s_base <= raddr &&
954 951 raddr + size < segkmap->s_base + segkmap->s_size) {
955 952 /*
956 953 * if (as==&kas), this can't be XHAT: we've already returned
957 954 * FC_NOSUPPORT.
958 955 */
959 956 seg = segkmap;
960 957 as_lock_held = 0;
961 958 } else {
962 959 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
963 960 if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
964 961 /*
965 962 * Grab and hold the writers' lock on the as
966 963 * if the fault is to a watched page.
967 964 * This will keep CPUs from "peeking" at the
968 965 * address range while we're temporarily boosting
969 966 * the permissions for the XHAT device to
970 967 * resolve the fault in the segment layer.
971 968 *
972 969 * We could check whether faulted address
973 970 * is within a watched page and only then grab
974 971 * the writer lock, but this is simpler.
975 972 */
976 973 AS_LOCK_EXIT(as, &as->a_lock);
977 974 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
978 975 }
979 976
980 977 seg = as_segat(as, raddr);
981 978 if (seg == NULL) {
982 979 AS_LOCK_EXIT(as, &as->a_lock);
983 980 if ((lwp != NULL) && (!is_xhat))
984 981 lwp->lwp_nostop--;
985 982 return (FC_NOMAP);
986 983 }
987 984
988 985 as_lock_held = 1;
989 986 }
990 987
991 988 addrsav = raddr;
992 989 segsav = seg;
993 990
994 991 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
995 992 if (raddr >= seg->s_base + seg->s_size) {
996 993 seg = AS_SEGNEXT(as, seg);
997 994 if (seg == NULL || raddr != seg->s_base) {
998 995 res = FC_NOMAP;
999 996 break;
1000 997 }
1001 998 }
1002 999 if (raddr + rsize > seg->s_base + seg->s_size)
1003 1000 ssize = seg->s_base + seg->s_size - raddr;
1004 1001 else
1005 1002 ssize = rsize;
1006 1003
1007 1004 if (!is_xhat || (seg->s_ops != &segdev_ops)) {
1008 1005
1009 1006 if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
1010 1007 pr_is_watchpage_as(raddr, rw, as)) {
1011 1008 /*
1012 1009 * Handle watch pages. If we're faulting on a
1013 1010 * watched page from an X-hat, we have to
1014 1011 * restore the original permissions while we
1015 1012 * handle the fault.
1016 1013 */
1017 1014 as_clearwatch(as);
1018 1015 holding_wpage = 1;
1019 1016 }
1020 1017
1021 1018 res = segop_fault(hat, seg, raddr, ssize, type, rw);
1022 1019
1023 1020 /* Restore watchpoints */
1024 1021 if (holding_wpage) {
1025 1022 as_setwatch(as);
1026 1023 holding_wpage = 0;
1027 1024 }
1028 1025
1029 1026 if (res != 0)
1030 1027 break;
1031 1028 } else {
1032 1029 /* XHAT does not support seg_dev */
1033 1030 res = FC_NOSUPPORT;
1034 1031 break;
1035 1032 }
1036 1033 }
1037 1034
1038 1035 /*
1039 1036 * If we were SOFTLOCKing and encountered a failure,
1040 1037 * we must SOFTUNLOCK the range we already did. (Maybe we
1041 1038 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
1042 1039 * right here...)
1043 1040 */
1044 1041 if (res != 0 && type == F_SOFTLOCK) {
1045 1042 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
1046 1043 if (addrsav >= seg->s_base + seg->s_size)
1047 1044 seg = AS_SEGNEXT(as, seg);
1048 1045 ASSERT(seg != NULL);
1049 1046 /*
1050 1047 * Now call the fault routine again to perform the
1051 1048 * unlock using S_OTHER instead of the rw variable
1052 1049 * since we never got a chance to touch the pages.
1053 1050 */
1054 1051 if (raddr > seg->s_base + seg->s_size)
1055 1052 ssize = seg->s_base + seg->s_size - addrsav;
1056 1053 else
1057 1054 ssize = raddr - addrsav;
1058 1055 (void) segop_fault(hat, seg, addrsav, ssize,
1059 1056 F_SOFTUNLOCK, S_OTHER);
1060 1057 }
1061 1058 }
1062 1059 if (as_lock_held)
1063 1060 AS_LOCK_EXIT(as, &as->a_lock);
1064 1061 if ((lwp != NULL) && (!is_xhat))
1065 1062 lwp->lwp_nostop--;
1066 1063
1067 1064 /*
1068 1065 * If the lower levels returned EDEADLK for a fault,
1069 1066 * it means that we should retry the fault. Let's wait
1070 1067 * a bit also to let the deadlock causing condition clear.
1071 1068 * This is part of a gross hack to work around a design flaw
1072 1069 * in the ufs/sds logging code and should go away when the
1073 1070 * logging code is re-designed to fix the problem. See bug
1074 1071 * 4125102 for details of the problem.
1075 1072 */
1076 1073 if (FC_ERRNO(res) == EDEADLK) {
1077 1074 delay(deadlk_wait);
1078 1075 res = 0;
1079 1076 goto retry;
1080 1077 }
1081 1078 return (res);
1082 1079 }
1083 1080
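The failure path above preserves the invariant that every page successfully F_SOFTLOCKed is eventually F_SOFTUNLOCKed over the same range and length. From the caller's side, the pairing looks roughly like this sketch:

    /* pin the user pages for a transfer (sketch) */
    res = as_fault(as->a_hat, as, uaddr, len, F_SOFTLOCK, S_WRITE);
    if (res != 0)
            return (FC_ERRNO(res));
    /* ... access the pinned pages ... */
    (void) as_fault(as->a_hat, as, uaddr, len, F_SOFTUNLOCK, S_WRITE);
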
1084 1081
1085 1082
1086 1083 /*
1087 1084 * Asynchronous ``fault'' at addr for size bytes.
1088 1085 */
1089 1086 faultcode_t
1090 1087 as_faulta(struct as *as, caddr_t addr, size_t size)
1091 1088 {
1092 1089 struct seg *seg;
1093 1090 caddr_t raddr; /* rounded down addr */
1094 1091 size_t rsize; /* rounded up size */
1095 1092 faultcode_t res = 0;
1096 1093 klwp_t *lwp = ttolwp(curthread);
1097 1094
1098 1095 retry:
1099 1096 /*
1100 1097 * Indicate that the lwp is not to be stopped while waiting
1101 1098 * for a pagefault. This is to avoid deadlock while debugging
1102 1099 * a process via /proc over NFS (in particular).
1103 1100 */
1104 1101 if (lwp != NULL)
1105 1102 lwp->lwp_nostop++;
1106 1103
1107 1104 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1108 1105 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1109 1106 (size_t)raddr;
1110 1107
1111 1108 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1112 1109 seg = as_segat(as, raddr);
1113 1110 if (seg == NULL) {
1114 1111 AS_LOCK_EXIT(as, &as->a_lock);
1115 1112 if (lwp != NULL)
1116 1113 lwp->lwp_nostop--;
1117 1114 return (FC_NOMAP);
1118 1115 }
1119 1116
1120 1117 for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1121 1118 if (raddr >= seg->s_base + seg->s_size) {
1122 1119 seg = AS_SEGNEXT(as, seg);
1123 1120 if (seg == NULL || raddr != seg->s_base) {
1124 1121 res = FC_NOMAP;
1125 1122 break;
1126 1123 }
1127 1124 }
1128 1125 res = segop_faulta(seg, raddr);
1129 1126 if (res != 0)
1130 1127 break;
1131 1128 }
1132 1129 AS_LOCK_EXIT(as, &as->a_lock);
1133 1130 if (lwp != NULL)
1134 1131 lwp->lwp_nostop--;
1135 1132 /*
1136 1133 * If the lower levels returned EDEADLK for a fault,
1137 1134 * it means that we should retry the fault. Let's wait
1138 1135 * a bit also to let the deadlock causing condition clear.
1139 1136 * This is part of a gross hack to work around a design flaw
1140 1137 * in the ufs/sds logging code and should go away when the
1141 1138 * logging code is re-designed to fix the problem. See bug
1142 1139 * 4125102 for details of the problem.
1143 1140 */
1144 1141 if (FC_ERRNO(res) == EDEADLK) {
1145 1142 delay(deadlk_wait);
1146 1143 res = 0;
1147 1144 goto retry;
1148 1145 }
1149 1146 return (res);
1150 1147 }
1151 1148
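Both fault routines (like most entry points in this file) use the same rounding idiom: round the start down to a page boundary, round the end up, and express the range as (raddr, rsize). A worked instance, assuming PAGESIZE is 0x1000:

    /*
     * addr = 0x12345, size = 0x234   (PAGEOFFSET 0xfff, PAGEMASK ~0xfff)
     * raddr = 0x12345 & ~0xfff                      = 0x12000
     * rsize = ((0x12345 + 0x234 + 0xfff) & ~0xfff) - 0x12000
     *       = (0x13578 & ~0xfff) - 0x12000
     *       = 0x13000 - 0x12000                     = 0x1000
     * i.e. the byte range fits in one page, so one page is processed.
     */
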
1152 1149 /*
1153 1150 * Set the virtual mapping for the interval from [addr : addr + size)
1154 1151 * in address space `as' to have the specified protection.
1155 1152 * It is ok for the range to cross over several segments,
1156 1153 * as long as they are contiguous.
1157 1154 */
1158 1155 int
1159 1156 as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1160 1157 {
1161 1158 struct seg *seg;
1162 1159 struct as_callback *cb;
1163 1160 size_t ssize;
1164 1161 caddr_t raddr; /* rounded down addr */
1165 1162 size_t rsize; /* rounded up size */
1166 1163 int error = 0, writer = 0;
1167 1164 caddr_t saveraddr;
1168 1165 size_t saversize;
1169 1166
1170 1167 setprot_top:
1171 1168 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1172 1169 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1173 1170 (size_t)raddr;
1174 1171
1175 1172 if (raddr + rsize < raddr) /* check for wraparound */
1176 1173 return (ENOMEM);
1177 1174
1178 1175 saveraddr = raddr;
1179 1176 saversize = rsize;
1180 1177
1181 1178 /*
1182 1179 * Normally we only lock the as as a reader. But
1183 1180 * if due to setprot the segment driver needs to split
1184 1181 * a segment it will return IE_RETRY. Therefore we re-acquire
1185 1182 * the as lock as a writer so the segment driver can change
1186 1183 * the seg list. Also the segment driver will return IE_RETRY
1187 1184 * after it has changed the segment list so we therefore keep
1188 1185 * locking as a writer. Since these opeartions should be rare
1189 1186 * want to only lock as a writer when necessary.
1190 1187 */
1191 1188 if (writer || avl_numnodes(&as->a_wpage) != 0) {
1192 1189 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1193 1190 } else {
1194 1191 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1195 1192 }
1196 1193
1197 1194 as_clearwatchprot(as, raddr, rsize);
1198 1195 seg = as_segat(as, raddr);
1199 1196 if (seg == NULL) {
1200 1197 as_setwatch(as);
1201 1198 AS_LOCK_EXIT(as, &as->a_lock);
1202 1199 return (ENOMEM);
1203 1200 }
1204 1201
1205 1202 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1206 1203 if (raddr >= seg->s_base + seg->s_size) {
1207 1204 seg = AS_SEGNEXT(as, seg);
1208 1205 if (seg == NULL || raddr != seg->s_base) {
1209 1206 error = ENOMEM;
1210 1207 break;
1211 1208 }
1212 1209 }
1213 1210 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1214 1211 ssize = seg->s_base + seg->s_size - raddr;
1215 1212 else
1216 1213 ssize = rsize;
1217 1214 retry:
1218 1215 error = segop_setprot(seg, raddr, ssize, prot);
1219 1216
1220 1217 if (error == IE_NOMEM) {
1221 1218 error = EAGAIN;
1222 1219 break;
1223 1220 }
1224 1221
1225 1222 if (error == IE_RETRY) {
1226 1223 AS_LOCK_EXIT(as, &as->a_lock);
1227 1224 writer = 1;
1228 1225 goto setprot_top;
1229 1226 }
1230 1227
1231 1228 if (error == EAGAIN) {
1232 1229 /*
1233 1230 * Make sure we have a_lock as writer.
1234 1231 */
1235 1232 if (writer == 0) {
1236 1233 AS_LOCK_EXIT(as, &as->a_lock);
1237 1234 writer = 1;
1238 1235 goto setprot_top;
1239 1236 }
1240 1237
1241 1238 /*
1242 1239 * Memory is currently locked. It must be unlocked
1243 1240 * before this operation can succeed through a retry.
1244 1241 * The possible reasons for locked memory and
1245 1242 * corresponding strategies for unlocking are:
1246 1243 * (1) Normal I/O
1247 1244 * wait for a signal that the I/O operation
1248 1245 * has completed and the memory is unlocked.
1249 1246 * (2) Asynchronous I/O
1250 1247 * The aio subsystem does not unlock pages when
1251 1248 * the I/O is completed. Those pages are unlocked
1252 1249 * when the application calls aiowait/aioerror.
1253 1250 * So, to prevent blocking forever, cv_broadcast()
1254 1251 * is done to wake up aio_cleanup_thread.
1255 1252 * Subsequently, segvn_reclaim will be called, and
1256 1253 * that will do AS_CLRUNMAPWAIT() and wake us up.
1257 1254 * (3) Long term page locking:
1258 1255 * Drivers intending to have pages locked for a
1259 1256 * period considerably longer than for normal I/O
1260 1257 * (essentially forever) may have registered for a
1261 1258 * callback so they may unlock these pages on
1262 1259 * request. This is needed to allow this operation
1263 1260 * to succeed. Each entry on the callback list is
1264 1261 * examined. If the event or address range pertains
1265 1262 * the callback is invoked (unless it already is in
1266 1263 * progress). The a_contents lock must be dropped
1267 1264 * before the callback, so only one callback can
1268 1265 * be done at a time. Go to the top and do more
1269 1266 * until zero is returned. If zero is returned,
1270 1267 * either there were no callbacks for this event
1271 1268 * or they were already in progress.
1272 1269 */
1273 1270 mutex_enter(&as->a_contents);
1274 1271 if (as->a_callbacks &&
1275 1272 (cb = as_find_callback(as, AS_SETPROT_EVENT,
1276 1273 seg->s_base, seg->s_size))) {
1277 1274 AS_LOCK_EXIT(as, &as->a_lock);
1278 1275 as_execute_callback(as, cb, AS_SETPROT_EVENT);
1279 1276 } else if (!AS_ISNOUNMAPWAIT(as)) {
1280 1277 if (AS_ISUNMAPWAIT(as) == 0)
1281 1278 cv_broadcast(&as->a_cv);
1282 1279 AS_SETUNMAPWAIT(as);
1283 1280 AS_LOCK_EXIT(as, &as->a_lock);
1284 1281 while (AS_ISUNMAPWAIT(as))
1285 1282 cv_wait(&as->a_cv, &as->a_contents);
1286 1283 } else {
1287 1284 /*
1288 1285 * We may have raced with
1289 1286 * segvn_reclaim()/segspt_reclaim(). In this
1290 1287 * case clean nounmapwait flag and retry since
1291 1288 * softlockcnt in this segment may be already
1292 1289 * 0. We don't drop as writer lock so our
1293 1290 * number of retries without sleeping should
1294 1291 * be very small. See segvn_reclaim() for
1295 1292 * more comments.
1296 1293 */
1297 1294 AS_CLRNOUNMAPWAIT(as);
1298 1295 mutex_exit(&as->a_contents);
1299 1296 goto retry;
1300 1297 }
1301 1298 mutex_exit(&as->a_contents);
1302 1299 goto setprot_top;
1303 1300 } else if (error != 0)
1304 1301 break;
1305 1302 }
1306 1303 if (error != 0) {
1307 1304 as_setwatch(as);
1308 1305 } else {
1309 1306 as_setwatchprot(as, saveraddr, saversize, prot);
1310 1307 }
1311 1308 AS_LOCK_EXIT(as, &as->a_lock);
1312 1309 return (error);
1313 1310 }
1314 1311
1315 1312 /*
1316 1313 * Check to make sure that the interval [addr, addr + size)
1317 1314 * in address space `as' has at least the specified protection.
1318 1315 * It is ok for the range to cross over several segments, as long
1319 1316 * as they are contiguous.
1320 1317 */
1321 1318 int
1322 1319 as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1323 1320 {
1324 1321 struct seg *seg;
1325 1322 size_t ssize;
1326 1323 caddr_t raddr; /* rounded down addr */
1327 1324 size_t rsize; /* rounded up size */
1328 1325 int error = 0;
1329 1326
1330 1327 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1331 1328 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1332 1329 (size_t)raddr;
1333 1330
1334 1331 if (raddr + rsize < raddr) /* check for wraparound */
1335 1332 return (ENOMEM);
1336 1333
1337 1334 /*
1338 1335 * This is ugly as sin...
1339 1336 * Normally, we only acquire the address space readers lock.
1340 1337 * However, if the address space has watchpoints present,
1341 1338 * we must acquire the writer lock on the address space for
1342 1339 * the benefit of as_clearwatchprot() and as_setwatchprot().
1343 1340 */
1344 1341 if (avl_numnodes(&as->a_wpage) != 0)
1345 1342 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1346 1343 else
1347 1344 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1348 1345 as_clearwatchprot(as, raddr, rsize);
1349 1346 seg = as_segat(as, raddr);
1350 1347 if (seg == NULL) {
1351 1348 as_setwatch(as);
1352 1349 AS_LOCK_EXIT(as, &as->a_lock);
1353 1350 return (ENOMEM);
1354 1351 }
1355 1352
1356 1353 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1357 1354 if (raddr >= seg->s_base + seg->s_size) {
1358 1355 seg = AS_SEGNEXT(as, seg);
1359 1356 if (seg == NULL || raddr != seg->s_base) {
1360 1357 error = ENOMEM;
1361 1358 break;
1362 1359 }
1363 1360 }
1364 1361 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1365 1362 ssize = seg->s_base + seg->s_size - raddr;
1366 1363 else
1367 1364 ssize = rsize;
1368 1365
1369 1366 error = segop_checkprot(seg, raddr, ssize, prot);
1370 1367 if (error != 0)
1371 1368 break;
1372 1369 }
1373 1370 as_setwatch(as);
1374 1371 AS_LOCK_EXIT(as, &as->a_lock);
1375 1372 return (error);
1376 1373 }
1377 1374
1378 1375 int
1379 1376 as_unmap(struct as *as, caddr_t addr, size_t size)
1380 1377 {
1381 1378 struct seg *seg, *seg_next;
1382 1379 struct as_callback *cb;
1383 1380 caddr_t raddr, eaddr;
1384 1381 size_t ssize, rsize = 0;
1385 1382 int err;
1386 1383
1387 1384 top:
1388 1385 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1389 1386 eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1390 1387 (uintptr_t)PAGEMASK);
1391 1388
1392 1389 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1393 1390
1394 1391 as->a_updatedir = 1; /* inform /proc */
1395 1392 gethrestime(&as->a_updatetime);
1396 1393
1397 1394 /*
1398 1395 * Use as_findseg to find the first segment in the range, then
1399 1396 * step through the segments in order, following s_next.
1400 1397 */
1401 1398 as_clearwatchprot(as, raddr, eaddr - raddr);
1402 1399
1403 1400 for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
1404 1401 if (eaddr <= seg->s_base)
1405 1402 break; /* eaddr was in a gap; all done */
1406 1403
1407 1404 /* this is implied by the test above */
1408 1405 ASSERT(raddr < eaddr);
1409 1406
1410 1407 if (raddr < seg->s_base)
1411 1408 raddr = seg->s_base; /* raddr was in a gap */
1412 1409
1413 1410 if (eaddr > (seg->s_base + seg->s_size))
1414 1411 ssize = seg->s_base + seg->s_size - raddr;
1415 1412 else
1416 1413 ssize = eaddr - raddr;
1417 1414
1418 1415 /*
1419 1416 * Save next segment pointer since seg can be
1420 1417 * destroyed during the segment unmap operation.
1421 1418 */
1422 1419 seg_next = AS_SEGNEXT(as, seg);
1423 1420
1424 1421 /*
1425 1422 * We didn't count /dev/null mappings, so ignore them here.
1426 1423 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1427 1424 * we have to do this check here while we have seg.)
1428 1425 */
1429 1426 rsize = 0;
1430 1427 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1431 1428 !SEG_IS_PARTIAL_RESV(seg))
1432 1429 rsize = ssize;
1433 1430
1434 1431 retry:
1435 1432 err = segop_unmap(seg, raddr, ssize);
1436 1433 if (err == EAGAIN) {
1437 1434 /*
1438 1435 * Memory is currently locked. It must be unlocked
1439 1436 * before this operation can succeed through a retry.
1440 1437 * The possible reasons for locked memory and
1441 1438 * corresponding strategies for unlocking are:
1442 1439 * (1) Normal I/O
1443 1440 * wait for a signal that the I/O operation
1444 1441 * has completed and the memory is unlocked.
1445 1442 * (2) Asynchronous I/O
1446 1443 * The aio subsystem does not unlock pages when
1447 1444 * the I/O is completed. Those pages are unlocked
1448 1445 * when the application calls aiowait/aioerror.
1449 1446 * So, to prevent blocking forever, cv_broadcast()
1450 1447 * is done to wake up aio_cleanup_thread.
1451 1448 * Subsequently, segvn_reclaim will be called, and
1452 1449 * that will do AS_CLRUNMAPWAIT() and wake us up.
1453 1450 * (3) Long term page locking:
1454 1451 * Drivers intending to have pages locked for a
1455 1452 * period considerably longer than for normal I/O
1456 1453 * (essentially forever) may have registered for a
1457 1454 * callback so they may unlock these pages on
1458 1455 * request. This is needed to allow this operation
1459 1456 * to succeed. Each entry on the callback list is
1460 1457 * examined. If the event or address range pertains
1461 1458 * the callback is invoked (unless it already is in
1462 1459 * progress). The a_contents lock must be dropped
1463 1460 * before the callback, so only one callback can
1464 1461 * be done at a time. Go to the top and do more
1465 1462 * until zero is returned. If zero is returned,
1466 1463 * either there were no callbacks for this event
1467 1464 * or they were already in progress.
1468 1465 */
1469 1466 mutex_enter(&as->a_contents);
1470 1467 if (as->a_callbacks &&
1471 1468 (cb = as_find_callback(as, AS_UNMAP_EVENT,
1472 1469 seg->s_base, seg->s_size))) {
1473 1470 AS_LOCK_EXIT(as, &as->a_lock);
1474 1471 as_execute_callback(as, cb, AS_UNMAP_EVENT);
1475 1472 } else if (!AS_ISNOUNMAPWAIT(as)) {
1476 1473 if (AS_ISUNMAPWAIT(as) == 0)
1477 1474 cv_broadcast(&as->a_cv);
1478 1475 AS_SETUNMAPWAIT(as);
1479 1476 AS_LOCK_EXIT(as, &as->a_lock);
1480 1477 while (AS_ISUNMAPWAIT(as))
1481 1478 cv_wait(&as->a_cv, &as->a_contents);
1482 1479 } else {
1483 1480 /*
1484 1481 * We may have raced with
1485 1482 * segvn_reclaim()/segspt_reclaim(). In this
1486 1483 * case clean nounmapwait flag and retry since
1487 1484 * softlockcnt in this segment may be already
1488 1485 * 0. We don't drop as writer lock so our
1489 1486 * number of retries without sleeping should
1490 1487 * be very small. See segvn_reclaim() for
1491 1488 * more comments.
1492 1489 */
1493 1490 AS_CLRNOUNMAPWAIT(as);
1494 1491 mutex_exit(&as->a_contents);
1495 1492 goto retry;
1496 1493 }
1497 1494 mutex_exit(&as->a_contents);
1498 1495 goto top;
1499 1496 } else if (err == IE_RETRY) {
1500 1497 AS_LOCK_EXIT(as, &as->a_lock);
1501 1498 goto top;
1502 1499 } else if (err) {
1503 1500 as_setwatch(as);
1504 1501 AS_LOCK_EXIT(as, &as->a_lock);
1505 1502 return (-1);
1506 1503 }
1507 1504
1508 1505 as->a_size -= ssize;
1509 1506 if (rsize)
1510 1507 as->a_resvsize -= rsize;
1511 1508 raddr += ssize;
1512 1509 }
1513 1510 AS_LOCK_EXIT(as, &as->a_lock);
1514 1511 return (0);
1515 1512 }
1516 1513
1517 1514 static int
1518 1515 as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
1519 1516 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1520 1517 {
1521 1518 uint_t szc;
1522 1519 uint_t nszc;
1523 1520 int error;
1524 1521 caddr_t a;
1525 1522 caddr_t eaddr;
1526 1523 size_t segsize;
1527 1524 struct seg *seg;
1528 1525 size_t pgsz;
1529 1526 int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
1530 1527 uint_t save_szcvec;
1531 1528
1532 1529 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1533 1530 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1534 1531 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1535 1532 ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
1536 1533 if (!do_off) {
1537 1534 vn_a->offset = 0;
1538 1535 }
1539 1536
1540 1537 if (szcvec <= 1) {
1541 1538 seg = seg_alloc(as, addr, size);
1542 1539 if (seg == NULL) {
1543 1540 return (ENOMEM);
1544 1541 }
1545 1542 vn_a->szc = 0;
1546 1543 error = (*crfp)(seg, vn_a);
1547 1544 if (error != 0) {
1548 1545 seg_free(seg);
1549 1546 } else {
1550 1547 as->a_size += size;
1551 1548 as->a_resvsize += size;
1552 1549 }
1553 1550 return (error);
1554 1551 }
1555 1552
1556 1553 eaddr = addr + size;
1557 1554 save_szcvec = szcvec;
1558 1555 szcvec >>= 1;
1559 1556 szc = 0;
1560 1557 nszc = 0;
1561 1558 while (szcvec) {
1562 1559 if ((szcvec & 0x1) == 0) {
1563 1560 nszc++;
1564 1561 szcvec >>= 1;
1565 1562 continue;
1566 1563 }
1567 1564 nszc++;
1568 1565 pgsz = page_get_pagesize(nszc);
1569 1566 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
1570 1567 if (a != addr) {
1571 1568 ASSERT(a < eaddr);
1572 1569 segsize = a - addr;
1573 1570 seg = seg_alloc(as, addr, segsize);
1574 1571 if (seg == NULL) {
1575 1572 return (ENOMEM);
1576 1573 }
1577 1574 vn_a->szc = szc;
1578 1575 error = (*crfp)(seg, vn_a);
1579 1576 if (error != 0) {
1580 1577 seg_free(seg);
1581 1578 return (error);
1582 1579 }
1583 1580 as->a_size += segsize;
1584 1581 as->a_resvsize += segsize;
1585 1582 *segcreated = 1;
1586 1583 if (do_off) {
1587 1584 vn_a->offset += segsize;
1588 1585 }
1589 1586 addr = a;
1590 1587 }
1591 1588 szc = nszc;
1592 1589 szcvec >>= 1;
1593 1590 }
1594 1591
1595 1592 ASSERT(addr < eaddr);
1596 1593 szcvec = save_szcvec | 1; /* add 8K pages */
1597 1594 while (szcvec) {
1598 1595 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
1599 1596 ASSERT(a >= addr);
1600 1597 if (a != addr) {
1601 1598 segsize = a - addr;
1602 1599 seg = seg_alloc(as, addr, segsize);
1603 1600 if (seg == NULL) {
1604 1601 return (ENOMEM);
1605 1602 }
1606 1603 vn_a->szc = szc;
1607 1604 error = (*crfp)(seg, vn_a);
1608 1605 if (error != 0) {
1609 1606 seg_free(seg);
1610 1607 return (error);
1611 1608 }
1612 1609 as->a_size += segsize;
1613 1610 as->a_resvsize += segsize;
1614 1611 *segcreated = 1;
1615 1612 if (do_off) {
1616 1613 vn_a->offset += segsize;
1617 1614 }
1618 1615 addr = a;
1619 1616 }
1620 1617 szcvec &= ~(1 << szc);
1621 1618 if (szcvec) {
1622 1619 szc = highbit(szcvec) - 1;
1623 1620 pgsz = page_get_pagesize(szc);
1624 1621 }
1625 1622 }
1626 1623 ASSERT(addr == eaddr);
1627 1624
1628 1625 return (0);
1629 1626 }
1630 1627
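szcvec is a bitmask of permissible page-size codes for the mapping: bit n set means pages of size page_get_pagesize(n) may be used. The two loops above first walk alignment upward through successively larger codes, then carve the remainder back down. A small decoder, as a sketch:

    /* Enumerate the page sizes encoded in a size-code vector. */
    uint_t vec = szcvec;
    while (vec != 0) {
            uint_t szc = highbit(vec) - 1;  /* largest remaining code */
            size_t pgsz = page_get_pagesize(szc);
            /* ... pgsz is a candidate page size for this range ... */
            vec &= ~(1U << szc);
    }
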
1631 1628 static int
1632 1629 as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
1633 1630 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1634 1631 {
1635 1632 uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
1636 1633 int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
1637 1634 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1638 1635 type, 0);
1639 1636 int error;
1640 1637 struct seg *seg;
1641 1638 struct vattr va;
1642 1639 u_offset_t eoff;
1643 1640 size_t save_size = 0;
1644 1641 extern size_t textrepl_size_thresh;
1645 1642
1646 1643 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1647 1644 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1648 1645 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1649 1646 ASSERT(vn_a->vp != NULL);
1650 1647 ASSERT(vn_a->amp == NULL);
1651 1648
1652 1649 again:
1653 1650 if (szcvec <= 1) {
1654 1651 seg = seg_alloc(as, addr, size);
1655 1652 if (seg == NULL) {
1656 1653 return (ENOMEM);
1657 1654 }
1658 1655 vn_a->szc = 0;
1659 1656 error = (*crfp)(seg, vn_a);
1660 1657 if (error != 0) {
1661 1658 seg_free(seg);
1662 1659 } else {
1663 1660 as->a_size += size;
1664 1661 as->a_resvsize += size;
1665 1662 }
1666 1663 return (error);
1667 1664 }
1668 1665
1669 1666 va.va_mask = AT_SIZE;
1670 1667 if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
1671 1668 szcvec = 0;
1672 1669 goto again;
1673 1670 }
1674 1671 eoff = vn_a->offset & PAGEMASK;
1675 1672 if (eoff >= va.va_size) {
1676 1673 szcvec = 0;
1677 1674 goto again;
1678 1675 }
1679 1676 eoff += size;
1680 1677 if (btopr(va.va_size) < btopr(eoff)) {
1681 1678 save_size = size;
1682 1679 size = va.va_size - (vn_a->offset & PAGEMASK);
1683 1680 size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
1684 1681 szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1685 1682 type, 0);
1686 1683 if (szcvec <= 1) {
1687 1684 size = save_size;
1688 1685 goto again;
1689 1686 }
1690 1687 }
1691 1688
1692 1689 if (size > textrepl_size_thresh) {
1693 1690 vn_a->flags |= _MAP_TEXTREPL;
1694 1691 }
1695 1692 error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
1696 1693 segcreated);
1697 1694 if (error != 0) {
1698 1695 return (error);
1699 1696 }
1700 1697 if (save_size) {
1701 1698 addr += size;
1702 1699 size = save_size - size;
1703 1700 szcvec = 0;
1704 1701 goto again;
1705 1702 }
1706 1703 return (0);
1707 1704 }
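
A hedged worked example of the save_size split above: mapping 1M of a ~100K file at offset 0 makes btopr(eoff) exceed btopr(va_size), so size is clipped to P2ROUNDUP(va_size, PAGESIZE) for the large-page pass, and the save_size retry then maps the remainder past EOF with szcvec = 0, i.e. base pages only.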
1708 1705
1709 1706 /*
1710 1707 * as_map_ansegs: shared or private anonymous memory. Note that the flags
1711 1708  * passed to map_pgszcvec cannot be MAP_INITDATA, for anon.

1712 1709 */
1713 1710 static int
1714 1711 as_map_ansegs(struct as *as, caddr_t addr, size_t size,
1715 1712 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1716 1713 {
1717 1714 uint_t szcvec;
1718 1715 uchar_t type;
1719 1716
1720 1717 ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
1721 1718 if (vn_a->type == MAP_SHARED) {
1722 1719 type = MAPPGSZC_SHM;
1723 1720 } else if (vn_a->type == MAP_PRIVATE) {
1724 1721 if (vn_a->szc == AS_MAP_HEAP) {
1725 1722 type = MAPPGSZC_HEAP;
1726 1723 } else if (vn_a->szc == AS_MAP_STACK) {
1727 1724 type = MAPPGSZC_STACK;
1728 1725 } else {
1729 1726 type = MAPPGSZC_PRIVM;
1730 1727 }
1731 1728 }
1732 1729 szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
1733 1730 (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
1734 1731 (vn_a->flags & MAP_TEXT), type, 0);
1735 1732 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1736 1733 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1737 1734 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1738 1735 ASSERT(vn_a->vp == NULL);
1739 1736
1740 1737 return (as_map_segvn_segs(as, addr, size, szcvec,
1741 1738 crfp, vn_a, segcreated));
1742 1739 }
1743 1740
1744 1741 int
1745 1742 as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
1746 1743 {
1747 1744 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1748 1745 return (as_map_locked(as, addr, size, crfp, argsp));
1749 1746 }
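
For orientation, a minimal sketch of a typical caller (names hedged: zfod_argsp is the usual zero-fill-on-demand segvn_crargs template exposed via <vm/seg_vn.h>):

    #include <vm/as.h>
    #include <vm/seg_vn.h>

    static int
    map_anon_zfod(struct as *as, caddr_t addr, size_t len)
    {
            /*
             * as_map() takes a_lock as writer and forwards to
             * as_map_locked(); anonymous segvn arguments route
             * through the AS_MAP_CHECK_ANON_LPOOB() path below.
             */
            return (as_map(as, addr, len, segvn_create, zfod_argsp));
    }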
1750 1747
1751 1748 int
1752 1749 as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
1753 1750 void *argsp)
1754 1751 {
1755 1752 struct seg *seg = NULL;
1756 1753 caddr_t raddr; /* rounded down addr */
1757 1754 size_t rsize; /* rounded up size */
1758 1755 int error;
1759 1756 int unmap = 0;
1760 1757 struct proc *p = curproc;
1761 1758 struct segvn_crargs crargs;
1762 1759
1763 1760 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1764 1761 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1765 1762 (size_t)raddr;
1766 1763
1767 1764 /*
1768 1765  * check for wraparound
1769 1766 */
1770 1767 if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1771 1768 AS_LOCK_EXIT(as, &as->a_lock);
1772 1769 return (ENOMEM);
1773 1770 }
1774 1771
1775 1772 as->a_updatedir = 1; /* inform /proc */
1776 1773 gethrestime(&as->a_updatetime);
1777 1774
1778 1775 if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1779 1776 AS_LOCK_EXIT(as, &as->a_lock);
1780 1777
1781 1778 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1782 1779 RCA_UNSAFE_ALL);
1783 1780
1784 1781 return (ENOMEM);
1785 1782 }
1786 1783
1787 1784 if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1788 1785 crargs = *(struct segvn_crargs *)argsp;
1789 1786 error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1790 1787 if (error != 0) {
1791 1788 AS_LOCK_EXIT(as, &as->a_lock);
1792 1789 if (unmap) {
1793 1790 (void) as_unmap(as, addr, size);
1794 1791 }
1795 1792 return (error);
1796 1793 }
1797 1794 } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1798 1795 crargs = *(struct segvn_crargs *)argsp;
1799 1796 error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1800 1797 if (error != 0) {
1801 1798 AS_LOCK_EXIT(as, &as->a_lock);
1802 1799 if (unmap) {
1803 1800 (void) as_unmap(as, addr, size);
1804 1801 }
1805 1802 return (error);
1806 1803 }
1807 1804 } else {
1808 1805 seg = seg_alloc(as, addr, size);
1809 1806 if (seg == NULL) {
1810 1807 AS_LOCK_EXIT(as, &as->a_lock);
1811 1808 return (ENOMEM);
1812 1809 }
1813 1810
1814 1811 error = (*crfp)(seg, argsp);
1815 1812 if (error != 0) {
1816 1813 seg_free(seg);
1817 1814 AS_LOCK_EXIT(as, &as->a_lock);
1818 1815 return (error);
1819 1816 }
1820 1817 /*
1821 1818 * Add size now so as_unmap will work if as_ctl fails.
1822 1819 */
1823 1820 as->a_size += rsize;
1824 1821 as->a_resvsize += rsize;
1825 1822 }
1826 1823
1827 1824 as_setwatch(as);
1828 1825
1829 1826 /*
1830 1827 * If the address space is locked,
1831 1828 * establish memory locks for the new segment.
1832 1829 */
1833 1830 mutex_enter(&as->a_contents);
1834 1831 if (AS_ISPGLCK(as)) {
1835 1832 mutex_exit(&as->a_contents);
1836 1833 AS_LOCK_EXIT(as, &as->a_lock);
1837 1834 error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
1838 1835 if (error != 0)
1839 1836 (void) as_unmap(as, addr, size);
1840 1837 } else {
1841 1838 mutex_exit(&as->a_contents);
1842 1839 AS_LOCK_EXIT(as, &as->a_lock);
1843 1840 }
1844 1841 return (error);
1845 1842 }
1846 1843
1847 1844
1848 1845 /*
1849 1846 * Delete all segments in the address space marked with S_PURGE.
1850 1847 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
1851 1848 * These segments are deleted as a first step before calls to as_gap(), so
1852 1849 * that they don't affect mmap() or shmat().
1853 1850 */
1854 1851 void
1855 1852 as_purge(struct as *as)
1856 1853 {
1857 1854 struct seg *seg;
1858 1855 struct seg *next_seg;
1859 1856
1860 1857 /*
1861 1858  * the setting of NEEDSPURGE is protected by as_rangelock(), so
1862 1859 * no need to grab a_contents mutex for this check
1863 1860 */
1864 1861 if ((as->a_flags & AS_NEEDSPURGE) == 0)
1865 1862 return;
1866 1863
1867 1864 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1868 1865 next_seg = NULL;
1869 1866 seg = AS_SEGFIRST(as);
1870 1867 while (seg != NULL) {
1871 1868 next_seg = AS_SEGNEXT(as, seg);
1872 1869 if (seg->s_flags & S_PURGE)
1873 1870 (void) segop_unmap(seg, seg->s_base, seg->s_size);
1874 1871 seg = next_seg;
1875 1872 }
1876 1873 AS_LOCK_EXIT(as, &as->a_lock);
1877 1874
1878 1875 mutex_enter(&as->a_contents);
1879 1876 as->a_flags &= ~AS_NEEDSPURGE;
1880 1877 mutex_exit(&as->a_contents);
1881 1878 }
1882 1879
1883 1880 /*
1884 1881 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1885 1882 * range of addresses at least "minlen" long, where the base of the range is
1886 1883 * at "off" phase from an "align" boundary and there is space for a
1887 1884 * "redzone"-sized redzone on eithe rside of the range. Thus,
1888 1885 * if align was 4M and off was 16k, the user wants a hole which will start
1889 1886 * 16k into a 4M page.
1890 1887 *
1891 1888 * If flags specifies AH_HI, the hole will have the highest possible address
1892 1889 * in the range. We use the as->a_lastgap field to figure out where to
1893 1890 * start looking for a gap.
1894 1891 *
1895 1892 * Otherwise, the gap will have the lowest possible address.
1896 1893 *
1897 1894 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
1898 1895 *
1899 1896 * If an adequate hole is found, *basep and *lenp are set to reflect the part of
1900 1897 * the hole that is within range, and 0 is returned. On failure, -1 is returned.
1901 1898 *
1902 1899 * NOTE: This routine is not correct when base+len overflows caddr_t.
1903 1900 */
1904 1901 int
1905 1902 as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
1906 1903 uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
1907 1904 {
1908 1905 caddr_t lobound = *basep;
1909 1906 caddr_t hibound = lobound + *lenp;
1910 1907 struct seg *lseg, *hseg;
1911 1908 caddr_t lo, hi;
1912 1909 int forward;
1913 1910 caddr_t save_base;
1914 1911 size_t save_len;
1915 1912 size_t save_minlen;
1916 1913 size_t save_redzone;
1917 1914 int fast_path = 1;
1918 1915
1919 1916 save_base = *basep;
1920 1917 save_len = *lenp;
1921 1918 save_minlen = minlen;
1922 1919 save_redzone = redzone;
1923 1920
1924 1921 /*
1925 1922 * For the first pass/fast_path, just add align and redzone into
1926 1923 * minlen since if we get an allocation, we can guarantee that it
1927 1924 * will fit the alignment and redzone requested.
1928 1925 * This increases the chance that hibound will be adjusted to
1929 1926 * a_lastgap->s_base which will likely allow us to find an
1930 1927 * acceptable hole in the address space quicker.
1931 1928 * If we can't find a hole with this fast_path, then we look for
1932 1929 * smaller holes in which the alignment and offset may allow
1933 1930 * the allocation to fit.
1934 1931 */
1935 1932 minlen += align;
1936 1933 minlen += 2 * redzone;
1937 1934 redzone = 0;
1938 1935
1939 1936 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1940 1937 if (AS_SEGFIRST(as) == NULL) {
1941 1938 if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
1942 1939 align, redzone, off)) {
1943 1940 AS_LOCK_EXIT(as, &as->a_lock);
1944 1941 return (0);
1945 1942 } else {
1946 1943 AS_LOCK_EXIT(as, &as->a_lock);
1947 1944 *basep = save_base;
1948 1945 *lenp = save_len;
1949 1946 return (-1);
1950 1947 }
1951 1948 }
1952 1949
1953 1950 retry:
1954 1951 /*
1955 1952 * Set up to iterate over all the inter-segment holes in the given
1956 1953 * direction. lseg is NULL for the lowest-addressed hole and hseg is
1957 1954 * NULL for the highest-addressed hole. If moving backwards, we reset
1958 1955  * hseg to denote the highest-addressed segment.
1959 1956 */
1960 1957 forward = (flags & AH_DIR) == AH_LO;
1961 1958 if (forward) {
1962 1959 hseg = as_findseg(as, lobound, 1);
1963 1960 lseg = AS_SEGPREV(as, hseg);
1964 1961 } else {
1965 1962
1966 1963 /*
1967 1964 * If allocating at least as much as the last allocation,
1968 1965 * use a_lastgap's base as a better estimate of hibound.
1969 1966 */
1970 1967 if (as->a_lastgap &&
1971 1968 minlen >= as->a_lastgap->s_size &&
1972 1969 hibound >= as->a_lastgap->s_base)
1973 1970 hibound = as->a_lastgap->s_base;
1974 1971
1975 1972 hseg = as_findseg(as, hibound, 1);
1976 1973 if (hseg->s_base + hseg->s_size < hibound) {
1977 1974 lseg = hseg;
1978 1975 hseg = NULL;
1979 1976 } else {
1980 1977 lseg = AS_SEGPREV(as, hseg);
1981 1978 }
1982 1979 }
1983 1980
1984 1981 for (;;) {
1985 1982 /*
1986 1983 * Set lo and hi to the hole's boundaries. (We should really
1987 1984 * use MAXADDR in place of hibound in the expression below,
1988 1985 * but can't express it easily; using hibound in its place is
1989 1986 * harmless.)
1990 1987 */
1991 1988 lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
1992 1989 hi = (hseg == NULL) ? hibound : hseg->s_base;
1993 1990 /*
1994 1991 * If the iteration has moved past the interval from lobound
1995 1992 * to hibound it's pointless to continue.
1996 1993 */
1997 1994 if ((forward && lo > hibound) || (!forward && hi < lobound))
1998 1995 break;
1999 1996 else if (lo > hibound || hi < lobound)
2000 1997 goto cont;
2001 1998 /*
2002 1999 * Candidate hole lies at least partially within the allowable
2003 2000 * range. Restrict it to fall completely within that range,
2004 2001 * i.e., to [max(lo, lobound), min(hi, hibound)].
2005 2002 */
2006 2003 if (lo < lobound)
2007 2004 lo = lobound;
2008 2005 if (hi > hibound)
2009 2006 hi = hibound;
2010 2007 /*
2011 2008 * Verify that the candidate hole is big enough and meets
2012 2009 * hardware constraints. If the hole is too small, no need
2013 2010 * to do the further checks since they will fail.
2014 2011 */
2015 2012 *basep = lo;
2016 2013 *lenp = hi - lo;
2017 2014 if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
2018 2015 minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
2019 2016 ((flags & AH_CONTAIN) == 0 ||
2020 2017 (*basep <= addr && *basep + *lenp > addr))) {
2021 2018 if (!forward)
2022 2019 as->a_lastgap = hseg;
2023 2020 if (hseg != NULL)
2024 2021 as->a_lastgaphl = hseg;
2025 2022 else
2026 2023 as->a_lastgaphl = lseg;
2027 2024 AS_LOCK_EXIT(as, &as->a_lock);
2028 2025 return (0);
2029 2026 }
2030 2027 cont:
2031 2028 /*
2032 2029 * Move to the next hole.
2033 2030 */
2034 2031 if (forward) {
2035 2032 lseg = hseg;
2036 2033 if (lseg == NULL)
2037 2034 break;
2038 2035 hseg = AS_SEGNEXT(as, hseg);
2039 2036 } else {
2040 2037 hseg = lseg;
2041 2038 if (hseg == NULL)
2042 2039 break;
2043 2040 lseg = AS_SEGPREV(as, lseg);
2044 2041 }
2045 2042 }
2046 2043 if (fast_path && (align != 0 || save_redzone != 0)) {
2047 2044 fast_path = 0;
2048 2045 minlen = save_minlen;
2049 2046 redzone = save_redzone;
2050 2047 goto retry;
2051 2048 }
2052 2049 *basep = save_base;
2053 2050 *lenp = save_len;
2054 2051 AS_LOCK_EXIT(as, &as->a_lock);
2055 2052 return (-1);
2056 2053 }
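
A hedged call sketch matching the 4M/16K example in the block comment above (flag and size values are illustrative):

    static int
    find_4m_phase_hole(struct as *as, caddr_t *basep, size_t *lenp)
    {
            /*
             * minlen 1M, prefer the highest address, 4M alignment with
             * a 16K phase, no redzone, no AH_CONTAIN address.
             */
            return (as_gap_aligned(as, 0x100000, basep, lenp, AH_HI,
                NULL, 0x400000, 0, 0x4000));
    }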
2057 2054
2058 2055 /*
2059 2056 * Find a hole of at least size minlen within [*basep, *basep + *lenp).
2060 2057 *
2061 2058 * If flags specifies AH_HI, the hole will have the highest possible address
2062 2059 * in the range. We use the as->a_lastgap field to figure out where to
2063 2060 * start looking for a gap.
2064 2061 *
2065 2062 * Otherwise, the gap will have the lowest possible address.
2066 2063 *
2067 2064 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
2068 2065 *
2069 2066 * If an adequate hole is found, base and len are set to reflect the part of
2070 2067 * the hole that is within range, and 0 is returned, otherwise,
2071 2068 * -1 is returned.
2072 2069 *
2073 2070 * NOTE: This routine is not correct when base+len overflows caddr_t.
2074 2071 */
2075 2072 int
2076 2073 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
2077 2074 caddr_t addr)
2078 2075 {
2079 2076
2080 2077 return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
2081 2078 }
2082 2079
2083 2080 /*
2084 2081 * Return the next range within [base, base + len) that is backed
2085 2082 * with "real memory". Skip holes and non-seg_vn segments.
2086 2083 * We're lazy and only return one segment at a time.
2087 2084 */
2088 2085 int
2089 2086 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2090 2087 {
2091 - extern struct seg_ops segspt_shmops; /* needs a header file */
2088 + extern const struct seg_ops segspt_shmops; /* needs a header file */
2092 2089 struct seg *seg;
2093 2090 caddr_t addr, eaddr;
2094 2091 caddr_t segend;
2095 2092
2096 2093 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2097 2094
2098 2095 addr = *basep;
2099 2096 eaddr = addr + *lenp;
2100 2097
2101 2098 seg = as_findseg(as, addr, 0);
2102 2099 if (seg != NULL)
2103 2100 addr = MAX(seg->s_base, addr);
2104 2101
2105 2102 for (;;) {
2106 2103 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2107 2104 AS_LOCK_EXIT(as, &as->a_lock);
2108 2105 return (EINVAL);
2109 2106 }
2110 2107
2111 2108 if (seg->s_ops == &segvn_ops) {
2112 2109 segend = seg->s_base + seg->s_size;
2113 2110 break;
2114 2111 }
2115 2112
2116 2113 /*
2117 2114 * We do ISM by looking into the private data
2118 2115 * to determine the real size of the segment.
2119 2116 */
2120 2117 if (seg->s_ops == &segspt_shmops) {
2121 2118 segend = seg->s_base + spt_realsize(seg);
2122 2119 if (addr < segend)
2123 2120 break;
2124 2121 }
2125 2122
2126 2123 seg = AS_SEGNEXT(as, seg);
2127 2124
2128 2125 if (seg != NULL)
2129 2126 addr = seg->s_base;
2130 2127 }
2131 2128
2132 2129 *basep = addr;
2133 2130
2134 2131 if (segend > eaddr)
2135 2132 *lenp = eaddr - addr;
2136 2133 else
2137 2134 *lenp = segend - addr;
2138 2135
2139 2136 AS_LOCK_EXIT(as, &as->a_lock);
2140 2137 return (0);
2141 2138 }
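
Since as_memory() returns at most one segment per call, a caller walks a range by re-invoking it, as in this hedged sketch:

    static size_t
    real_memory_bytes(struct as *as, caddr_t addr, size_t len)
    {
            caddr_t base = addr;
            caddr_t end = addr + len;
            size_t rlen = len;
            size_t total = 0;

            /* each successful call yields at most one segment's worth */
            while (as_memory(as, &base, &rlen) == 0) {
                    total += rlen;
                    base += rlen;
                    if (base >= end)
                            break;
                    rlen = end - base;
            }
            return (total);
    }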
2142 2139
2143 2140 /*
2144 2141 * Swap the pages associated with the address space as out to
2145 2142 * secondary storage, returning the number of bytes actually
2146 2143 * swapped.
2147 2144 *
2148 2145 * The value returned is intended to correlate well with the process's
2149 2146 * memory requirements. Its usefulness for this purpose depends on
2150 2147 * how well the segment-level routines do at returning accurate
2151 2148 * information.
2152 2149 */
2153 2150 size_t
2154 2151 as_swapout(struct as *as)
2155 2152 {
2156 2153 struct seg *seg;
2157 2154 size_t swpcnt = 0;
2158 2155
2159 2156 /*
2160 2157 * Kernel-only processes have given up their address
2161 2158 * spaces. Of course, we shouldn't be attempting to
2162 2159 * swap out such processes in the first place...
2163 2160 */
2164 2161 if (as == NULL)
2165 2162 return (0);
2166 2163
2167 2164 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2168 2165
2169 2166 /* Prevent XHATs from attaching */
2170 2167 mutex_enter(&as->a_contents);
2171 2168 AS_SETBUSY(as);
2172 2169 mutex_exit(&as->a_contents);
2173 2170
2174 2171
2175 2172 /*
2176 2173 * Free all mapping resources associated with the address
2177 2174 * space. The segment-level swapout routines capitalize
2178 2175  * on this unmapping by scavenging pages that have become
2179 2176 * unmapped here.
2180 2177 */
2181 2178 hat_swapout(as->a_hat);
2182 2179 if (as->a_xhat != NULL)
2183 2180 xhat_swapout_all(as);
2184 2181
2185 2182 mutex_enter(&as->a_contents);
2186 2183 AS_CLRBUSY(as);
2187 2184 mutex_exit(&as->a_contents);
2188 2185
2189 2186 /*
2190 2187 * Call the swapout routines of all segments in the address
2191 2188 * space to do the actual work, accumulating the amount of
2192 2189 * space reclaimed.
2193 2190 */
2194 2191 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195 - struct seg_ops *ov = seg->s_ops;
2192 + const struct seg_ops *ov = seg->s_ops;
2196 2193
2197 2194 /*
2198 2195 * We have to check to see if the seg has
2199 2196 * an ops vector because the seg may have
2200 2197 * been in the middle of being set up when
2201 2198 * the process was picked for swapout.
2202 2199 */
2203 2200 if ((ov != NULL) && (ov->swapout != NULL))
2204 2201 swpcnt += segop_swapout(seg);
2205 2202 }
2206 2203 AS_LOCK_EXIT(as, &as->a_lock);
2207 2204 return (swpcnt);
2208 2205 }
2209 2206
2210 2207 /*
2211 2208 * Determine whether data from the mappings in interval [addr, addr + size)
2212 2209 * are in the primary memory (core) cache.
2213 2210 */
2214 2211 int
2215 2212 as_incore(struct as *as, caddr_t addr,
2216 2213 size_t size, char *vec, size_t *sizep)
2217 2214 {
2218 2215 struct seg *seg;
2219 2216 size_t ssize;
2220 2217 caddr_t raddr; /* rounded down addr */
2221 2218 size_t rsize; /* rounded up size */
2222 2219 size_t isize; /* iteration size */
2223 2220 int error = 0; /* result, assume success */
2224 2221
2225 2222 *sizep = 0;
2226 2223 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2227 2224 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2228 2225 (size_t)raddr;
2229 2226
2230 2227 if (raddr + rsize < raddr) /* check for wraparound */
2231 2228 return (ENOMEM);
2232 2229
2233 2230 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2234 2231 seg = as_segat(as, raddr);
2235 2232 if (seg == NULL) {
2236 2233 AS_LOCK_EXIT(as, &as->a_lock);
2237 2234 return (-1);
2238 2235 }
2239 2236
2240 2237 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2241 2238 if (raddr >= seg->s_base + seg->s_size) {
2242 2239 seg = AS_SEGNEXT(as, seg);
2243 2240 if (seg == NULL || raddr != seg->s_base) {
2244 2241 error = -1;
2245 2242 break;
2246 2243 }
2247 2244 }
2248 2245 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2249 2246 ssize = seg->s_base + seg->s_size - raddr;
2250 2247 else
2251 2248 ssize = rsize;
2252 2249 *sizep += isize = segop_incore(seg, raddr, ssize, vec);
2253 2250 if (isize != ssize) {
2254 2251 error = -1;
2255 2252 break;
2256 2253 }
2257 2254 vec += btopr(ssize);
2258 2255 }
2259 2256 AS_LOCK_EXIT(as, &as->a_lock);
2260 2257 return (error);
2261 2258 }
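
A hedged usage sketch: the caller supplies one char per page, mincore(2)-style, and *sizep reports how much of the range was resolved.

    static int
    pages_in_core(struct as *as, caddr_t addr, size_t size)
    {
            size_t veclen = btopr(size);    /* one char per page */
            char *vec = kmem_zalloc(veclen, KM_SLEEP);
            size_t resolved;
            int err;

            err = as_incore(as, addr, size, vec, &resolved);
            /* err == -1 means a hole or partial answer was hit */
            kmem_free(vec, veclen);
            return (err);
    }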
2262 2259
2263 2260 static void
2264 2261 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2265 2262 ulong_t *bitmap, size_t position, size_t npages)
2266 2263 {
2267 2264 caddr_t range_start;
2268 2265 size_t pos1 = position;
2269 2266 size_t pos2;
2270 2267 size_t size;
2271 2268 size_t end_pos = npages + position;
2272 2269
2273 2270 while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2274 2271 size = ptob((pos2 - pos1));
2275 2272 range_start = (caddr_t)((uintptr_t)addr +
2276 2273 ptob(pos1 - position));
2277 2274
2278 2275 (void) segop_lockop(seg, range_start, size, attr, MC_UNLOCK,
2279 2276 (ulong_t *)NULL, (size_t)NULL);
2280 2277 pos1 = pos2;
2281 2278 }
2282 2279 }
2283 2280
2284 2281 static void
2285 2282 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2286 2283 caddr_t raddr, size_t rsize)
2287 2284 {
2288 2285 struct seg *seg = as_segat(as, raddr);
2289 2286 size_t ssize;
2290 2287
2291 2288 while (rsize != 0) {
2292 2289 if (raddr >= seg->s_base + seg->s_size)
2293 2290 seg = AS_SEGNEXT(as, seg);
2294 2291
2295 2292 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2296 2293 ssize = seg->s_base + seg->s_size - raddr;
2297 2294 else
2298 2295 ssize = rsize;
2299 2296
2300 2297 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize));
2301 2298
2302 2299 rsize -= ssize;
2303 2300 raddr += ssize;
2304 2301 }
2305 2302 }
2306 2303
2307 2304 /*
2308 2305 * Cache control operations over the interval [addr, addr + size) in
2309 2306 * address space "as".
2310 2307 */
2311 2308 /*ARGSUSED*/
2312 2309 int
2313 2310 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr,
2314 2311 uintptr_t arg, ulong_t *lock_map, size_t pos)
2315 2312 {
2316 2313 struct seg *seg; /* working segment */
2317 2314 caddr_t raddr; /* rounded down addr */
2318 2315 caddr_t initraddr; /* saved initial rounded down addr */
2319 2316 size_t rsize; /* rounded up size */
2320 2317 size_t initrsize; /* saved initial rounded up size */
2321 2318 size_t ssize; /* size of seg */
2322 2319 int error = 0; /* result */
2323 2320 size_t mlock_size; /* size of bitmap */
2324 2321 ulong_t *mlock_map; /* pointer to bitmap used */
2325 2322 /* to represent the locked */
2326 2323 /* pages. */
2327 2324 retry:
2328 2325 if (error == IE_RETRY)
2329 2326 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2330 2327 else
2331 2328 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2332 2329
2333 2330 /*
2334 2331 * If these are address space lock/unlock operations, loop over
2335 2332 * all segments in the address space, as appropriate.
2336 2333 */
2337 2334 if (func == MC_LOCKAS) {
2338 2335 size_t npages, idx;
2339 2336 size_t rlen = 0; /* rounded as length */
2340 2337
2341 2338 idx = pos;
2342 2339
2343 2340 if (arg & MCL_FUTURE) {
2344 2341 mutex_enter(&as->a_contents);
2345 2342 AS_SETPGLCK(as);
2346 2343 mutex_exit(&as->a_contents);
2347 2344 }
2348 2345 if ((arg & MCL_CURRENT) == 0) {
2349 2346 AS_LOCK_EXIT(as, &as->a_lock);
2350 2347 return (0);
2351 2348 }
2352 2349
2353 2350 seg = AS_SEGFIRST(as);
2354 2351 if (seg == NULL) {
2355 2352 AS_LOCK_EXIT(as, &as->a_lock);
2356 2353 return (0);
2357 2354 }
2358 2355
2359 2356 do {
2360 2357 raddr = (caddr_t)((uintptr_t)seg->s_base &
2361 2358 (uintptr_t)PAGEMASK);
2362 2359 rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2363 2360 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2364 2361 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2365 2362
2366 2363 mlock_size = BT_BITOUL(btopr(rlen));
2367 2364 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2368 2365 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2369 2366 AS_LOCK_EXIT(as, &as->a_lock);
2370 2367 return (EAGAIN);
2371 2368 }
2372 2369
2373 2370 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2374 2371 error = segop_lockop(seg, seg->s_base,
2375 2372 seg->s_size, attr, MC_LOCK, mlock_map, pos);
2376 2373 if (error != 0)
2377 2374 break;
2378 2375 pos += seg_pages(seg);
2379 2376 }
2380 2377
2381 2378 if (error) {
2382 2379 for (seg = AS_SEGFIRST(as); seg != NULL;
2383 2380 seg = AS_SEGNEXT(as, seg)) {
2384 2381
2385 2382 raddr = (caddr_t)((uintptr_t)seg->s_base &
2386 2383 (uintptr_t)PAGEMASK);
2387 2384 npages = seg_pages(seg);
2388 2385 as_segunlock(seg, raddr, attr, mlock_map,
2389 2386 idx, npages);
2390 2387 idx += npages;
2391 2388 }
2392 2389 }
2393 2390
2394 2391 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2395 2392 AS_LOCK_EXIT(as, &as->a_lock);
2396 2393 goto lockerr;
2397 2394 } else if (func == MC_UNLOCKAS) {
2398 2395 mutex_enter(&as->a_contents);
2399 2396 AS_CLRPGLCK(as);
2400 2397 mutex_exit(&as->a_contents);
2401 2398
2402 2399 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2403 2400 error = segop_lockop(seg, seg->s_base,
2404 2401 seg->s_size, attr, MC_UNLOCK, NULL, 0);
2405 2402 if (error != 0)
2406 2403 break;
2407 2404 }
2408 2405
2409 2406 AS_LOCK_EXIT(as, &as->a_lock);
2410 2407 goto lockerr;
2411 2408 }
2412 2409
2413 2410 /*
2414 2411 * Normalize addresses and sizes.
2415 2412 */
2416 2413 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2417 2414 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2418 2415 (size_t)raddr;
2419 2416
2420 2417 if (raddr + rsize < raddr) { /* check for wraparound */
2421 2418 AS_LOCK_EXIT(as, &as->a_lock);
2422 2419 return (ENOMEM);
2423 2420 }
2424 2421
2425 2422 /*
2426 2423 * Get initial segment.
2427 2424 */
2428 2425 if ((seg = as_segat(as, raddr)) == NULL) {
2429 2426 AS_LOCK_EXIT(as, &as->a_lock);
2430 2427 return (ENOMEM);
2431 2428 }
2432 2429
2433 2430 if (func == MC_LOCK) {
2434 2431 mlock_size = BT_BITOUL(btopr(rsize));
2435 2432 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2436 2433 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2437 2434 AS_LOCK_EXIT(as, &as->a_lock);
2438 2435 return (EAGAIN);
2439 2436 }
2440 2437 }
2441 2438
2442 2439 /*
2443 2440 * Loop over all segments. If a hole in the address range is
2444 2441 * discovered, then fail. For each segment, perform the appropriate
2445 2442 * control operation.
2446 2443 */
2447 2444 while (rsize != 0) {
2448 2445
2449 2446 /*
2450 2447 * Make sure there's no hole, calculate the portion
2451 2448 * of the next segment to be operated over.
2452 2449 */
2453 2450 if (raddr >= seg->s_base + seg->s_size) {
2454 2451 seg = AS_SEGNEXT(as, seg);
2455 2452 if (seg == NULL || raddr != seg->s_base) {
2456 2453 if (func == MC_LOCK) {
2457 2454 as_unlockerr(as, attr, mlock_map,
2458 2455 initraddr, initrsize - rsize);
2459 2456 kmem_free(mlock_map,
2460 2457 mlock_size * sizeof (ulong_t));
2461 2458 }
2462 2459 AS_LOCK_EXIT(as, &as->a_lock);
2463 2460 return (ENOMEM);
2464 2461 }
2465 2462 }
2466 2463 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2467 2464 ssize = seg->s_base + seg->s_size - raddr;
2468 2465 else
2469 2466 ssize = rsize;
2470 2467
2471 2468 /*
2472 2469 * Dispatch on specific function.
2473 2470 */
2474 2471 switch (func) {
2475 2472
2476 2473 /*
2477 2474 * Synchronize cached data from mappings with backing
2478 2475 * objects.
2479 2476 */
2480 2477 case MC_SYNC:
2481 2478 if (error = segop_sync(seg, raddr, ssize,
2482 2479 attr, (uint_t)arg)) {
2483 2480 AS_LOCK_EXIT(as, &as->a_lock);
2484 2481 return (error);
2485 2482 }
2486 2483 break;
2487 2484
2488 2485 /*
2489 2486 * Lock pages in memory.
2490 2487 */
2491 2488 case MC_LOCK:
2492 2489 if (error = segop_lockop(seg, raddr, ssize,
2493 2490 attr, func, mlock_map, pos)) {
2494 2491 as_unlockerr(as, attr, mlock_map, initraddr,
2495 2492 initrsize - rsize + ssize);
2496 2493 kmem_free(mlock_map, mlock_size *
2497 2494 sizeof (ulong_t));
2498 2495 AS_LOCK_EXIT(as, &as->a_lock);
2499 2496 goto lockerr;
2500 2497 }
2501 2498 break;
2502 2499
2503 2500 /*
2504 2501 * Unlock mapped pages.
2505 2502 */
2506 2503 case MC_UNLOCK:
2507 2504 (void) segop_lockop(seg, raddr, ssize, attr, func,
2508 2505 (ulong_t *)NULL, (size_t)NULL);
2509 2506 break;
2510 2507
2511 2508 /*
2512 2509 * Store VM advise for mapped pages in segment layer.
2513 2510 */
2514 2511 case MC_ADVISE:
2515 2512 error = segop_advise(seg, raddr, ssize, (uint_t)arg);
2516 2513
2517 2514 /*
2518 2515 * Check for regular errors and special retry error
2519 2516 */
2520 2517 if (error) {
2521 2518 if (error == IE_RETRY) {
2522 2519 /*
2523 2520 * Need to acquire writers lock, so
2524 2521 * have to drop readers lock and start
2525 2522 * all over again
2526 2523 */
2527 2524 AS_LOCK_EXIT(as, &as->a_lock);
2528 2525 goto retry;
2529 2526 } else if (error == IE_REATTACH) {
2530 2527 /*
2531 2528 * Find segment for current address
2532 2529 * because current segment just got
2533 2530 * split or concatenated
2534 2531 */
2535 2532 seg = as_segat(as, raddr);
2536 2533 if (seg == NULL) {
2537 2534 AS_LOCK_EXIT(as, &as->a_lock);
2538 2535 return (ENOMEM);
2539 2536 }
2540 2537 } else {
2541 2538 /*
2542 2539 * Regular error
2543 2540 */
2544 2541 AS_LOCK_EXIT(as, &as->a_lock);
2545 2542 return (error);
2546 2543 }
2547 2544 }
2548 2545 break;
2549 2546
2550 2547 case MC_INHERIT_ZERO:
2551 2548 error = segop_inherit(seg, raddr, ssize, SEGP_INH_ZERO);
2552 2549 if (error != 0) {
2553 2550 AS_LOCK_EXIT(as, &as->a_lock);
2554 2551 return (error);
2555 2552 }
2556 2553 break;
2557 2554
2558 2555 /*
2559 2556 * Can't happen.
2560 2557 */
2561 2558 default:
2562 2559 panic("as_ctl: bad operation %d", func);
2563 2560 /*NOTREACHED*/
2564 2561 }
2565 2562
2566 2563 rsize -= ssize;
2567 2564 raddr += ssize;
2568 2565 }
2569 2566
2570 2567 if (func == MC_LOCK)
2571 2568 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2572 2569 AS_LOCK_EXIT(as, &as->a_lock);
2573 2570 return (0);
2574 2571 lockerr:
2575 2572
2576 2573 /*
2577 2574 * If the lower levels returned EDEADLK for a segment lockop,
2578 2575 * it means that we should retry the operation. Let's wait
2579 2576 * a bit also to let the deadlock causing condition clear.
2580 2577 * This is part of a gross hack to work around a design flaw
2581 2578 * in the ufs/sds logging code and should go away when the
2582 2579 * logging code is re-designed to fix the problem. See bug
2583 2580 * 4125102 for details of the problem.
2584 2581 */
2585 2582 if (error == EDEADLK) {
2586 2583 delay(deadlk_wait);
2587 2584 error = 0;
2588 2585 goto retry;
2589 2586 }
2590 2587 return (error);
2591 2588 }
2592 2589
2593 2590 int
2594 2591 fc_decode(faultcode_t fault_err)
2595 2592 {
2596 2593 int error = 0;
2597 2594
2598 2595 switch (FC_CODE(fault_err)) {
2599 2596 case FC_OBJERR:
2600 2597 error = FC_ERRNO(fault_err);
2601 2598 break;
2602 2599 case FC_PROT:
2603 2600 error = EACCES;
2604 2601 break;
2605 2602 default:
2606 2603 error = EFAULT;
2607 2604 break;
2608 2605 }
2609 2606 return (error);
2610 2607 }
2611 2608
2612 2609 /*
2613 2610 * Pagelock pages from a range that spans more than 1 segment. Obtain shadow
2614 2611 * lists from each segment and copy them to one contiguous shadow list (plist)
2615 2612 * as expected by the caller. Save pointers to per segment shadow lists at
2616 2613 * the tail of plist so that they can be used during as_pageunlock().
2617 2614 */
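
The resulting shadow-list layout, sketched from the (npages + segcnt) sizing used below:

    plist[0 .. npages-1]                flat copy of every locked page_t *
    plist[npages .. npages+segcnt-1]    per-segment shadow-list handles,
                                        replayed by as_pageunlock_segs()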
2618 2615 static int
2619 2616 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2620 2617 caddr_t addr, size_t size, enum seg_rw rw)
2621 2618 {
2622 2619 caddr_t sv_addr = addr;
2623 2620 size_t sv_size = size;
2624 2621 struct seg *sv_seg = seg;
2625 2622 ulong_t segcnt = 1;
2626 2623 ulong_t cnt;
2627 2624 size_t ssize;
2628 2625 pgcnt_t npages = btop(size);
2629 2626 page_t **plist;
2630 2627 page_t **pl;
2631 2628 int error;
2632 2629 caddr_t eaddr;
2633 2630 faultcode_t fault_err = 0;
2634 2631 pgcnt_t pl_off;
2635 - extern struct seg_ops segspt_shmops;
2632 + extern const struct seg_ops segspt_shmops;
2636 2633
2637 2634 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2638 2635 ASSERT(seg != NULL);
2639 2636 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2640 2637 ASSERT(addr + size > seg->s_base + seg->s_size);
2641 2638 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2642 2639 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2643 2640
2644 2641 /*
2645 2642 * Count the number of segments covered by the range we are about to
2646 2643 * lock. The segment count is used to size the shadow list we return
2647 2644 * back to the caller.
2648 2645 */
2649 2646 for (; size != 0; size -= ssize, addr += ssize) {
2650 2647 if (addr >= seg->s_base + seg->s_size) {
2651 2648
2652 2649 seg = AS_SEGNEXT(as, seg);
2653 2650 if (seg == NULL || addr != seg->s_base) {
2654 2651 AS_LOCK_EXIT(as, &as->a_lock);
2655 2652 return (EFAULT);
2656 2653 }
2657 2654 /*
2658 2655  * Do a quick check to see whether subsequent
2659 2656  * segments are likely to support pagelock.
2660 2657 */
2661 2658 if (seg->s_ops == &segvn_ops) {
2662 2659 vnode_t *vp;
2663 2660
2664 2661 if (segop_getvp(seg, addr, &vp) != 0 ||
2665 2662 vp != NULL) {
2666 2663 AS_LOCK_EXIT(as, &as->a_lock);
2667 2664 goto slow;
2668 2665 }
2669 2666 } else if (seg->s_ops != &segspt_shmops) {
2670 2667 AS_LOCK_EXIT(as, &as->a_lock);
2671 2668 goto slow;
2672 2669 }
2673 2670 segcnt++;
2674 2671 }
2675 2672 if (addr + size > seg->s_base + seg->s_size) {
2676 2673 ssize = seg->s_base + seg->s_size - addr;
2677 2674 } else {
2678 2675 ssize = size;
2679 2676 }
2680 2677 }
2681 2678 ASSERT(segcnt > 1);
2682 2679
2683 2680 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2684 2681
2685 2682 addr = sv_addr;
2686 2683 size = sv_size;
2687 2684 seg = sv_seg;
2688 2685
2689 2686 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2690 2687 if (addr >= seg->s_base + seg->s_size) {
2691 2688 seg = AS_SEGNEXT(as, seg);
2692 2689 ASSERT(seg != NULL && addr == seg->s_base);
2693 2690 cnt++;
2694 2691 ASSERT(cnt < segcnt);
2695 2692 }
2696 2693 if (addr + size > seg->s_base + seg->s_size) {
2697 2694 ssize = seg->s_base + seg->s_size - addr;
2698 2695 } else {
2699 2696 ssize = size;
2700 2697 }
2701 2698 pl = &plist[npages + cnt];
2702 2699 error = segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2703 2700 L_PAGELOCK, rw);
2704 2701 if (error) {
2705 2702 break;
2706 2703 }
2707 2704 ASSERT(plist[npages + cnt] != NULL);
2708 2705 ASSERT(pl_off + btop(ssize) <= npages);
2709 2706 bcopy(plist[npages + cnt], &plist[pl_off],
2710 2707 btop(ssize) * sizeof (page_t *));
2711 2708 pl_off += btop(ssize);
2712 2709 }
2713 2710
2714 2711 if (size == 0) {
2715 2712 AS_LOCK_EXIT(as, &as->a_lock);
2716 2713 ASSERT(cnt == segcnt - 1);
2717 2714 *ppp = plist;
2718 2715 return (0);
2719 2716 }
2720 2717
2721 2718 /*
2722 2719  * One of the pagelock calls failed; "error" holds the error code.
2723 2720 * Unlock what we've locked so far and retry with F_SOFTLOCK if error
2724 2721 * type is either EFAULT or ENOTSUP. Otherwise just return the error
2725 2722 * back to the caller.
2726 2723 */
2727 2724
2728 2725 eaddr = addr;
2729 2726 seg = sv_seg;
2730 2727
2731 2728 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2732 2729 if (addr >= seg->s_base + seg->s_size) {
2733 2730 seg = AS_SEGNEXT(as, seg);
2734 2731 ASSERT(seg != NULL && addr == seg->s_base);
2735 2732 cnt++;
2736 2733 ASSERT(cnt < segcnt);
2737 2734 }
2738 2735 if (eaddr > seg->s_base + seg->s_size) {
2739 2736 ssize = seg->s_base + seg->s_size - addr;
2740 2737 } else {
2741 2738 ssize = eaddr - addr;
2742 2739 }
2743 2740 pl = &plist[npages + cnt];
2744 2741 ASSERT(*pl != NULL);
2745 2742 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2746 2743 L_PAGEUNLOCK, rw);
2747 2744 }
2748 2745
2749 2746 AS_LOCK_EXIT(as, &as->a_lock);
2750 2747
2751 2748 kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2752 2749
2753 2750 if (error != ENOTSUP && error != EFAULT) {
2754 2751 return (error);
2755 2752 }
2756 2753
2757 2754 slow:
2758 2755 /*
2759 2756 * If we are here because pagelock failed due to the need to cow fault
2760 2757  * in the pages we want to lock, F_SOFTLOCK will do this job and in
2761 2758  * the next as_pagelock() call for this address range pagelock will
2762 2759 * hopefully succeed.
2763 2760 */
2764 2761 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2765 2762 if (fault_err != 0) {
2766 2763 return (fc_decode(fault_err));
2767 2764 }
2768 2765 *ppp = NULL;
2769 2766
2770 2767 return (0);
2771 2768 }
2772 2769
2773 2770 /*
2774 2771 * lock pages in a given address space. Return shadow list. If
2775 2772 * the list is NULL, the MMU mapping is also locked.
2776 2773 */
2777 2774 int
2778 2775 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr,
2779 2776 size_t size, enum seg_rw rw)
2780 2777 {
2781 2778 size_t rsize;
2782 2779 caddr_t raddr;
2783 2780 faultcode_t fault_err;
2784 2781 struct seg *seg;
2785 2782 int err;
2786 2783
2787 2784 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START,
2788 2785 "as_pagelock_start: addr %p size %ld", addr, size);
2789 2786
2790 2787 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2791 2788 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2792 2789 (size_t)raddr;
2793 2790
2794 2791 /*
2795 2792  * if the request crosses two segments, let
2796 2793 * as_fault handle it.
2797 2794 */
2798 2795 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2799 2796
2800 2797 seg = as_segat(as, raddr);
2801 2798 if (seg == NULL) {
2802 2799 AS_LOCK_EXIT(as, &as->a_lock);
2803 2800 return (EFAULT);
2804 2801 }
2805 2802 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2806 2803 if (raddr + rsize > seg->s_base + seg->s_size) {
2807 2804 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2808 2805 }
2809 2806 if (raddr + rsize <= raddr) {
2810 2807 AS_LOCK_EXIT(as, &as->a_lock);
2811 2808 return (EFAULT);
2812 2809 }
2813 2810
2814 2811 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2815 2812 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2816 2813
2817 2814 /*
2818 2815 * try to lock pages and pass back shadow list
2819 2816 */
2820 2817 err = segop_pagelock(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2821 2818
2822 2819 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2823 2820
2824 2821 AS_LOCK_EXIT(as, &as->a_lock);
2825 2822
2826 2823 if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2827 2824 return (err);
2828 2825 }
2829 2826
2830 2827 /*
2831 2828 * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2832 2829 * to no pagelock support for this segment or pages need to be cow
2833 2830 * faulted in. If fault is needed F_SOFTLOCK will do this job for
2834 2831  * this as_pagelock() call, and in the next as_pagelock() call for the
2835 2832  * same address range pagelock will hopefully succeed.
2836 2833 */
2837 2834 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2838 2835 if (fault_err != 0) {
2839 2836 return (fc_decode(fault_err));
2840 2837 }
2841 2838 *ppp = NULL;
2842 2839
2843 2840 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end");
2844 2841 return (0);
2845 2842 }
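
A hedged sketch of the pagelock/pageunlock pairing for a DMA-style consumer; a NULL shadow list means as_pagelock() fell back to F_SOFTLOCK, and as_pageunlock() undoes either case:

    static int
    with_locked_pages(struct as *as, caddr_t addr, size_t size)
    {
            struct page **pplist;
            int err;

            err = as_pagelock(as, &pplist, addr, size, S_WRITE);
            if (err != 0)
                    return (err);

            /* ... perform I/O against the locked range ... */

            /* handles both the shadow-list and NULL fallback cases */
            as_pageunlock(as, pplist, addr, size, S_WRITE);
            return (0);
    }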
2846 2843
2847 2844 /*
2848 2845 * unlock pages locked by as_pagelock_segs(). Retrieve per segment shadow
2849 2846 * lists from the end of plist and call pageunlock interface for each segment.
2850 2847 * Drop as lock and free plist.
2851 2848 */
2852 2849 static void
2853 2850 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size,
2854 2851 struct page **plist, enum seg_rw rw)
2855 2852 {
2856 2853 ulong_t cnt;
2857 2854 caddr_t eaddr = addr + size;
2858 2855 pgcnt_t npages = btop(size);
2859 2856 size_t ssize;
2860 2857 page_t **pl;
2861 2858
2862 2859 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2863 2860 ASSERT(seg != NULL);
2864 2861 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2865 2862 ASSERT(addr + size > seg->s_base + seg->s_size);
2866 2863 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2867 2864 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2868 2865 ASSERT(plist != NULL);
2869 2866
2870 2867 for (cnt = 0; addr < eaddr; addr += ssize) {
2871 2868 if (addr >= seg->s_base + seg->s_size) {
2872 2869 seg = AS_SEGNEXT(as, seg);
2873 2870 ASSERT(seg != NULL && addr == seg->s_base);
2874 2871 cnt++;
2875 2872 }
2876 2873 if (eaddr > seg->s_base + seg->s_size) {
2877 2874 ssize = seg->s_base + seg->s_size - addr;
2878 2875 } else {
2879 2876 ssize = eaddr - addr;
2880 2877 }
2881 2878 pl = &plist[npages + cnt];
2882 2879 ASSERT(*pl != NULL);
2883 2880 (void) segop_pagelock(seg, addr, ssize, (page_t ***)pl,
2884 2881 L_PAGEUNLOCK, rw);
2885 2882 }
2886 2883 ASSERT(cnt > 0);
2887 2884 AS_LOCK_EXIT(as, &as->a_lock);
2888 2885
2889 2886 cnt++;
2890 2887 kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2891 2888 }
2892 2889
2893 2890 /*
2894 2891 * unlock pages in a given address range
2895 2892 */
2896 2893 void
2897 2894 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2898 2895 enum seg_rw rw)
2899 2896 {
2900 2897 struct seg *seg;
2901 2898 size_t rsize;
2902 2899 caddr_t raddr;
2903 2900
2904 2901 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START,
2905 2902 "as_pageunlock_start: addr %p size %ld", addr, size);
2906 2903
2907 2904 /*
2908 2905  * if the shadow list is NULL, as_pagelock fell
2909 2906  * back to as_fault
2910 2907 */
2911 2908 if (pp == NULL) {
2912 2909 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2913 2910 return;
2914 2911 }
2915 2912
2916 2913 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2917 2914 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2918 2915 (size_t)raddr;
2919 2916
2920 2917 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2921 2918 seg = as_segat(as, raddr);
2922 2919 ASSERT(seg != NULL);
2923 2920
2924 2921 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2925 2922 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2926 2923
2927 2924 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2928 2925 if (raddr + rsize <= seg->s_base + seg->s_size) {
2929 2926 (void) segop_pagelock(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2930 2927 } else {
2931 2928 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2932 2929 return;
2933 2930 }
2934 2931 AS_LOCK_EXIT(as, &as->a_lock);
2935 2932 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2936 2933 }
2937 2934
2938 2935 int
2939 2936 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2940 2937 boolean_t wait)
2941 2938 {
2942 2939 struct seg *seg;
2943 2940 size_t ssize;
2944 2941 caddr_t raddr; /* rounded down addr */
2945 2942 size_t rsize; /* rounded up size */
2946 2943 int error = 0;
2947 2944 size_t pgsz = page_get_pagesize(szc);
2948 2945
2949 2946 setpgsz_top:
2950 2947 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) {
2951 2948 return (EINVAL);
2952 2949 }
2953 2950
2954 2951 raddr = addr;
2955 2952 rsize = size;
2956 2953
2957 2954 if (raddr + rsize < raddr) /* check for wraparound */
2958 2955 return (ENOMEM);
2959 2956
2960 2957 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2961 2958 as_clearwatchprot(as, raddr, rsize);
2962 2959 seg = as_segat(as, raddr);
2963 2960 if (seg == NULL) {
2964 2961 as_setwatch(as);
2965 2962 AS_LOCK_EXIT(as, &as->a_lock);
2966 2963 return (ENOMEM);
2967 2964 }
2968 2965
2969 2966 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2970 2967 if (raddr >= seg->s_base + seg->s_size) {
2971 2968 seg = AS_SEGNEXT(as, seg);
2972 2969 if (seg == NULL || raddr != seg->s_base) {
2973 2970 error = ENOMEM;
2974 2971 break;
2975 2972 }
2976 2973 }
2977 2974 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2978 2975 ssize = seg->s_base + seg->s_size - raddr;
2979 2976 } else {
2980 2977 ssize = rsize;
2981 2978 }
2982 2979
2983 2980 retry:
2984 2981 error = segop_setpagesize(seg, raddr, ssize, szc);
2985 2982
2986 2983 if (error == IE_NOMEM) {
2987 2984 error = EAGAIN;
2988 2985 break;
2989 2986 }
2990 2987
2991 2988 if (error == IE_RETRY) {
2992 2989 AS_LOCK_EXIT(as, &as->a_lock);
2993 2990 goto setpgsz_top;
2994 2991 }
2995 2992
2996 2993 if (error == ENOTSUP) {
2997 2994 error = EINVAL;
2998 2995 break;
2999 2996 }
3000 2997
3001 2998 if (wait && (error == EAGAIN)) {
3002 2999 /*
3003 3000 * Memory is currently locked. It must be unlocked
3004 3001 * before this operation can succeed through a retry.
3005 3002 * The possible reasons for locked memory and
3006 3003 * corresponding strategies for unlocking are:
3007 3004 * (1) Normal I/O
3008 3005 * wait for a signal that the I/O operation
3009 3006 * has completed and the memory is unlocked.
3010 3007 * (2) Asynchronous I/O
3011 3008 * The aio subsystem does not unlock pages when
3012 3009 * the I/O is completed. Those pages are unlocked
3013 3010 * when the application calls aiowait/aioerror.
3014 3011 * So, to prevent blocking forever, cv_broadcast()
3015 3012 * is done to wake up aio_cleanup_thread.
3016 3013 * Subsequently, segvn_reclaim will be called, and
3017 3014 * that will do AS_CLRUNMAPWAIT() and wake us up.
3018 3015 * (3) Long term page locking:
3019 3016 * This is not relevant for as_setpagesize()
3020 3017 * because we cannot change the page size for
3021 3018 * driver memory. The attempt to do so will
3022 3019 * fail with a different error than EAGAIN so
3023 3020 * there's no need to trigger as callbacks like
3024 3021 * as_unmap, as_setprot or as_free would do.
3025 3022 */
3026 3023 mutex_enter(&as->a_contents);
3027 3024 if (!AS_ISNOUNMAPWAIT(as)) {
3028 3025 if (AS_ISUNMAPWAIT(as) == 0) {
3029 3026 cv_broadcast(&as->a_cv);
3030 3027 }
3031 3028 AS_SETUNMAPWAIT(as);
3032 3029 AS_LOCK_EXIT(as, &as->a_lock);
3033 3030 while (AS_ISUNMAPWAIT(as)) {
3034 3031 cv_wait(&as->a_cv, &as->a_contents);
3035 3032 }
3036 3033 } else {
3037 3034 /*
3038 3035 * We may have raced with
3039 3036 * segvn_reclaim()/segspt_reclaim(). In this
3040 3037 * case clean nounmapwait flag and retry since
3041 3038 * softlockcnt in this segment may be already
3042 3039 * 0. We don't drop as writer lock so our
3043 3040 * number of retries without sleeping should
3044 3041 * be very small. See segvn_reclaim() for
3045 3042 * more comments.
3046 3043 */
3047 3044 AS_CLRNOUNMAPWAIT(as);
3048 3045 mutex_exit(&as->a_contents);
3049 3046 goto retry;
3050 3047 }
3051 3048 mutex_exit(&as->a_contents);
3052 3049 goto setpgsz_top;
3053 3050 } else if (error != 0) {
3054 3051 break;
3055 3052 }
3056 3053 }
3057 3054 as_setwatch(as);
3058 3055 AS_LOCK_EXIT(as, &as->a_lock);
3059 3056 return (error);
3060 3057 }
3061 3058
3062 3059 /*
3063 3060 * as_iset3_default_lpsize() just calls segop_setpagesize() on all segments
3064 3061 * in its chunk where s_szc is less than the szc we want to set.
3065 3062 */
3066 3063 static int
3067 3064 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3068 3065 int *retry)
3069 3066 {
3070 3067 struct seg *seg;
3071 3068 size_t ssize;
3072 3069 int error;
3073 3070
3074 3071 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3075 3072
3076 3073 seg = as_segat(as, raddr);
3077 3074 if (seg == NULL) {
3078 3075 panic("as_iset3_default_lpsize: no seg");
3079 3076 }
3080 3077
3081 3078 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
3082 3079 if (raddr >= seg->s_base + seg->s_size) {
3083 3080 seg = AS_SEGNEXT(as, seg);
3084 3081 if (seg == NULL || raddr != seg->s_base) {
3085 3082 panic("as_iset3_default_lpsize: as changed");
3086 3083 }
3087 3084 }
3088 3085 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3089 3086 ssize = seg->s_base + seg->s_size - raddr;
3090 3087 } else {
3091 3088 ssize = rsize;
3092 3089 }
3093 3090
3094 3091 if (szc > seg->s_szc) {
3095 3092 error = segop_setpagesize(seg, raddr, ssize, szc);
3096 3093 /* Only retry on EINVAL segments that have no vnode. */
3097 3094 if (error == EINVAL) {
3098 3095 vnode_t *vp = NULL;
3099 3096 if ((segop_gettype(seg, raddr) & MAP_SHARED) &&
3100 3097 (segop_getvp(seg, raddr, &vp) != 0 ||
3101 3098 vp == NULL)) {
3102 3099 *retry = 1;
3103 3100 } else {
3104 3101 *retry = 0;
3105 3102 }
3106 3103 }
3107 3104 if (error) {
3108 3105 return (error);
3109 3106 }
3110 3107 }
3111 3108 }
3112 3109 return (0);
3113 3110 }
3114 3111
3115 3112 /*
3116 3113 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
3117 3114 * pagesize on each segment in its range, but if any fails with EINVAL,
3118 3115 * then it reduces the pagesizes to the next size in the bitmap and
3119 3116  * retries as_iset3_default_lpsize(). The reason the code retries
3120 3117  * smaller allowed sizes on EINVAL is that (a) the anon offset may not
3121 3118 * match the bigger sizes, and (b) it's hard to get this offset (to begin
3122 3119 * with) to pass to map_pgszcvec().
3123 3120 */
3124 3121 static int
3125 3122 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc,
3126 3123 uint_t szcvec)
3127 3124 {
3128 3125 int error;
3129 3126 int retry;
3130 3127
3131 3128 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3132 3129
3133 3130 for (;;) {
3134 3131 error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
3135 3132 if (error == EINVAL && retry) {
3136 3133 szcvec &= ~(1 << szc);
3137 3134 if (szcvec <= 1) {
3138 3135 return (EINVAL);
3139 3136 }
3140 3137 szc = highbit(szcvec) - 1;
3141 3138 } else {
3142 3139 return (error);
3143 3140 }
3144 3141 }
3145 3142 }
3146 3143
3147 3144 /*
3148 3145 * as_iset1_default_lpsize() breaks its chunk into areas where existing
3149 3146 * segments have a smaller szc than we want to set. For each such area,
3150 3147  * it calls as_iset2_default_lpsize().
3151 3148 */
3152 3149 static int
3153 3150 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3154 3151 uint_t szcvec)
3155 3152 {
3156 3153 struct seg *seg;
3157 3154 size_t ssize;
3158 3155 caddr_t setaddr = raddr;
3159 3156 size_t setsize = 0;
3160 3157 int set;
3161 3158 int error;
3162 3159
3163 3160 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3164 3161
3165 3162 seg = as_segat(as, raddr);
3166 3163 if (seg == NULL) {
3167 3164 panic("as_iset1_default_lpsize: no seg");
3168 3165 }
3169 3166 if (seg->s_szc < szc) {
3170 3167 set = 1;
3171 3168 } else {
3172 3169 set = 0;
3173 3170 }
3174 3171
3175 3172 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3176 3173 if (raddr >= seg->s_base + seg->s_size) {
3177 3174 seg = AS_SEGNEXT(as, seg);
3178 3175 if (seg == NULL || raddr != seg->s_base) {
3179 3176 panic("as_iset1_default_lpsize: as changed");
3180 3177 }
3181 3178 if (seg->s_szc >= szc && set) {
3182 3179 ASSERT(setsize != 0);
3183 3180 error = as_iset2_default_lpsize(as,
3184 3181 setaddr, setsize, szc, szcvec);
3185 3182 if (error) {
3186 3183 return (error);
3187 3184 }
3188 3185 set = 0;
3189 3186 } else if (seg->s_szc < szc && !set) {
3190 3187 setaddr = raddr;
3191 3188 setsize = 0;
3192 3189 set = 1;
3193 3190 }
3194 3191 }
3195 3192 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3196 3193 ssize = seg->s_base + seg->s_size - raddr;
3197 3194 } else {
3198 3195 ssize = rsize;
3199 3196 }
3200 3197 }
3201 3198 error = 0;
3202 3199 if (set) {
3203 3200 ASSERT(setsize != 0);
3204 3201 error = as_iset2_default_lpsize(as, setaddr, setsize,
3205 3202 szc, szcvec);
3206 3203 }
3207 3204 return (error);
3208 3205 }
3209 3206
3210 3207 /*
3211 3208 * as_iset_default_lpsize() breaks its chunk according to the size code bitmap
3212 3209 * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
3213 3210 * chunk to as_iset1_default_lpsize().
3214 3211 */
3215 3212 static int
3216 3213 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags,
3217 3214 int type)
3218 3215 {
3219 3216 int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
3220 3217 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr,
3221 3218 flags, rtype, 1);
3222 3219 uint_t szc;
3223 3220 uint_t nszc;
3224 3221 int error;
3225 3222 caddr_t a;
3226 3223 caddr_t eaddr;
3227 3224 size_t segsize;
3228 3225 size_t pgsz;
3229 3226 uint_t save_szcvec;
3230 3227
3231 3228 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3232 3229 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
3233 3230 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
3234 3231
3235 3232 szcvec &= ~1;
3236 3233 if (szcvec <= 1) { /* skip if base page size */
3237 3234 return (0);
3238 3235 }
3239 3236
3240 3237 /* Get the pagesize of the first larger page size. */
3241 3238 szc = lowbit(szcvec) - 1;
3242 3239 pgsz = page_get_pagesize(szc);
3243 3240 eaddr = addr + size;
3244 3241 addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3245 3242 eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3246 3243
3247 3244 save_szcvec = szcvec;
3248 3245 szcvec >>= (szc + 1);
3249 3246 nszc = szc;
3250 3247 while (szcvec) {
3251 3248 if ((szcvec & 0x1) == 0) {
3252 3249 nszc++;
3253 3250 szcvec >>= 1;
3254 3251 continue;
3255 3252 }
3256 3253 nszc++;
3257 3254 pgsz = page_get_pagesize(nszc);
3258 3255 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3259 3256 if (a != addr) {
3260 3257 ASSERT(szc > 0);
3261 3258 ASSERT(a < eaddr);
3262 3259 segsize = a - addr;
3263 3260 error = as_iset1_default_lpsize(as, addr, segsize, szc,
3264 3261 save_szcvec);
3265 3262 if (error) {
3266 3263 return (error);
3267 3264 }
3268 3265 addr = a;
3269 3266 }
3270 3267 szc = nszc;
3271 3268 szcvec >>= 1;
3272 3269 }
3273 3270
3274 3271 ASSERT(addr < eaddr);
3275 3272 szcvec = save_szcvec;
3276 3273 while (szcvec) {
3277 3274 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3278 3275 ASSERT(a >= addr);
3279 3276 if (a != addr) {
3280 3277 ASSERT(szc > 0);
3281 3278 segsize = a - addr;
3282 3279 error = as_iset1_default_lpsize(as, addr, segsize, szc,
3283 3280 save_szcvec);
3284 3281 if (error) {
3285 3282 return (error);
3286 3283 }
3287 3284 addr = a;
3288 3285 }
3289 3286 szcvec &= ~(1 << szc);
3290 3287 if (szcvec) {
3291 3288 szc = highbit(szcvec) - 1;
3292 3289 pgsz = page_get_pagesize(szc);
3293 3290 }
3294 3291 }
3295 3292 ASSERT(addr == eaddr);
3296 3293
3297 3294 return (0);
3298 3295 }
3299 3296
3300 3297 /*
3301 3298 * Set the default large page size for the range. Called via memcntl with
3302 3299 * page size set to 0. as_set_default_lpsize breaks the range down into
3303 3300  * chunks with the same type/flags, ignores non-segvn segments, and passes
3304 3301 * each chunk to as_iset_default_lpsize().
3305 3302 */
3306 3303 int
3307 3304 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
3308 3305 {
3309 3306 struct seg *seg;
3310 3307 caddr_t raddr;
3311 3308 size_t rsize;
3312 3309 size_t ssize;
3313 3310 int rtype, rflags;
3314 3311 int stype, sflags;
3315 3312 int error;
3316 3313 caddr_t setaddr;
3317 3314 size_t setsize;
3318 3315 int segvn;
3319 3316
3320 3317 if (size == 0)
3321 3318 return (0);
3322 3319
3323 3320 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3324 3321 again:
3325 3322 error = 0;
3326 3323
3327 3324 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3328 3325 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3329 3326 (size_t)raddr;
3330 3327
3331 3328 if (raddr + rsize < raddr) { /* check for wraparound */
3332 3329 AS_LOCK_EXIT(as, &as->a_lock);
3333 3330 return (ENOMEM);
3334 3331 }
3335 3332 as_clearwatchprot(as, raddr, rsize);
3336 3333 seg = as_segat(as, raddr);
3337 3334 if (seg == NULL) {
3338 3335 as_setwatch(as);
3339 3336 AS_LOCK_EXIT(as, &as->a_lock);
3340 3337 return (ENOMEM);
3341 3338 }
3342 3339 if (seg->s_ops == &segvn_ops) {
3343 3340 rtype = segop_gettype(seg, addr);
3344 3341 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3345 3342 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3346 3343 segvn = 1;
3347 3344 } else {
3348 3345 segvn = 0;
3349 3346 }
3350 3347 setaddr = raddr;
3351 3348 setsize = 0;
3352 3349
3353 3350 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3354 3351 if (raddr >= (seg->s_base + seg->s_size)) {
3355 3352 seg = AS_SEGNEXT(as, seg);
3356 3353 if (seg == NULL || raddr != seg->s_base) {
3357 3354 error = ENOMEM;
3358 3355 break;
3359 3356 }
3360 3357 if (seg->s_ops == &segvn_ops) {
3361 3358 stype = segop_gettype(seg, raddr);
3362 3359 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3363 3360 stype &= (MAP_SHARED | MAP_PRIVATE);
3364 3361 if (segvn && (rflags != sflags ||
3365 3362 rtype != stype)) {
3366 3363 /*
3367 3364 * The next segment is also segvn but
3368 3365 * has different flags and/or type.
3369 3366 */
3370 3367 ASSERT(setsize != 0);
3371 3368 error = as_iset_default_lpsize(as,
3372 3369 setaddr, setsize, rflags, rtype);
3373 3370 if (error) {
3374 3371 break;
3375 3372 }
3376 3373 rflags = sflags;
3377 3374 rtype = stype;
3378 3375 setaddr = raddr;
3379 3376 setsize = 0;
3380 3377 } else if (!segvn) {
3381 3378 rflags = sflags;
3382 3379 rtype = stype;
3383 3380 setaddr = raddr;
3384 3381 setsize = 0;
3385 3382 segvn = 1;
3386 3383 }
3387 3384 } else if (segvn) {
3388 3385 /* The next segment is not segvn. */
3389 3386 ASSERT(setsize != 0);
3390 3387 error = as_iset_default_lpsize(as,
3391 3388 setaddr, setsize, rflags, rtype);
3392 3389 if (error) {
3393 3390 break;
3394 3391 }
3395 3392 segvn = 0;
3396 3393 }
3397 3394 }
3398 3395 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3399 3396 ssize = seg->s_base + seg->s_size - raddr;
3400 3397 } else {
3401 3398 ssize = rsize;
3402 3399 }
3403 3400 }
3404 3401 if (error == 0 && segvn) {
3405 3402 /* The last chunk when rsize == 0. */
3406 3403 ASSERT(setsize != 0);
3407 3404 error = as_iset_default_lpsize(as, setaddr, setsize,
3408 3405 rflags, rtype);
3409 3406 }
3410 3407
3411 3408 if (error == IE_RETRY) {
3412 3409 goto again;
3413 3410 } else if (error == IE_NOMEM) {
3414 3411 error = EAGAIN;
3415 3412 } else if (error == ENOTSUP) {
3416 3413 error = EINVAL;
3417 3414 } else if (error == EAGAIN) {
3418 3415 mutex_enter(&as->a_contents);
3419 3416 if (!AS_ISNOUNMAPWAIT(as)) {
3420 3417 if (AS_ISUNMAPWAIT(as) == 0) {
3421 3418 cv_broadcast(&as->a_cv);
3422 3419 }
3423 3420 AS_SETUNMAPWAIT(as);
3424 3421 AS_LOCK_EXIT(as, &as->a_lock);
3425 3422 while (AS_ISUNMAPWAIT(as)) {
3426 3423 cv_wait(&as->a_cv, &as->a_contents);
3427 3424 }
3428 3425 mutex_exit(&as->a_contents);
3429 3426 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3430 3427 } else {
3431 3428 /*
3432 3429 * We may have raced with
3433 3430 * segvn_reclaim()/segspt_reclaim(). In this case
3434 3431  * clear the nounmapwait flag and retry, since softlockcnt
3435 3432  * in this segment may already be 0. We don't drop the as
3436 3433  * writer lock, so our number of retries without
3437 3434 * sleeping should be very small. See segvn_reclaim()
3438 3435 * for more comments.
3439 3436 */
3440 3437 AS_CLRNOUNMAPWAIT(as);
3441 3438 mutex_exit(&as->a_contents);
3442 3439 }
3443 3440 goto again;
3444 3441 }
3445 3442
3446 3443 as_setwatch(as);
3447 3444 AS_LOCK_EXIT(as, &as->a_lock);
3448 3445 return (error);
3449 3446 }
3450 3447
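For reference, the path into as_set_default_lpsize() starts at memcntl(2)
with cmd MC_HAT_ADVISE and a page size of 0, as the block comment above
notes. A hedged userland sketch of that entry point, per the memcntl(2)
man page (advise_default_lpsize is a hypothetical wrapper; error handling
is left to the caller):

	#include <sys/types.h>
	#include <sys/mman.h>

	/*
	 * Ask the kernel to choose a default large page size for
	 * [addr, addr + len).  mha_pagesize == 0 is what routes the
	 * request to as_set_default_lpsize() above.
	 */
	static int
	advise_default_lpsize(caddr_t addr, size_t len)
	{
		struct memcntl_mha mha;

		mha.mha_cmd = MHA_MAPSIZE_VA;
		mha.mha_flags = 0;
		mha.mha_pagesize = 0;	/* 0 == use the default */

		return (memcntl(addr, len, MC_HAT_ADVISE,
		    (caddr_t)&mha, 0, 0));
	}
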
3451 3448 /*
3452 3449  * Set up all of the uninitialized watched pages that we can.
3453 3450 */
3454 3451 void
3455 3452 as_setwatch(struct as *as)
3456 3453 {
3457 3454 struct watched_page *pwp;
3458 3455 struct seg *seg;
3459 3456 caddr_t vaddr;
3460 3457 uint_t prot;
3461 3458 int err, retrycnt;
3462 3459
3463 3460 if (avl_numnodes(&as->a_wpage) == 0)
3464 3461 return;
3465 3462
3466 3463 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3467 3464
3468 3465 for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3469 3466 pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3470 3467 retrycnt = 0;
3471 3468 retry:
3472 3469 vaddr = pwp->wp_vaddr;
3473 3470 if (pwp->wp_oprot != 0 || /* already set up */
3474 3471 (seg = as_segat(as, vaddr)) == NULL ||
3475 3472 segop_getprot(seg, vaddr, 0, &prot) != 0)
3476 3473 continue;
3477 3474
3478 3475 pwp->wp_oprot = prot;
3479 3476 if (pwp->wp_read)
3480 3477 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3481 3478 if (pwp->wp_write)
3482 3479 prot &= ~PROT_WRITE;
3483 3480 if (pwp->wp_exec)
3484 3481 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3485 3482 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3486 3483 err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3487 3484 if (err == IE_RETRY) {
3488 3485 pwp->wp_oprot = 0;
3489 3486 ASSERT(retrycnt == 0);
3490 3487 retrycnt++;
3491 3488 goto retry;
3492 3489 }
3493 3490 }
3494 3491 pwp->wp_prot = prot;
3495 3492 }
3496 3493 }
3497 3494
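Watched pages originate from /proc watchpoints. A hedged sketch of how a
debugger arms one, per proc(4): write a PCWATCH message to the target's
ctl file (arm_write_watch is a hypothetical helper, and ctlfd is assumed
to be an open descriptor for /proc/<pid>/ctl):

	#include <sys/types.h>
	#include <procfs.h>
	#include <string.h>
	#include <unistd.h>

	/*
	 * Arm a write watchpoint covering [vaddr, vaddr + size).  The
	 * kernel records it in as->a_wpage, and as_setwatch() above later
	 * strips PROT_WRITE from the affected pages so stores fault.
	 */
	static int
	arm_write_watch(int ctlfd, uintptr_t vaddr, size_t size)
	{
		struct {
			long cmd;
			prwatch_t wp;
		} msg;

		(void) memset(&msg, 0, sizeof (msg));
		msg.cmd = PCWATCH;
		msg.wp.pr_vaddr = vaddr;
		msg.wp.pr_size = size;
		msg.wp.pr_wflags = WA_WRITE | WA_TRAPAFTER;

		if (write(ctlfd, &msg, sizeof (msg)) != sizeof (msg))
			return (-1);
		return (0);
	}
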
3498 3495 /*
3499 3496 * Clear all of the watched pages in the address space.
3500 3497 */
3501 3498 void
3502 3499 as_clearwatch(struct as *as)
3503 3500 {
3504 3501 struct watched_page *pwp;
3505 3502 struct seg *seg;
3506 3503 caddr_t vaddr;
3507 3504 uint_t prot;
3508 3505 int err, retrycnt;
3509 3506
3510 3507 if (avl_numnodes(&as->a_wpage) == 0)
3511 3508 return;
3512 3509
3513 3510 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3514 3511
3515 3512 for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3516 3513 pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3517 3514 retrycnt = 0;
3518 3515 retry:
3519 3516 vaddr = pwp->wp_vaddr;
3520 3517 if (pwp->wp_oprot == 0 || /* not set up */
3521 3518 (seg = as_segat(as, vaddr)) == NULL)
3522 3519 continue;
3523 3520
3524 3521 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3525 3522 err = segop_setprot(seg, vaddr, PAGESIZE, prot);
3526 3523 if (err == IE_RETRY) {
3527 3524 ASSERT(retrycnt == 0);
3528 3525 retrycnt++;
3529 3526 goto retry;
3530 3527 }
3531 3528 }
3532 3529 pwp->wp_oprot = 0;
3533 3530 pwp->wp_prot = 0;
3534 3531 }
3535 3532 }
3536 3533
3537 3534 /*
3538 3535 * Force a new setup for all the watched pages in the range.
3539 3536 */
3540 3537 static void
3541 3538 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3542 3539 {
3543 3540 struct watched_page *pwp;
3544 3541 struct watched_page tpw;
3545 3542 caddr_t eaddr = addr + size;
3546 3543 caddr_t vaddr;
3547 3544 struct seg *seg;
3548 3545 int err, retrycnt;
3549 3546 uint_t wprot;
3550 3547 avl_index_t where;
3551 3548
3552 3549 if (avl_numnodes(&as->a_wpage) == 0)
3553 3550 return;
3554 3551
3555 3552 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3556 3553
3557 3554 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3558 3555 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3559 3556 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3560 3557
3561 3558 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3562 3559 retrycnt = 0;
3563 3560 vaddr = pwp->wp_vaddr;
3564 3561
3565 3562 wprot = prot;
3566 3563 if (pwp->wp_read)
3567 3564 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3568 3565 if (pwp->wp_write)
3569 3566 wprot &= ~PROT_WRITE;
3570 3567 if (pwp->wp_exec)
3571 3568 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3572 3569 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3573 3570 retry:
3574 3571 seg = as_segat(as, vaddr);
3575 3572 if (seg == NULL) {
3576 3573 panic("as_setwatchprot: no seg");
3577 3574 /*NOTREACHED*/
3578 3575 }
3579 3576 err = segop_setprot(seg, vaddr, PAGESIZE, wprot);
3580 3577 if (err == IE_RETRY) {
3581 3578 ASSERT(retrycnt == 0);
3582 3579 retrycnt++;
3583 3580 goto retry;
3584 3581 }
3585 3582 }
3586 3583 pwp->wp_oprot = prot;
3587 3584 pwp->wp_prot = wprot;
3588 3585
3589 3586 pwp = AVL_NEXT(&as->a_wpage, pwp);
3590 3587 }
3591 3588 }
3592 3589
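The protection math above is worth spelling out: a write watchpoint only
needs to strip PROT_WRITE, while read and exec watchpoints strip every
permission, since read and execute accesses generally cannot be trapped
independently of each other. The same mask logic, condensed into one
hypothetical helper (watched_prot is not a function in this file):

	#include <sys/mman.h>

	/*
	 * Effective protection for a watched page, mirroring the
	 * wp_read/wp_write/wp_exec cases in as_setwatchprot() above.
	 */
	static unsigned int
	watched_prot(unsigned int prot, int wp_read, int wp_write,
	    int wp_exec)
	{
		if (wp_read || wp_exec)
			prot &= ~(PROT_READ | PROT_WRITE | PROT_EXEC);
		if (wp_write)
			prot &= ~PROT_WRITE;
		return (prot);
	}
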
3593 3590 /*
3594 3591 * Clear all of the watched pages in the range.
3595 3592 */
3596 3593 static void
3597 3594 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3598 3595 {
3599 3596 caddr_t eaddr = addr + size;
3600 3597 struct watched_page *pwp;
3601 3598 struct watched_page tpw;
3602 3599 uint_t prot;
3603 3600 struct seg *seg;
3604 3601 int err, retrycnt;
3605 3602 avl_index_t where;
3606 3603
3607 3604 if (avl_numnodes(&as->a_wpage) == 0)
3608 3605 return;
3609 3606
3610 3607 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3611 3608 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3612 3609 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3613 3610
3614 3611 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3615 3612
3616 3613 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3617 3614
3618 3615 if ((prot = pwp->wp_oprot) != 0) {
3619 3616 retrycnt = 0;
3620 3617
3621 3618 if (prot != pwp->wp_prot) {
3622 3619 retry:
3623 3620 seg = as_segat(as, pwp->wp_vaddr);
3624 3621 if (seg == NULL)
3625 3622 continue;
3626 3623 err = segop_setprot(seg, pwp->wp_vaddr,
3627 3624 PAGESIZE, prot);
3628 3625 if (err == IE_RETRY) {
3629 3626 ASSERT(retrycnt == 0);
3630 3627 retrycnt++;
3631 3628 goto retry;
3632 3629
3633 3630 }
3634 3631 }
3635 3632 pwp->wp_oprot = 0;
3636 3633 pwp->wp_prot = 0;
3637 3634 }
3638 3635
3639 3636 pwp = AVL_NEXT(&as->a_wpage, pwp);
3640 3637 }
3641 3638 }
3642 3639
3643 3640 void
3644 3641 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3645 3642 {
3646 3643 struct proc *p;
3647 3644
3648 3645 mutex_enter(&pidlock);
3649 3646 for (p = practive; p; p = p->p_next) {
3650 3647 if (p->p_as == as) {
3651 3648 mutex_enter(&p->p_lock);
3652 3649 if (p->p_as == as)
3653 3650 sigaddq(p, NULL, siginfo, KM_NOSLEEP);
3654 3651 mutex_exit(&p->p_lock);
3655 3652 }
3656 3653 }
3657 3654 mutex_exit(&pidlock);
3658 3655 }
3659 3656
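Note the pattern in as_signal_proc(): p_as is read optimistically while
holding only pidlock, then re-checked under p_lock, the lock that actually
protects p_as against concurrent change. A generic kernel-style sketch of
the same check-lock-recheck idiom (the struct and names are illustrative,
not from this file):

	#include <sys/types.h>
	#include <sys/mutex.h>

	/* obj_ref is protected by obj_lock. */
	struct obj {
		kmutex_t	obj_lock;
		void		*obj_ref;
	};

	static void
	act_if_match(struct obj *o, void *target)
	{
		if (o->obj_ref != target)	/* cheap, possibly stale */
			return;
		mutex_enter(&o->obj_lock);
		if (o->obj_ref == target) {	/* authoritative re-check */
			/* ... act on the object ... */
		}
		mutex_exit(&o->obj_lock);
	}
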
3660 3657 /*
3661 3658  * Return the memory object ID for a given address.
3662 3659 */
3663 3660 int
3664 3661 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3665 3662 {
3666 3663 struct seg *seg;
3667 3664 int sts;
3668 3665
3669 3666 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3670 3667 seg = as_segat(as, addr);
3671 3668 if (seg == NULL) {
3672 3669 AS_LOCK_EXIT(as, &as->a_lock);
3673 3670 return (EFAULT);
3674 3671 }
3675 3672
3676 3673 sts = segop_getmemid(seg, addr, memidp);
3677 3674
3678 3675 AS_LOCK_EXIT(as, &as->a_lock);
3679 3676 return (sts);
3680 3677 }
(1035 lines elided)