remove xhat
The xhat infrastructure was added to support hardware such as the Zulu
graphics card, i.e. hardware with its own on-board MMU. The VM used the
xhat code to keep the CPU's and Zulu's page tables in sync. Since the
only xhat user was Zulu (which is gone), we can safely remove xhat,
simplifying the whole VM subsystem.
Assorted notes:
- The AS_BUSY flag was used solely by xhat, so its AS_SETBUSY/AS_CLRBUSY uses go away as well
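
For review orientation, here is a condensed sketch of the pattern this change
deletes, using as_free() as the example. It is illustrative only, not a
compilable excerpt: the callback drain and the segment-unmap loop are elided,
and the authoritative change is the full diff below.

    /* Before: as_free() had to fence off and tear down attached XHATs. */
    mutex_enter(&as->a_contents);
    AS_SETBUSY(as);                 /* keep new XHATs from attaching mid-free */
    mutex_exit(&as->a_contents);

    hat_free_start(hat);
    if (as->a_xhat != NULL)
            xhat_free_start_all(as);    /* mirror teardown into device MMUs */
    /* ... unmap all segments ... */
    hat_free_end(hat);
    if (as->a_xhat != NULL)
            xhat_free_end_all(as);

    /* After: exactly one HAT per address space, so no fencing is needed. */
    hat_free_start(hat);
    /* ... unmap all segments ... */
    hat_free_end(hat);

The same AS_SETBUSY/AS_CLRBUSY bracketing and xhat_*_all mirroring disappear
from as_dup() and as_fault() in the diff below.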
--- old/usr/src/uts/common/vm/vm_as.c
+++ new/usr/src/uts/common/vm/vm_as.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2015, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 /*
31 31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 32 * The Regents of the University of California
33 33 * All Rights Reserved
34 34 *
35 35 * University Acknowledgment- Portions of this document are derived from
36 36 * software developed by the University of California, Berkeley, and its
37 37 * contributors.
38 38 */
39 39
40 40 /*
41 41 * VM - address spaces.
42 42 */
43 43
44 44 #include <sys/types.h>
45 45 #include <sys/t_lock.h>
46 46 #include <sys/param.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/mman.h>
50 50 #include <sys/sysmacros.h>
51 51 #include <sys/cpuvar.h>
52 52 #include <sys/sysinfo.h>
53 53 #include <sys/kmem.h>
54 54 #include <sys/vnode.h>
55 55 #include <sys/vmsystm.h>
56 56 #include <sys/cmn_err.h>
57 57 #include <sys/debug.h>
58 58 #include <sys/tnf_probe.h>
59 59 #include <sys/vtrace.h>
60 60
61 61 #include <vm/hat.h>
62 -#include <vm/xhat.h>
63 62 #include <vm/as.h>
64 63 #include <vm/seg.h>
65 64 #include <vm/seg_vn.h>
66 65 #include <vm/seg_dev.h>
67 66 #include <vm/seg_kmem.h>
68 67 #include <vm/seg_map.h>
69 68 #include <vm/seg_spt.h>
70 69 #include <vm/page.h>
71 70
72 71 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
73 72
74 73 static struct kmem_cache *as_cache;
75 74
76 75 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
77 76 static void as_clearwatchprot(struct as *, caddr_t, size_t);
78 77 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
79 78
80 79
81 80 /*
82 81 * Verifying the segment lists is very time-consuming; it may not be
83 82 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
84 83 */
85 84 #ifdef DEBUG
86 85 #define VERIFY_SEGLIST
87 86 int do_as_verify = 0;
88 87 #endif
89 88
90 89 /*
91 90 * Allocate a new callback data structure entry and fill in the events of
92 91 * interest, the address range of interest, and the callback argument.
93 92 * Link the entry on the as->a_callbacks list. A callback entry for the
94 93 * entire address space may be specified with vaddr = 0 and size = -1.
95 94 *
96 95 * CALLERS RESPONSIBILITY: If not calling from within the process context for
97 96 * the specified as, the caller must guarantee persistence of the specified as
98 97 * for the duration of this function (eg. pages being locked within the as
99 98 * will guarantee persistence).
100 99 */
101 100 int
102 101 as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
103 102 caddr_t vaddr, size_t size, int sleepflag)
104 103 {
105 104 struct as_callback *current_head, *cb;
106 105 caddr_t saddr;
107 106 size_t rsize;
108 107
109 108 /* callback function and an event are mandatory */
110 109 if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
111 110 return (EINVAL);
112 111
113 112 /* Adding a callback after as_free has been called is not allowed */
114 113 if (as == &kas)
115 114 return (ENOMEM);
116 115
117 116 /*
118 117 * vaddr = 0 and size = -1 is used to indicate that the callback range
119 118 * is the entire address space so no rounding is done in that case.
120 119 */
121 120 if (size != -1) {
122 121 saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
123 122 rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
124 123 (size_t)saddr;
125 124 /* check for wraparound */
126 125 if (saddr + rsize < saddr)
127 126 return (ENOMEM);
128 127 } else {
129 128 if (vaddr != 0)
130 129 return (EINVAL);
131 130 saddr = vaddr;
132 131 rsize = size;
133 132 }
134 133
135 134 /* Allocate and initialize a callback entry */
136 135 cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
137 136 if (cb == NULL)
138 137 return (EAGAIN);
139 138
140 139 cb->ascb_func = cb_func;
141 140 cb->ascb_arg = arg;
142 141 cb->ascb_events = events;
143 142 cb->ascb_saddr = saddr;
144 143 cb->ascb_len = rsize;
145 144
146 145 /* Add the entry to the list */
147 146 mutex_enter(&as->a_contents);
148 147 current_head = as->a_callbacks;
149 148 as->a_callbacks = cb;
150 149 cb->ascb_next = current_head;
151 150
152 151 /*
153 152 * The call to this function may lose in a race with
154 153 * a pertinent event - eg. a thread does long term memory locking
155 154 * but before the callback is added another thread executes as_unmap.
156 155 * A broadcast here resolves that.
157 156 */
158 157 if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
159 158 AS_CLRUNMAPWAIT(as);
160 159 cv_broadcast(&as->a_cv);
161 160 }
162 161
163 162 mutex_exit(&as->a_contents);
164 163 return (0);
165 164 }
166 165
167 166 /*
168 167 * Search the callback list for an entry which pertains to arg.
169 168 *
170 169 * This is called from within the client upon completion of the callback.
171 170 * RETURN VALUES:
172 171 * AS_CALLBACK_DELETED (callback entry found and deleted)
173 172 * AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
174 173 * AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
175 174 * entry will be made in as_do_callbacks)
176 175 *
177 176 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
178 177 * set, it indicates that as_do_callbacks is processing this entry. The
179 178 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
180 179 * to unblock as_do_callbacks, in case it is blocked.
181 180 *
182 181 * CALLERS RESPONSIBILITY: If not calling from within the process context for
183 182 * the specified as, the caller must guarantee persistence of the specified as
184 183 * for the duration of this function (eg. pages being locked within the as
185 184 * will guarantee persistence).
186 185 */
187 186 uint_t
188 187 as_delete_callback(struct as *as, void *arg)
189 188 {
190 189 struct as_callback **prevcb = &as->a_callbacks;
191 190 struct as_callback *cb;
192 191 uint_t rc = AS_CALLBACK_NOTFOUND;
193 192
194 193 mutex_enter(&as->a_contents);
195 194 for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
196 195 if (cb->ascb_arg != arg)
197 196 continue;
198 197
199 198 /*
200 199 * If the events indicate AS_CALLBACK_CALLED, just clear
201 200 * AS_ALL_EVENT in the events field and wakeup the thread
202 201 * that may be waiting in as_do_callbacks. as_do_callbacks
203 202 * will take care of removing this entry from the list. In
204 203 * that case, return AS_CALLBACK_DELETE_DEFERRED. Otherwise
205 204 * (AS_CALLBACK_CALLED not set), just remove it from the
206 205 * list, return the memory and return AS_CALLBACK_DELETED.
207 206 */
208 207 if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
209 208 /* leave AS_CALLBACK_CALLED */
210 209 cb->ascb_events &= ~AS_ALL_EVENT;
211 210 rc = AS_CALLBACK_DELETE_DEFERRED;
212 211 cv_broadcast(&as->a_cv);
213 212 } else {
214 213 *prevcb = cb->ascb_next;
215 214 kmem_free(cb, sizeof (struct as_callback));
216 215 rc = AS_CALLBACK_DELETED;
217 216 }
218 217 break;
219 218 }
220 219 mutex_exit(&as->a_contents);
221 220 return (rc);
222 221 }
223 222
224 223 /*
225 224 * Searches the as callback list for a matching entry.
226 225 * Returns a pointer to the first matching callback, or NULL if
227 226 * nothing is found.
228 227 	 * This function never sleeps so it is ok to call it with locks
229 228 	 * held in addition to the (required) a_contents mutex.
230 229 *
231 230 * See also comment on as_do_callbacks below.
232 231 */
233 232 static struct as_callback *
234 233 as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
235 234 size_t event_len)
236 235 {
237 236 struct as_callback *cb;
238 237
239 238 ASSERT(MUTEX_HELD(&as->a_contents));
240 239 for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
241 240 /*
242 241 * If the callback has not already been called, then
243 242 * check if events or address range pertains. An event_len
244 243 * of zero means do an unconditional callback.
245 244 */
246 245 if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
247 246 ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
248 247 (event_addr + event_len < cb->ascb_saddr) ||
249 248 (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
250 249 continue;
251 250 }
252 251 break;
253 252 }
254 253 return (cb);
255 254 }
256 255
257 256 /*
258 257 * Executes a given callback and removes it from the callback list for
259 258 * this address space.
260 259 * This function may sleep so the caller must drop all locks except
261 260 * a_contents before calling this func.
262 261 *
263 262 * See also comments on as_do_callbacks below.
264 263 */
265 264 static void
266 265 as_execute_callback(struct as *as, struct as_callback *cb,
267 266 uint_t events)
268 267 {
269 268 struct as_callback **prevcb;
270 269 void *cb_arg;
271 270
272 271 ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
273 272 cb->ascb_events |= AS_CALLBACK_CALLED;
274 273 mutex_exit(&as->a_contents);
275 274 (*cb->ascb_func)(as, cb->ascb_arg, events);
276 275 mutex_enter(&as->a_contents);
277 276 /*
278 277 * the callback function is required to delete the callback
279 278 * when the callback function determines it is OK for
280 279 * this thread to continue. as_delete_callback will clear
281 280 * the AS_ALL_EVENT in the events field when it is deleted.
282 281 * If the callback function called as_delete_callback,
283 282 * events will already be cleared and there will be no blocking.
284 283 */
285 284 while ((cb->ascb_events & events) != 0) {
286 285 cv_wait(&as->a_cv, &as->a_contents);
287 286 }
288 287 /*
289 288 * This entry needs to be taken off the list. Normally, the
290 289 * callback func itself does that, but unfortunately the list
291 290 * may have changed while the callback was running because the
292 291 * a_contents mutex was dropped and someone else other than the
293 292 * callback func itself could have called as_delete_callback,
294 293 * so we have to search to find this entry again. The entry
295 294 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
296 295 */
297 296 cb_arg = cb->ascb_arg;
298 297 prevcb = &as->a_callbacks;
299 298 for (cb = as->a_callbacks; cb != NULL;
300 299 prevcb = &cb->ascb_next, cb = *prevcb) {
301 300 if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
302 301 (cb_arg != cb->ascb_arg)) {
303 302 continue;
304 303 }
305 304 *prevcb = cb->ascb_next;
306 305 kmem_free(cb, sizeof (struct as_callback));
307 306 break;
308 307 }
309 308 }
310 309
311 310 /*
312 311 * Check the callback list for a matching event and intersection of
313 312 * address range. If there is a match invoke the callback. Skip an entry if:
314 313 * - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
315 314 * - not event of interest
316 315 * - not address range of interest
317 316 *
318 317 * An event_len of zero indicates a request for an unconditional callback
319 318 * (regardless of event), only the AS_CALLBACK_CALLED is checked. The
320 319 * a_contents lock must be dropped before a callback, so only one callback
321 320 * can be done before returning. Return -1 (true) if a callback was
322 321 * executed and removed from the list, else return 0 (false).
323 322 *
324 323 * The logically separate parts, i.e. finding a matching callback and
325 324 * executing a given callback have been separated into two functions
326 325 * so that they can be called with different sets of locks held beyond
327 326 * the always-required a_contents. as_find_callback does not sleep so
328 327 * it is ok to call it if more locks than a_contents (i.e. the a_lock
329 328 * rwlock) are held. as_execute_callback on the other hand may sleep
330 329 * so all locks beyond a_contents must be dropped by the caller if one
331 330 	 * does not want to end up comatose.
332 331 */
333 332 static int
334 333 as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
335 334 size_t event_len)
336 335 {
337 336 struct as_callback *cb;
338 337
339 338 if ((cb = as_find_callback(as, events, event_addr, event_len))) {
340 339 as_execute_callback(as, cb, events);
341 340 return (-1);
342 341 }
343 342 return (0);
344 343 }
345 344
346 345 /*
347 346 * Search for the segment containing addr. If a segment containing addr
348 347 * exists, that segment is returned. If no such segment exists, and
349 348 * the list spans addresses greater than addr, then the first segment
350 349 * whose base is greater than addr is returned; otherwise, NULL is
351 350 * returned unless tail is true, in which case the last element of the
352 351 * list is returned.
353 352 *
354 353 * a_seglast is used to cache the last found segment for repeated
355 354 * searches to the same addr (which happens frequently).
356 355 */
357 356 struct seg *
358 357 as_findseg(struct as *as, caddr_t addr, int tail)
359 358 {
360 359 struct seg *seg = as->a_seglast;
361 360 avl_index_t where;
362 361
363 362 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
364 363
365 364 if (seg != NULL &&
366 365 seg->s_base <= addr &&
367 366 addr < seg->s_base + seg->s_size)
368 367 return (seg);
369 368
370 369 seg = avl_find(&as->a_segtree, &addr, &where);
371 370 if (seg != NULL)
372 371 return (as->a_seglast = seg);
373 372
374 373 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
375 374 if (seg == NULL && tail)
376 375 seg = avl_last(&as->a_segtree);
377 376 return (as->a_seglast = seg);
378 377 }
379 378
380 379 #ifdef VERIFY_SEGLIST
381 380 /*
382 381 * verify that the linked list is coherent
383 382 */
384 383 static void
385 384 as_verify(struct as *as)
386 385 {
387 386 struct seg *seg, *seglast, *p, *n;
388 387 uint_t nsegs = 0;
389 388
390 389 if (do_as_verify == 0)
391 390 return;
392 391
393 392 seglast = as->a_seglast;
394 393
395 394 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
396 395 ASSERT(seg->s_as == as);
397 396 p = AS_SEGPREV(as, seg);
398 397 n = AS_SEGNEXT(as, seg);
399 398 ASSERT(p == NULL || p->s_as == as);
400 399 ASSERT(p == NULL || p->s_base < seg->s_base);
401 400 ASSERT(n == NULL || n->s_base > seg->s_base);
402 401 ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
403 402 if (seg == seglast)
404 403 seglast = NULL;
405 404 nsegs++;
406 405 }
407 406 ASSERT(seglast == NULL);
408 407 ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
409 408 }
410 409 #endif /* VERIFY_SEGLIST */
411 410
412 411 /*
413 412 * Add a new segment to the address space. The avl_find()
414 413 * may be expensive so we attempt to use last segment accessed
415 414 * in as_gap() as an insertion point.
416 415 */
417 416 int
418 417 as_addseg(struct as *as, struct seg *newseg)
419 418 {
420 419 struct seg *seg;
421 420 caddr_t addr;
422 421 caddr_t eaddr;
423 422 avl_index_t where;
424 423
425 424 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
426 425
427 426 as->a_updatedir = 1; /* inform /proc */
428 427 gethrestime(&as->a_updatetime);
429 428
430 429 if (as->a_lastgaphl != NULL) {
431 430 struct seg *hseg = NULL;
432 431 struct seg *lseg = NULL;
433 432
434 433 if (as->a_lastgaphl->s_base > newseg->s_base) {
435 434 hseg = as->a_lastgaphl;
436 435 lseg = AVL_PREV(&as->a_segtree, hseg);
437 436 } else {
438 437 lseg = as->a_lastgaphl;
439 438 hseg = AVL_NEXT(&as->a_segtree, lseg);
440 439 }
441 440
442 441 if (hseg && lseg && lseg->s_base < newseg->s_base &&
443 442 hseg->s_base > newseg->s_base) {
444 443 avl_insert_here(&as->a_segtree, newseg, lseg,
445 444 AVL_AFTER);
446 445 as->a_lastgaphl = NULL;
447 446 as->a_seglast = newseg;
448 447 return (0);
449 448 }
450 449 as->a_lastgaphl = NULL;
451 450 }
452 451
453 452 addr = newseg->s_base;
454 453 eaddr = addr + newseg->s_size;
455 454 again:
456 455
457 456 seg = avl_find(&as->a_segtree, &addr, &where);
458 457
459 458 if (seg == NULL)
460 459 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
461 460
462 461 if (seg == NULL)
463 462 seg = avl_last(&as->a_segtree);
464 463
465 464 if (seg != NULL) {
466 465 caddr_t base = seg->s_base;
467 466
468 467 /*
469 468 * If top of seg is below the requested address, then
470 469 * the insertion point is at the end of the linked list,
471 470 * and seg points to the tail of the list. Otherwise,
472 471 * the insertion point is immediately before seg.
473 472 */
474 473 if (base + seg->s_size > addr) {
475 474 if (addr >= base || eaddr > base) {
476 475 #ifdef __sparc
477 476 extern struct seg_ops segnf_ops;
478 477
479 478 /*
480 479 * no-fault segs must disappear if overlaid.
481 480 * XXX need new segment type so
482 481 * we don't have to check s_ops
483 482 */
484 483 if (seg->s_ops == &segnf_ops) {
485 484 seg_unmap(seg);
486 485 goto again;
487 486 }
488 487 #endif
489 488 return (-1); /* overlapping segment */
490 489 }
491 490 }
492 491 }
493 492 as->a_seglast = newseg;
494 493 avl_insert(&as->a_segtree, newseg, where);
495 494
496 495 #ifdef VERIFY_SEGLIST
497 496 as_verify(as);
498 497 #endif
499 498 return (0);
500 499 }
501 500
502 501 struct seg *
503 502 as_removeseg(struct as *as, struct seg *seg)
504 503 {
505 504 avl_tree_t *t;
506 505
507 506 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
508 507
509 508 as->a_updatedir = 1; /* inform /proc */
510 509 gethrestime(&as->a_updatetime);
511 510
512 511 if (seg == NULL)
513 512 return (NULL);
514 513
515 514 t = &as->a_segtree;
516 515 if (as->a_seglast == seg)
517 516 as->a_seglast = NULL;
518 517 as->a_lastgaphl = NULL;
519 518
520 519 /*
521 520 * if this segment is at an address higher than
522 521 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
523 522 */
524 523 if (as->a_lastgap &&
525 524 (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
526 525 as->a_lastgap = AVL_NEXT(t, seg);
527 526
528 527 /*
529 528 * remove the segment from the seg tree
530 529 */
531 530 avl_remove(t, seg);
532 531
533 532 #ifdef VERIFY_SEGLIST
534 533 as_verify(as);
535 534 #endif
536 535 return (seg);
537 536 }
538 537
539 538 /*
540 539 * Find a segment containing addr.
541 540 */
542 541 struct seg *
543 542 as_segat(struct as *as, caddr_t addr)
544 543 {
545 544 struct seg *seg = as->a_seglast;
546 545
547 546 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
548 547
549 548 if (seg != NULL && seg->s_base <= addr &&
550 549 addr < seg->s_base + seg->s_size)
551 550 return (seg);
552 551
553 552 seg = avl_find(&as->a_segtree, &addr, NULL);
554 553 return (seg);
555 554 }
556 555
557 556 /*
558 557 * Serialize all searches for holes in an address space to
559 558 * prevent two or more threads from allocating the same virtual
560 559 * address range. The address space must not be "read/write"
561 560 * locked by the caller since we may block.
562 561 */
563 562 void
564 563 as_rangelock(struct as *as)
565 564 {
566 565 mutex_enter(&as->a_contents);
567 566 while (AS_ISCLAIMGAP(as))
568 567 cv_wait(&as->a_cv, &as->a_contents);
569 568 AS_SETCLAIMGAP(as);
570 569 mutex_exit(&as->a_contents);
571 570 }
572 571
573 572 /*
574 573 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
575 574 */
576 575 void
577 576 as_rangeunlock(struct as *as)
578 577 {
579 578 mutex_enter(&as->a_contents);
580 579 AS_CLRCLAIMGAP(as);
581 580 cv_signal(&as->a_cv);
582 581 mutex_exit(&as->a_contents);
583 582 }
584 583
585 584 /*
586 585 	 * compare segments (or just an address) by segment address range
587 586 */
588 587 static int
589 588 as_segcompar(const void *x, const void *y)
590 589 {
591 590 struct seg *a = (struct seg *)x;
592 591 struct seg *b = (struct seg *)y;
593 592
594 593 if (a->s_base < b->s_base)
595 594 return (-1);
596 595 if (a->s_base >= b->s_base + b->s_size)
597 596 return (1);
598 597 return (0);
599 598 }
600 599
601 600
602 601 void
603 602 as_avlinit(struct as *as)
604 603 {
605 604 avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
606 605 offsetof(struct seg, s_tree));
607 606 avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
608 607 offsetof(struct watched_page, wp_link));
609 608 }
610 609
611 610 /*ARGSUSED*/
612 611 static int
613 612 as_constructor(void *buf, void *cdrarg, int kmflags)
614 613 {
615 614 struct as *as = buf;
616 615
617 616 mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
618 617 cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
619 618 rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
620 619 as_avlinit(as);
621 620 return (0);
622 621 }
623 622
624 623 /*ARGSUSED1*/
625 624 static void
626 625 as_destructor(void *buf, void *cdrarg)
627 626 {
628 627 struct as *as = buf;
629 628
630 629 avl_destroy(&as->a_segtree);
631 630 mutex_destroy(&as->a_contents);
632 631 cv_destroy(&as->a_cv);
633 632 rw_destroy(&as->a_lock);
634 633 }
635 634
636 635 void
637 636 as_init(void)
638 637 {
639 638 as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
640 639 as_constructor, as_destructor, NULL, NULL, NULL, 0);
641 640 }
642 641
643 642 /*
644 643 * Allocate and initialize an address space data structure.
645 644 * We call hat_alloc to allow any machine dependent
646 645 * information in the hat structure to be initialized.
647 646 */
648 647 struct as *
649 648 as_alloc(void)
650 649 {
651 650 struct as *as;
652 651
653 652 as = kmem_cache_alloc(as_cache, KM_SLEEP);
654 653
655 654 as->a_flags = 0;
656 655 as->a_vbits = 0;
657 656 as->a_hrm = NULL;
658 657 as->a_seglast = NULL;
659 658 as->a_size = 0;
660 659 as->a_resvsize = 0;
661 660 as->a_updatedir = 0;
662 661 gethrestime(&as->a_updatetime);
663 662 as->a_objectdir = NULL;
664 663 as->a_sizedir = 0;
665 664 as->a_userlimit = (caddr_t)USERLIMIT;
666 665 as->a_lastgap = NULL;
667 666 as->a_lastgaphl = NULL;
668 667 as->a_callbacks = NULL;
669 668
670 669 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
671 670 as->a_hat = hat_alloc(as); /* create hat for default system mmu */
672 671 AS_LOCK_EXIT(as, &as->a_lock);
673 672
674 - as->a_xhat = NULL;
675 -
676 673 return (as);
677 674 }
678 675
679 676 /*
680 677 * Free an address space data structure.
681 678 * Need to free the hat first and then
682 679 * all the segments on this as and finally
683 680 * the space for the as struct itself.
684 681 */
685 682 void
686 683 as_free(struct as *as)
687 684 {
688 685 struct hat *hat = as->a_hat;
689 686 struct seg *seg, *next;
690 - int called = 0;
687 + boolean_t free_started = B_FALSE;
691 688
692 689 top:
693 690 /*
694 691 * Invoke ALL callbacks. as_do_callbacks will do one callback
695 692 * per call, and not return (-1) until the callback has completed.
696 693 * When as_do_callbacks returns zero, all callbacks have completed.
697 694 */
698 695 mutex_enter(&as->a_contents);
699 696 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
700 697 ;
701 698
702 - /* This will prevent new XHATs from attaching to as */
703 - if (!called)
704 - AS_SETBUSY(as);
705 699 mutex_exit(&as->a_contents);
706 700 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
707 701
708 - if (!called) {
709 - called = 1;
702 + if (!free_started) {
703 + free_started = B_TRUE;
710 704 hat_free_start(hat);
711 - if (as->a_xhat != NULL)
712 - xhat_free_start_all(as);
713 705 }
714 706 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
715 707 int err;
716 708
717 709 next = AS_SEGNEXT(as, seg);
718 710 retry:
719 711 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
720 712 if (err == EAGAIN) {
721 713 mutex_enter(&as->a_contents);
722 714 if (as->a_callbacks) {
723 715 AS_LOCK_EXIT(as, &as->a_lock);
724 716 } else if (!AS_ISNOUNMAPWAIT(as)) {
725 717 /*
726 718 * Memory is currently locked. Wait for a
727 719 * cv_signal that it has been unlocked, then
728 720 * try the operation again.
729 721 */
730 722 if (AS_ISUNMAPWAIT(as) == 0)
731 723 cv_broadcast(&as->a_cv);
732 724 AS_SETUNMAPWAIT(as);
733 725 AS_LOCK_EXIT(as, &as->a_lock);
734 726 while (AS_ISUNMAPWAIT(as))
735 727 cv_wait(&as->a_cv, &as->a_contents);
736 728 } else {
737 729 /*
738 730 * We may have raced with
739 731 * segvn_reclaim()/segspt_reclaim(). In this
740 732 * case clean nounmapwait flag and retry since
741 733 * softlockcnt in this segment may be already
742 734 * 0. We don't drop as writer lock so our
743 735 * number of retries without sleeping should
744 736 * be very small. See segvn_reclaim() for
745 737 * more comments.
746 738 */
747 739 AS_CLRNOUNMAPWAIT(as);
748 740 mutex_exit(&as->a_contents);
749 741 goto retry;
750 742 }
751 743 mutex_exit(&as->a_contents);
752 744 goto top;
753 745 } else {
754 746 /*
755 747 * We do not expect any other error return at this
756 748 * time. This is similar to an ASSERT in seg_unmap()
757 749 */
758 750 ASSERT(err == 0);
759 751 }
760 752 }
761 753 hat_free_end(hat);
762 - if (as->a_xhat != NULL)
763 - xhat_free_end_all(as);
764 754 AS_LOCK_EXIT(as, &as->a_lock);
765 755
766 756 /* /proc stuff */
767 757 ASSERT(avl_numnodes(&as->a_wpage) == 0);
768 758 if (as->a_objectdir) {
769 759 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
770 760 as->a_objectdir = NULL;
771 761 as->a_sizedir = 0;
772 762 }
773 763
774 764 /*
775 765 * Free the struct as back to kmem. Assert it has no segments.
776 766 */
777 767 ASSERT(avl_numnodes(&as->a_segtree) == 0);
778 768 kmem_cache_free(as_cache, as);
779 769 }
780 770
781 771 int
782 772 as_dup(struct as *as, struct proc *forkedproc)
783 773 {
784 774 struct as *newas;
785 775 struct seg *seg, *newseg;
786 776 size_t purgesize = 0;
787 777 int error;
788 778
789 779 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
790 780 as_clearwatch(as);
791 781 newas = as_alloc();
792 782 newas->a_userlimit = as->a_userlimit;
793 783 newas->a_proc = forkedproc;
794 784
795 785 AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
796 786
797 - /* This will prevent new XHATs from attaching */
798 - mutex_enter(&as->a_contents);
799 - AS_SETBUSY(as);
800 - mutex_exit(&as->a_contents);
801 - mutex_enter(&newas->a_contents);
802 - AS_SETBUSY(newas);
803 - mutex_exit(&newas->a_contents);
804 -
805 787 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
806 788
807 789 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
808 790
809 791 if (seg->s_flags & S_PURGE) {
810 792 purgesize += seg->s_size;
811 793 continue;
812 794 }
813 795
814 796 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
815 797 if (newseg == NULL) {
816 798 AS_LOCK_EXIT(newas, &newas->a_lock);
817 799 as_setwatch(as);
818 - mutex_enter(&as->a_contents);
819 - AS_CLRBUSY(as);
820 - mutex_exit(&as->a_contents);
821 800 AS_LOCK_EXIT(as, &as->a_lock);
822 801 as_free(newas);
823 802 return (-1);
824 803 }
825 804 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
826 805 /*
827 806 * We call seg_free() on the new seg
828 807 * because the segment is not set up
829 808 * completely; i.e. it has no ops.
830 809 */
831 810 as_setwatch(as);
832 - mutex_enter(&as->a_contents);
833 - AS_CLRBUSY(as);
834 - mutex_exit(&as->a_contents);
835 811 AS_LOCK_EXIT(as, &as->a_lock);
836 812 seg_free(newseg);
837 813 AS_LOCK_EXIT(newas, &newas->a_lock);
838 814 as_free(newas);
839 815 return (error);
840 816 }
841 817 newas->a_size += seg->s_size;
842 818 }
843 819 newas->a_resvsize = as->a_resvsize - purgesize;
844 820
845 821 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
846 - if (as->a_xhat != NULL)
847 - error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);
848 822
849 - mutex_enter(&newas->a_contents);
850 - AS_CLRBUSY(newas);
851 - mutex_exit(&newas->a_contents);
852 823 AS_LOCK_EXIT(newas, &newas->a_lock);
853 824
854 825 as_setwatch(as);
855 - mutex_enter(&as->a_contents);
856 - AS_CLRBUSY(as);
857 - mutex_exit(&as->a_contents);
858 826 AS_LOCK_EXIT(as, &as->a_lock);
859 827 if (error != 0) {
860 828 as_free(newas);
861 829 return (error);
862 830 }
863 831 forkedproc->p_as = newas;
864 832 return (0);
865 833 }
866 834
867 835 /*
868 836 * Handle a ``fault'' at addr for size bytes.
869 837 */
870 838 faultcode_t
871 839 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
872 840 enum fault_type type, enum seg_rw rw)
873 841 {
874 842 struct seg *seg;
875 843 caddr_t raddr; /* rounded down addr */
876 844 size_t rsize; /* rounded up size */
877 845 size_t ssize;
878 846 faultcode_t res = 0;
879 847 caddr_t addrsav;
880 848 struct seg *segsav;
881 849 int as_lock_held;
882 850 klwp_t *lwp = ttolwp(curthread);
883 - int is_xhat = 0;
884 851 int holding_wpage = 0;
885 - extern struct seg_ops segdev_ops;
886 852
887 853
888 854
889 - if (as->a_hat != hat) {
890 - /* This must be an XHAT then */
891 - is_xhat = 1;
892 -
893 - if ((type != F_INVAL) || (as == &kas))
894 - return (FC_NOSUPPORT);
895 - }
896 -
897 855 retry:
898 - if (!is_xhat) {
899 - /*
900 - * Indicate that the lwp is not to be stopped while waiting
901 - * for a pagefault. This is to avoid deadlock while debugging
902 - * a process via /proc over NFS (in particular).
903 - */
904 - if (lwp != NULL)
905 - lwp->lwp_nostop++;
856 + /*
857 + * Indicate that the lwp is not to be stopped while waiting for a
858 + * pagefault. This is to avoid deadlock while debugging a process
859 + * via /proc over NFS (in particular).
860 + */
861 + if (lwp != NULL)
862 + lwp->lwp_nostop++;
906 863
907 - /*
908 - * same length must be used when we softlock and softunlock.
909 - * We don't support softunlocking lengths less than
910 - * the original length when there is largepage support.
911 - * See seg_dev.c for more comments.
912 - */
913 - switch (type) {
864 + /*
865 + * same length must be used when we softlock and softunlock. We
866 + * don't support softunlocking lengths less than the original length
867 + * when there is largepage support. See seg_dev.c for more
868 + * comments.
869 + */
870 + switch (type) {
914 871
915 - case F_SOFTLOCK:
916 - CPU_STATS_ADD_K(vm, softlock, 1);
917 - break;
872 + case F_SOFTLOCK:
873 + CPU_STATS_ADD_K(vm, softlock, 1);
874 + break;
918 875
919 - case F_SOFTUNLOCK:
920 - break;
876 + case F_SOFTUNLOCK:
877 + break;
921 878
922 - case F_PROT:
923 - CPU_STATS_ADD_K(vm, prot_fault, 1);
924 - break;
879 + case F_PROT:
880 + CPU_STATS_ADD_K(vm, prot_fault, 1);
881 + break;
925 882
926 - case F_INVAL:
927 - CPU_STATS_ENTER_K();
928 - CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
929 - if (as == &kas)
930 - CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
931 - CPU_STATS_EXIT_K();
932 - break;
933 - }
883 + case F_INVAL:
884 + CPU_STATS_ENTER_K();
885 + CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
886 + if (as == &kas)
887 + CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
888 + CPU_STATS_EXIT_K();
889 + break;
934 890 }
935 891
936 892 /* Kernel probe */
937 893 TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
938 894 tnf_opaque, address, addr,
939 895 tnf_fault_type, fault_type, type,
940 896 tnf_seg_access, access, rw);
941 897
942 898 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
943 899 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
944 900 (size_t)raddr;
945 901
946 902 /*
947 903 * XXX -- Don't grab the as lock for segkmap. We should grab it for
948 904 * correctness, but then we could be stuck holding this lock for
949 905 * a LONG time if the fault needs to be resolved on a slow
950 906 * filesystem, and then no-one will be able to exec new commands,
951 907 * as exec'ing requires the write lock on the as.
952 908 */
953 909 if (as == &kas && segkmap && segkmap->s_base <= raddr &&
954 910 raddr + size < segkmap->s_base + segkmap->s_size) {
955 - /*
956 - * if (as==&kas), this can't be XHAT: we've already returned
957 - * FC_NOSUPPORT.
958 - */
959 911 seg = segkmap;
960 912 as_lock_held = 0;
961 913 } else {
962 914 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
963 - if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
964 - /*
965 - * Grab and hold the writers' lock on the as
966 - * if the fault is to a watched page.
967 - * This will keep CPUs from "peeking" at the
968 - * address range while we're temporarily boosting
969 - * the permissions for the XHAT device to
970 - * resolve the fault in the segment layer.
971 - *
972 - * We could check whether faulted address
973 - * is within a watched page and only then grab
974 - * the writer lock, but this is simpler.
975 - */
976 - AS_LOCK_EXIT(as, &as->a_lock);
977 - AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
978 - }
979 915
980 916 seg = as_segat(as, raddr);
981 917 if (seg == NULL) {
982 918 AS_LOCK_EXIT(as, &as->a_lock);
983 - if ((lwp != NULL) && (!is_xhat))
919 + if (lwp != NULL)
984 920 lwp->lwp_nostop--;
985 921 return (FC_NOMAP);
986 922 }
987 923
988 924 as_lock_held = 1;
989 925 }
990 926
991 927 addrsav = raddr;
992 928 segsav = seg;
993 929
994 930 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
995 931 if (raddr >= seg->s_base + seg->s_size) {
996 932 seg = AS_SEGNEXT(as, seg);
997 933 if (seg == NULL || raddr != seg->s_base) {
998 934 res = FC_NOMAP;
999 935 break;
1000 936 }
1001 937 }
1002 938 if (raddr + rsize > seg->s_base + seg->s_size)
1003 939 ssize = seg->s_base + seg->s_size - raddr;
1004 940 else
1005 941 ssize = rsize;
1006 942
1007 - if (!is_xhat || (seg->s_ops != &segdev_ops)) {
943 + res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
1008 944
1009 - if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
1010 - pr_is_watchpage_as(raddr, rw, as)) {
1011 - /*
1012 - * Handle watch pages. If we're faulting on a
1013 - * watched page from an X-hat, we have to
1014 - * restore the original permissions while we
1015 - * handle the fault.
1016 - */
1017 - as_clearwatch(as);
1018 - holding_wpage = 1;
1019 - }
1020 -
1021 - res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
1022 -
1023 - /* Restore watchpoints */
1024 - if (holding_wpage) {
1025 - as_setwatch(as);
1026 - holding_wpage = 0;
1027 - }
945 + /* Restore watchpoints */
946 + if (holding_wpage) {
947 + as_setwatch(as);
948 + holding_wpage = 0;
949 + }
1028 950
1029 - if (res != 0)
1030 - break;
1031 - } else {
1032 - /* XHAT does not support seg_dev */
1033 - res = FC_NOSUPPORT;
951 + if (res != 0)
1034 952 break;
1035 - }
1036 953 }
1037 954
1038 955 /*
1039 956 * If we were SOFTLOCKing and encountered a failure,
1040 957 * we must SOFTUNLOCK the range we already did. (Maybe we
1041 958 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
1042 959 * right here...)
1043 960 */
1044 961 if (res != 0 && type == F_SOFTLOCK) {
1045 962 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
1046 963 if (addrsav >= seg->s_base + seg->s_size)
1047 964 seg = AS_SEGNEXT(as, seg);
1048 965 ASSERT(seg != NULL);
1049 966 /*
1050 967 * Now call the fault routine again to perform the
1051 968 * unlock using S_OTHER instead of the rw variable
1052 969 * since we never got a chance to touch the pages.
1053 970 */
1054 971 if (raddr > seg->s_base + seg->s_size)
1055 972 ssize = seg->s_base + seg->s_size - addrsav;
1056 973 else
1057 974 ssize = raddr - addrsav;
1058 975 (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
1059 976 F_SOFTUNLOCK, S_OTHER);
1060 977 }
1061 978 }
1062 979 if (as_lock_held)
1063 980 AS_LOCK_EXIT(as, &as->a_lock);
1064 - if ((lwp != NULL) && (!is_xhat))
981 + if (lwp != NULL)
1065 982 lwp->lwp_nostop--;
1066 983
1067 984 /*
1068 985 * If the lower levels returned EDEADLK for a fault,
1069 986 	 * it means that we should retry the fault. Let's wait
1070 987 * a bit also to let the deadlock causing condition clear.
1071 988 * This is part of a gross hack to work around a design flaw
1072 989 * in the ufs/sds logging code and should go away when the
1073 990 * logging code is re-designed to fix the problem. See bug
1074 991 * 4125102 for details of the problem.
1075 992 */
1076 993 if (FC_ERRNO(res) == EDEADLK) {
1077 994 delay(deadlk_wait);
1078 995 res = 0;
1079 996 goto retry;
1080 997 }
1081 998 return (res);
1082 999 }
1083 1000
1084 1001
1085 1002
1086 1003 /*
1087 1004 * Asynchronous ``fault'' at addr for size bytes.
1088 1005 */
1089 1006 faultcode_t
1090 1007 as_faulta(struct as *as, caddr_t addr, size_t size)
1091 1008 {
1092 1009 struct seg *seg;
1093 1010 caddr_t raddr; /* rounded down addr */
1094 1011 size_t rsize; /* rounded up size */
1095 1012 faultcode_t res = 0;
1096 1013 klwp_t *lwp = ttolwp(curthread);
1097 1014
1098 1015 retry:
1099 1016 /*
1100 1017 * Indicate that the lwp is not to be stopped while waiting
1101 1018 * for a pagefault. This is to avoid deadlock while debugging
1102 1019 * a process via /proc over NFS (in particular).
1103 1020 */
1104 1021 if (lwp != NULL)
1105 1022 lwp->lwp_nostop++;
1106 1023
1107 1024 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1108 1025 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1109 1026 (size_t)raddr;
1110 1027
1111 1028 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1112 1029 seg = as_segat(as, raddr);
1113 1030 if (seg == NULL) {
1114 1031 AS_LOCK_EXIT(as, &as->a_lock);
1115 1032 if (lwp != NULL)
1116 1033 lwp->lwp_nostop--;
1117 1034 return (FC_NOMAP);
1118 1035 }
1119 1036
1120 1037 for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1121 1038 if (raddr >= seg->s_base + seg->s_size) {
1122 1039 seg = AS_SEGNEXT(as, seg);
1123 1040 if (seg == NULL || raddr != seg->s_base) {
1124 1041 res = FC_NOMAP;
1125 1042 break;
1126 1043 }
1127 1044 }
1128 1045 res = SEGOP_FAULTA(seg, raddr);
1129 1046 if (res != 0)
1130 1047 break;
1131 1048 }
1132 1049 AS_LOCK_EXIT(as, &as->a_lock);
1133 1050 if (lwp != NULL)
1134 1051 lwp->lwp_nostop--;
1135 1052 /*
1136 1053 * If the lower levels returned EDEADLK for a fault,
1137 1054 	 * it means that we should retry the fault. Let's wait
1138 1055 * a bit also to let the deadlock causing condition clear.
1139 1056 * This is part of a gross hack to work around a design flaw
1140 1057 * in the ufs/sds logging code and should go away when the
1141 1058 * logging code is re-designed to fix the problem. See bug
1142 1059 * 4125102 for details of the problem.
1143 1060 */
1144 1061 if (FC_ERRNO(res) == EDEADLK) {
1145 1062 delay(deadlk_wait);
1146 1063 res = 0;
1147 1064 goto retry;
1148 1065 }
1149 1066 return (res);
1150 1067 }
1151 1068
1152 1069 /*
1153 1070 * Set the virtual mapping for the interval from [addr : addr + size)
1154 1071 * in address space `as' to have the specified protection.
1155 1072 * It is ok for the range to cross over several segments,
1156 1073 * as long as they are contiguous.
1157 1074 */
1158 1075 int
1159 1076 as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1160 1077 {
1161 1078 struct seg *seg;
1162 1079 struct as_callback *cb;
1163 1080 size_t ssize;
1164 1081 caddr_t raddr; /* rounded down addr */
1165 1082 size_t rsize; /* rounded up size */
1166 1083 int error = 0, writer = 0;
1167 1084 caddr_t saveraddr;
1168 1085 size_t saversize;
1169 1086
1170 1087 setprot_top:
1171 1088 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1172 1089 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1173 1090 (size_t)raddr;
1174 1091
1175 1092 if (raddr + rsize < raddr) /* check for wraparound */
1176 1093 return (ENOMEM);
1177 1094
1178 1095 saveraddr = raddr;
1179 1096 saversize = rsize;
1180 1097
1181 1098 /*
1182 1099 * Normally we only lock the as as a reader. But
1183 1100 * if due to setprot the segment driver needs to split
1184 1101 * a segment it will return IE_RETRY. Therefore we re-acquire
1185 1102 * the as lock as a writer so the segment driver can change
1186 1103 * the seg list. Also the segment driver will return IE_RETRY
1187 1104 * after it has changed the segment list so we therefore keep
1188 1105 	 * locking as a writer. Since these operations should be rare we
1189 1106 	 * want to lock as a writer only when necessary.
1190 1107 */
1191 1108 if (writer || avl_numnodes(&as->a_wpage) != 0) {
1192 1109 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1193 1110 } else {
1194 1111 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1195 1112 }
1196 1113
1197 1114 as_clearwatchprot(as, raddr, rsize);
1198 1115 seg = as_segat(as, raddr);
1199 1116 if (seg == NULL) {
1200 1117 as_setwatch(as);
1201 1118 AS_LOCK_EXIT(as, &as->a_lock);
1202 1119 return (ENOMEM);
1203 1120 }
1204 1121
1205 1122 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1206 1123 if (raddr >= seg->s_base + seg->s_size) {
1207 1124 seg = AS_SEGNEXT(as, seg);
1208 1125 if (seg == NULL || raddr != seg->s_base) {
1209 1126 error = ENOMEM;
1210 1127 break;
1211 1128 }
1212 1129 }
1213 1130 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1214 1131 ssize = seg->s_base + seg->s_size - raddr;
1215 1132 else
1216 1133 ssize = rsize;
1217 1134 retry:
1218 1135 error = SEGOP_SETPROT(seg, raddr, ssize, prot);
1219 1136
1220 1137 if (error == IE_NOMEM) {
1221 1138 error = EAGAIN;
1222 1139 break;
1223 1140 }
1224 1141
1225 1142 if (error == IE_RETRY) {
1226 1143 AS_LOCK_EXIT(as, &as->a_lock);
1227 1144 writer = 1;
1228 1145 goto setprot_top;
1229 1146 }
1230 1147
1231 1148 if (error == EAGAIN) {
1232 1149 /*
1233 1150 * Make sure we have a_lock as writer.
1234 1151 */
1235 1152 if (writer == 0) {
1236 1153 AS_LOCK_EXIT(as, &as->a_lock);
1237 1154 writer = 1;
1238 1155 goto setprot_top;
1239 1156 }
1240 1157
1241 1158 /*
1242 1159 * Memory is currently locked. It must be unlocked
1243 1160 * before this operation can succeed through a retry.
1244 1161 * The possible reasons for locked memory and
1245 1162 * corresponding strategies for unlocking are:
1246 1163 * (1) Normal I/O
1247 1164 * wait for a signal that the I/O operation
1248 1165 * has completed and the memory is unlocked.
1249 1166 * (2) Asynchronous I/O
1250 1167 * The aio subsystem does not unlock pages when
1251 1168 * the I/O is completed. Those pages are unlocked
1252 1169 * when the application calls aiowait/aioerror.
1253 1170 * So, to prevent blocking forever, cv_broadcast()
1254 1171 * is done to wake up aio_cleanup_thread.
1255 1172 * Subsequently, segvn_reclaim will be called, and
1256 1173 * that will do AS_CLRUNMAPWAIT() and wake us up.
1257 1174 * (3) Long term page locking:
1258 1175 * Drivers intending to have pages locked for a
1259 1176 * period considerably longer than for normal I/O
1260 1177 * (essentially forever) may have registered for a
1261 1178 * callback so they may unlock these pages on
1262 1179 * request. This is needed to allow this operation
1263 1180 * to succeed. Each entry on the callback list is
1264 1181 * examined. If the event or address range pertains
1265 1182 * the callback is invoked (unless it already is in
1266 1183 * progress). The a_contents lock must be dropped
1267 1184 * before the callback, so only one callback can
1268 1185 * be done at a time. Go to the top and do more
1269 1186 * until zero is returned. If zero is returned,
1270 1187 * either there were no callbacks for this event
1271 1188 * or they were already in progress.
1272 1189 */
1273 1190 mutex_enter(&as->a_contents);
1274 1191 if (as->a_callbacks &&
1275 1192 (cb = as_find_callback(as, AS_SETPROT_EVENT,
1276 1193 seg->s_base, seg->s_size))) {
1277 1194 AS_LOCK_EXIT(as, &as->a_lock);
1278 1195 as_execute_callback(as, cb, AS_SETPROT_EVENT);
1279 1196 } else if (!AS_ISNOUNMAPWAIT(as)) {
1280 1197 if (AS_ISUNMAPWAIT(as) == 0)
1281 1198 cv_broadcast(&as->a_cv);
1282 1199 AS_SETUNMAPWAIT(as);
1283 1200 AS_LOCK_EXIT(as, &as->a_lock);
1284 1201 while (AS_ISUNMAPWAIT(as))
1285 1202 cv_wait(&as->a_cv, &as->a_contents);
1286 1203 } else {
1287 1204 /*
1288 1205 * We may have raced with
1289 1206 * segvn_reclaim()/segspt_reclaim(). In this
1290 1207 * case clean nounmapwait flag and retry since
1291 1208 * softlockcnt in this segment may be already
1292 1209 * 0. We don't drop as writer lock so our
1293 1210 * number of retries without sleeping should
1294 1211 * be very small. See segvn_reclaim() for
1295 1212 * more comments.
1296 1213 */
1297 1214 AS_CLRNOUNMAPWAIT(as);
1298 1215 mutex_exit(&as->a_contents);
1299 1216 goto retry;
1300 1217 }
1301 1218 mutex_exit(&as->a_contents);
1302 1219 goto setprot_top;
1303 1220 } else if (error != 0)
1304 1221 break;
1305 1222 }
1306 1223 if (error != 0) {
1307 1224 as_setwatch(as);
1308 1225 } else {
1309 1226 as_setwatchprot(as, saveraddr, saversize, prot);
1310 1227 }
1311 1228 AS_LOCK_EXIT(as, &as->a_lock);
1312 1229 return (error);
1313 1230 }
1314 1231
1315 1232 /*
1316 1233 * Check to make sure that the interval [addr, addr + size)
1317 1234 * in address space `as' has at least the specified protection.
1318 1235 * It is ok for the range to cross over several segments, as long
1319 1236 * as they are contiguous.
1320 1237 */
1321 1238 int
1322 1239 as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1323 1240 {
1324 1241 struct seg *seg;
1325 1242 size_t ssize;
1326 1243 caddr_t raddr; /* rounded down addr */
1327 1244 size_t rsize; /* rounded up size */
1328 1245 int error = 0;
1329 1246
1330 1247 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1331 1248 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1332 1249 (size_t)raddr;
1333 1250
1334 1251 if (raddr + rsize < raddr) /* check for wraparound */
1335 1252 return (ENOMEM);
1336 1253
1337 1254 /*
1338 1255 * This is ugly as sin...
1339 1256 * Normally, we only acquire the address space readers lock.
1340 1257 * However, if the address space has watchpoints present,
1341 1258 * we must acquire the writer lock on the address space for
1342 1259 * the benefit of as_clearwatchprot() and as_setwatchprot().
1343 1260 */
1344 1261 if (avl_numnodes(&as->a_wpage) != 0)
1345 1262 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1346 1263 else
1347 1264 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1348 1265 as_clearwatchprot(as, raddr, rsize);
1349 1266 seg = as_segat(as, raddr);
1350 1267 if (seg == NULL) {
1351 1268 as_setwatch(as);
1352 1269 AS_LOCK_EXIT(as, &as->a_lock);
1353 1270 return (ENOMEM);
1354 1271 }
1355 1272
1356 1273 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1357 1274 if (raddr >= seg->s_base + seg->s_size) {
1358 1275 seg = AS_SEGNEXT(as, seg);
1359 1276 if (seg == NULL || raddr != seg->s_base) {
1360 1277 error = ENOMEM;
1361 1278 break;
1362 1279 }
1363 1280 }
1364 1281 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1365 1282 ssize = seg->s_base + seg->s_size - raddr;
1366 1283 else
1367 1284 ssize = rsize;
1368 1285
1369 1286 error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
1370 1287 if (error != 0)
1371 1288 break;
1372 1289 }
1373 1290 as_setwatch(as);
1374 1291 AS_LOCK_EXIT(as, &as->a_lock);
1375 1292 return (error);
1376 1293 }
1377 1294
1378 1295 int
1379 1296 as_unmap(struct as *as, caddr_t addr, size_t size)
1380 1297 {
1381 1298 struct seg *seg, *seg_next;
1382 1299 struct as_callback *cb;
1383 1300 caddr_t raddr, eaddr;
1384 1301 size_t ssize, rsize = 0;
1385 1302 int err;
1386 1303
1387 1304 top:
1388 1305 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1389 1306 eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1390 1307 (uintptr_t)PAGEMASK);
1391 1308
1392 1309 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1393 1310
1394 1311 as->a_updatedir = 1; /* inform /proc */
1395 1312 gethrestime(&as->a_updatetime);
1396 1313
1397 1314 /*
1398 1315 * Use as_findseg to find the first segment in the range, then
1399 1316 * step through the segments in order, following s_next.
1400 1317 */
1401 1318 as_clearwatchprot(as, raddr, eaddr - raddr);
1402 1319
1403 1320 for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
1404 1321 if (eaddr <= seg->s_base)
1405 1322 break; /* eaddr was in a gap; all done */
1406 1323
1407 1324 /* this is implied by the test above */
1408 1325 ASSERT(raddr < eaddr);
1409 1326
1410 1327 if (raddr < seg->s_base)
1411 1328 raddr = seg->s_base; /* raddr was in a gap */
1412 1329
1413 1330 if (eaddr > (seg->s_base + seg->s_size))
1414 1331 ssize = seg->s_base + seg->s_size - raddr;
1415 1332 else
1416 1333 ssize = eaddr - raddr;
1417 1334
1418 1335 /*
1419 1336 * Save next segment pointer since seg can be
1420 1337 * destroyed during the segment unmap operation.
1421 1338 */
1422 1339 seg_next = AS_SEGNEXT(as, seg);
1423 1340
1424 1341 /*
1425 1342 * We didn't count /dev/null mappings, so ignore them here.
1426 1343 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1427 1344 * we have to do this check here while we have seg.)
1428 1345 */
1429 1346 rsize = 0;
1430 1347 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1431 1348 !SEG_IS_PARTIAL_RESV(seg))
1432 1349 rsize = ssize;
1433 1350
1434 1351 retry:
1435 1352 err = SEGOP_UNMAP(seg, raddr, ssize);
1436 1353 if (err == EAGAIN) {
1437 1354 /*
1438 1355 * Memory is currently locked. It must be unlocked
1439 1356 * before this operation can succeed through a retry.
1440 1357 * The possible reasons for locked memory and
1441 1358 * corresponding strategies for unlocking are:
1442 1359 * (1) Normal I/O
1443 1360 * wait for a signal that the I/O operation
1444 1361 * has completed and the memory is unlocked.
1445 1362 * (2) Asynchronous I/O
1446 1363 * The aio subsystem does not unlock pages when
1447 1364 * the I/O is completed. Those pages are unlocked
1448 1365 * when the application calls aiowait/aioerror.
1449 1366 * So, to prevent blocking forever, cv_broadcast()
1450 1367 * is done to wake up aio_cleanup_thread.
1451 1368 * Subsequently, segvn_reclaim will be called, and
1452 1369 * that will do AS_CLRUNMAPWAIT() and wake us up.
1453 1370 * (3) Long term page locking:
1454 1371 * Drivers intending to have pages locked for a
1455 1372 * period considerably longer than for normal I/O
1456 1373 * (essentially forever) may have registered for a
1457 1374 * callback so they may unlock these pages on
1458 1375 * request. This is needed to allow this operation
1459 1376 * to succeed. Each entry on the callback list is
1460 1377 * examined. If the event or address range pertains
1461 1378 * the callback is invoked (unless it already is in
1462 1379 * progress). The a_contents lock must be dropped
1463 1380 * before the callback, so only one callback can
1464 1381 * be done at a time. Go to the top and do more
1465 1382 * until zero is returned. If zero is returned,
1466 1383 * either there were no callbacks for this event
1467 1384 * or they were already in progress.
1468 1385 */
1469 1386 mutex_enter(&as->a_contents);
1470 1387 if (as->a_callbacks &&
1471 1388 (cb = as_find_callback(as, AS_UNMAP_EVENT,
1472 1389 seg->s_base, seg->s_size))) {
1473 1390 AS_LOCK_EXIT(as, &as->a_lock);
1474 1391 as_execute_callback(as, cb, AS_UNMAP_EVENT);
1475 1392 } else if (!AS_ISNOUNMAPWAIT(as)) {
1476 1393 if (AS_ISUNMAPWAIT(as) == 0)
1477 1394 cv_broadcast(&as->a_cv);
1478 1395 AS_SETUNMAPWAIT(as);
1479 1396 AS_LOCK_EXIT(as, &as->a_lock);
1480 1397 while (AS_ISUNMAPWAIT(as))
1481 1398 cv_wait(&as->a_cv, &as->a_contents);
1482 1399 } else {
1483 1400 /*
1484 1401 * We may have raced with
1485 1402 * segvn_reclaim()/segspt_reclaim(). In this
1486 1403 * case clean nounmapwait flag and retry since
1487 1404 * softlockcnt in this segment may be already
1488 1405 * 0. We don't drop as writer lock so our
1489 1406 * number of retries without sleeping should
1490 1407 * be very small. See segvn_reclaim() for
1491 1408 * more comments.
1492 1409 */
1493 1410 AS_CLRNOUNMAPWAIT(as);
1494 1411 mutex_exit(&as->a_contents);
1495 1412 goto retry;
1496 1413 }
1497 1414 mutex_exit(&as->a_contents);
1498 1415 goto top;
1499 1416 } else if (err == IE_RETRY) {
1500 1417 AS_LOCK_EXIT(as, &as->a_lock);
1501 1418 goto top;
1502 1419 } else if (err) {
1503 1420 as_setwatch(as);
1504 1421 AS_LOCK_EXIT(as, &as->a_lock);
1505 1422 return (-1);
1506 1423 }
1507 1424
1508 1425 as->a_size -= ssize;
1509 1426 if (rsize)
1510 1427 as->a_resvsize -= rsize;
1511 1428 raddr += ssize;
1512 1429 }
1513 1430 AS_LOCK_EXIT(as, &as->a_lock);
1514 1431 return (0);
1515 1432 }
1516 1433
1517 1434 static int
1518 1435 as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
1519 1436 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1520 1437 {
1521 1438 uint_t szc;
1522 1439 uint_t nszc;
1523 1440 int error;
1524 1441 caddr_t a;
1525 1442 caddr_t eaddr;
1526 1443 size_t segsize;
1527 1444 struct seg *seg;
1528 1445 size_t pgsz;
1529 1446 int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
1530 1447 uint_t save_szcvec;
1531 1448
1532 1449 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1533 1450 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1534 1451 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1535 1452 ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
1536 1453 if (!do_off) {
1537 1454 vn_a->offset = 0;
1538 1455 }
1539 1456
1540 1457 if (szcvec <= 1) {
1541 1458 seg = seg_alloc(as, addr, size);
1542 1459 if (seg == NULL) {
1543 1460 return (ENOMEM);
1544 1461 }
1545 1462 vn_a->szc = 0;
1546 1463 error = (*crfp)(seg, vn_a);
1547 1464 if (error != 0) {
1548 1465 seg_free(seg);
1549 1466 } else {
1550 1467 as->a_size += size;
1551 1468 as->a_resvsize += size;
1552 1469 }
1553 1470 return (error);
1554 1471 }
1555 1472
1556 1473 eaddr = addr + size;
1557 1474 save_szcvec = szcvec;
1558 1475 szcvec >>= 1;
1559 1476 szc = 0;
1560 1477 nszc = 0;
1561 1478 while (szcvec) {
1562 1479 if ((szcvec & 0x1) == 0) {
1563 1480 nszc++;
1564 1481 szcvec >>= 1;
1565 1482 continue;
1566 1483 }
1567 1484 nszc++;
1568 1485 pgsz = page_get_pagesize(nszc);
1569 1486 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
1570 1487 if (a != addr) {
1571 1488 ASSERT(a < eaddr);
1572 1489 segsize = a - addr;
1573 1490 seg = seg_alloc(as, addr, segsize);
1574 1491 if (seg == NULL) {
1575 1492 return (ENOMEM);
1576 1493 }
1577 1494 vn_a->szc = szc;
1578 1495 error = (*crfp)(seg, vn_a);
1579 1496 if (error != 0) {
1580 1497 seg_free(seg);
1581 1498 return (error);
1582 1499 }
1583 1500 as->a_size += segsize;
1584 1501 as->a_resvsize += segsize;
1585 1502 *segcreated = 1;
1586 1503 if (do_off) {
1587 1504 vn_a->offset += segsize;
1588 1505 }
1589 1506 addr = a;
1590 1507 }
1591 1508 szc = nszc;
1592 1509 szcvec >>= 1;
1593 1510 }
1594 1511
1595 1512 ASSERT(addr < eaddr);
1596 1513 szcvec = save_szcvec | 1; /* add 8K pages */
1597 1514 while (szcvec) {
1598 1515 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
1599 1516 ASSERT(a >= addr);
1600 1517 if (a != addr) {
1601 1518 segsize = a - addr;
1602 1519 seg = seg_alloc(as, addr, segsize);
1603 1520 if (seg == NULL) {
1604 1521 return (ENOMEM);
1605 1522 }
1606 1523 vn_a->szc = szc;
1607 1524 error = (*crfp)(seg, vn_a);
1608 1525 if (error != 0) {
1609 1526 seg_free(seg);
1610 1527 return (error);
1611 1528 }
1612 1529 as->a_size += segsize;
1613 1530 as->a_resvsize += segsize;
1614 1531 *segcreated = 1;
1615 1532 if (do_off) {
1616 1533 vn_a->offset += segsize;
1617 1534 }
1618 1535 addr = a;
1619 1536 }
1620 1537 szcvec &= ~(1 << szc);
1621 1538 if (szcvec) {
1622 1539 szc = highbit(szcvec) - 1;
1623 1540 pgsz = page_get_pagesize(szc);
1624 1541 }
1625 1542 }
1626 1543 ASSERT(addr == eaddr);
1627 1544
1628 1545 return (0);
1629 1546 }
1630 1547
1631 1548 static int
1632 1549 as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
1633 1550 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1634 1551 {
1635 1552 uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
1636 1553 int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
1637 1554 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1638 1555 type, 0);
1639 1556 int error;
1640 1557 struct seg *seg;
1641 1558 struct vattr va;
1642 1559 u_offset_t eoff;
1643 1560 size_t save_size = 0;
1644 1561 extern size_t textrepl_size_thresh;
1645 1562
1646 1563 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1647 1564 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1648 1565 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1649 1566 ASSERT(vn_a->vp != NULL);
1650 1567 ASSERT(vn_a->amp == NULL);
1651 1568
1652 1569 again:
1653 1570 if (szcvec <= 1) {
1654 1571 seg = seg_alloc(as, addr, size);
1655 1572 if (seg == NULL) {
1656 1573 return (ENOMEM);
1657 1574 }
1658 1575 vn_a->szc = 0;
1659 1576 error = (*crfp)(seg, vn_a);
1660 1577 if (error != 0) {
1661 1578 seg_free(seg);
1662 1579 } else {
1663 1580 as->a_size += size;
1664 1581 as->a_resvsize += size;
1665 1582 }
1666 1583 return (error);
1667 1584 }
1668 1585
1669 1586 va.va_mask = AT_SIZE;
1670 1587 if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
1671 1588 szcvec = 0;
1672 1589 goto again;
1673 1590 }
1674 1591 eoff = vn_a->offset & PAGEMASK;
1675 1592 if (eoff >= va.va_size) {
1676 1593 szcvec = 0;
1677 1594 goto again;
1678 1595 }
1679 1596 eoff += size;
1680 1597 if (btopr(va.va_size) < btopr(eoff)) {
1681 1598 save_size = size;
1682 1599 size = va.va_size - (vn_a->offset & PAGEMASK);
1683 1600 size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
1684 1601 szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1685 1602 type, 0);
1686 1603 if (szcvec <= 1) {
1687 1604 size = save_size;
1688 1605 goto again;
1689 1606 }
1690 1607 }
1691 1608
1692 1609 if (size > textrepl_size_thresh) {
1693 1610 vn_a->flags |= _MAP_TEXTREPL;
1694 1611 }
1695 1612 error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
1696 1613 segcreated);
1697 1614 if (error != 0) {
1698 1615 return (error);
1699 1616 }
1700 1617 if (save_size) {
1701 1618 addr += size;
1702 1619 size = save_size - size;
1703 1620 szcvec = 0;
1704 1621 goto again;
1705 1622 }
1706 1623 return (0);
1707 1624 }
1708 1625
1709 1626 /*
1710 1627 * as_map_ansegs: shared or private anonymous memory. Note that the flags
1711 1628  * passed to map_pgszcvec() cannot be MAP_INITDATA, for anon.
1712 1629 */
1713 1630 static int
1714 1631 as_map_ansegs(struct as *as, caddr_t addr, size_t size,
1715 1632 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1716 1633 {
1717 1634 uint_t szcvec;
1718 1635 uchar_t type;
1719 1636
1720 1637 ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
1721 1638 if (vn_a->type == MAP_SHARED) {
1722 1639 type = MAPPGSZC_SHM;
1723 1640 } else if (vn_a->type == MAP_PRIVATE) {
1724 1641 if (vn_a->szc == AS_MAP_HEAP) {
1725 1642 type = MAPPGSZC_HEAP;
1726 1643 } else if (vn_a->szc == AS_MAP_STACK) {
1727 1644 type = MAPPGSZC_STACK;
1728 1645 } else {
1729 1646 type = MAPPGSZC_PRIVM;
1730 1647 }
1731 1648 }
1732 1649 szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
1733 1650 (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
1734 1651 (vn_a->flags & MAP_TEXT), type, 0);
1735 1652 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1736 1653 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1737 1654 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1738 1655 ASSERT(vn_a->vp == NULL);
1739 1656
1740 1657 return (as_map_segvn_segs(as, addr, size, szcvec,
1741 1658 crfp, vn_a, segcreated));
1742 1659 }
1743 1660
1744 1661 int
1745 1662 as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
1746 1663 {
1747 1664 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1748 1665 return (as_map_locked(as, addr, size, crfp, argsp));
1749 1666 }
1750 1667
1751 1668 int
1752 1669 as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
1753 1670 void *argsp)
1754 1671 {
1755 1672 struct seg *seg = NULL;
1756 1673 caddr_t raddr; /* rounded down addr */
1757 1674 size_t rsize; /* rounded up size */
1758 1675 int error;
1759 1676 int unmap = 0;
1760 1677 struct proc *p = curproc;
1761 1678 struct segvn_crargs crargs;
1762 1679
1763 1680 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1764 1681 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1765 1682 (size_t)raddr;
1766 1683
1767 1684 /*
1768 1685  * check for wraparound
1769 1686 */
1770 1687 if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1771 1688 AS_LOCK_EXIT(as, &as->a_lock);
1772 1689 return (ENOMEM);
1773 1690 }
1774 1691
1775 1692 as->a_updatedir = 1; /* inform /proc */
1776 1693 gethrestime(&as->a_updatetime);
1777 1694
1778 1695 if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1779 1696 AS_LOCK_EXIT(as, &as->a_lock);
1780 1697
1781 1698 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1782 1699 RCA_UNSAFE_ALL);
1783 1700
1784 1701 return (ENOMEM);
1785 1702 }
1786 1703
1787 1704 if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1788 1705 crargs = *(struct segvn_crargs *)argsp;
1789 1706 error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1790 1707 if (error != 0) {
1791 1708 AS_LOCK_EXIT(as, &as->a_lock);
1792 1709 if (unmap) {
1793 1710 (void) as_unmap(as, addr, size);
1794 1711 }
1795 1712 return (error);
1796 1713 }
1797 1714 } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1798 1715 crargs = *(struct segvn_crargs *)argsp;
1799 1716 error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1800 1717 if (error != 0) {
1801 1718 AS_LOCK_EXIT(as, &as->a_lock);
1802 1719 if (unmap) {
1803 1720 (void) as_unmap(as, addr, size);
1804 1721 }
1805 1722 return (error);
1806 1723 }
1807 1724 } else {
1808 1725 seg = seg_alloc(as, addr, size);
1809 1726 if (seg == NULL) {
1810 1727 AS_LOCK_EXIT(as, &as->a_lock);
1811 1728 return (ENOMEM);
1812 1729 }
1813 1730
1814 1731 error = (*crfp)(seg, argsp);
1815 1732 if (error != 0) {
1816 1733 seg_free(seg);
1817 1734 AS_LOCK_EXIT(as, &as->a_lock);
1818 1735 return (error);
1819 1736 }
1820 1737 /*
1821 1738 * Add size now so as_unmap will work if as_ctl fails.
1822 1739 */
1823 1740 as->a_size += rsize;
1824 1741 as->a_resvsize += rsize;
1825 1742 }
1826 1743
1827 1744 as_setwatch(as);
1828 1745
1829 1746 /*
1830 1747 * If the address space is locked,
1831 1748 * establish memory locks for the new segment.
1832 1749 */
1833 1750 mutex_enter(&as->a_contents);
1834 1751 if (AS_ISPGLCK(as)) {
1835 1752 mutex_exit(&as->a_contents);
1836 1753 AS_LOCK_EXIT(as, &as->a_lock);
1837 1754 error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
1838 1755 if (error != 0)
1839 1756 (void) as_unmap(as, addr, size);
1840 1757 } else {
1841 1758 mutex_exit(&as->a_contents);
1842 1759 AS_LOCK_EXIT(as, &as->a_lock);
1843 1760 }
1844 1761 return (error);
1845 1762 }
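
Reviewer note: for orientation, a typical as_map() caller hands in a segment-create function and its argument block; the anonymous zero-fill mappings elsewhere in the tree reduce to roughly the line below. Sketch only: zfod_argsp is the stock segvn create-args template declared in <vm/seg_vn.h>, and p, addr and len stand for whatever the surrounding caller has.

	error = as_map(p->p_as, addr, len, segvn_create, zfod_argsp);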
1846 1763
1847 1764
1848 1765 /*
1849 1766 * Delete all segments in the address space marked with S_PURGE.
1850 1767 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
1851 1768 * These segments are deleted as a first step before calls to as_gap(), so
1852 1769 * that they don't affect mmap() or shmat().
1853 1770 */
1854 1771 void
1855 1772 as_purge(struct as *as)
1856 1773 {
1857 1774 struct seg *seg;
1858 1775 struct seg *next_seg;
1859 1776
1860 1777 /*
1861 1778  * the setting of AS_NEEDSPURGE is protected by as_rangelock(), so
1862 1779  * there is no need to grab the a_contents mutex for this check
1863 1780 */
1864 1781 if ((as->a_flags & AS_NEEDSPURGE) == 0)
1865 1782 return;
1866 1783
1867 1784 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1868 1785 next_seg = NULL;
1869 1786 seg = AS_SEGFIRST(as);
1870 1787 while (seg != NULL) {
1871 1788 next_seg = AS_SEGNEXT(as, seg);
1872 1789 if (seg->s_flags & S_PURGE)
1873 1790 SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1874 1791 seg = next_seg;
1875 1792 }
1876 1793 AS_LOCK_EXIT(as, &as->a_lock);
1877 1794
1878 1795 mutex_enter(&as->a_contents);
1879 1796 as->a_flags &= ~AS_NEEDSPURGE;
1880 1797 mutex_exit(&as->a_contents);
1881 1798 }
1882 1799
1883 1800 /*
1884 1801 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1885 1802 * range of addresses at least "minlen" long, where the base of the range is
1886 1803 * at "off" phase from an "align" boundary and there is space for a
1887 1804  * "redzone"-sized redzone on either side of the range. Thus,
1888 1805 * if align was 4M and off was 16k, the user wants a hole which will start
1889 1806 * 16k into a 4M page.
1890 1807 *
1891 1808 * If flags specifies AH_HI, the hole will have the highest possible address
1892 1809 * in the range. We use the as->a_lastgap field to figure out where to
1893 1810 * start looking for a gap.
1894 1811 *
1895 1812 * Otherwise, the gap will have the lowest possible address.
1896 1813 *
1897 1814 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
1898 1815 *
1899 1816 * If an adequate hole is found, *basep and *lenp are set to reflect the part of
1900 1817 * the hole that is within range, and 0 is returned. On failure, -1 is returned.
1901 1818 *
1902 1819 * NOTE: This routine is not correct when base+len overflows caddr_t.
1903 1820 */
1904 1821 int
1905 1822 as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
1906 1823 uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
1907 1824 {
1908 1825 caddr_t lobound = *basep;
1909 1826 caddr_t hibound = lobound + *lenp;
1910 1827 struct seg *lseg, *hseg;
1911 1828 caddr_t lo, hi;
1912 1829 int forward;
1913 1830 caddr_t save_base;
1914 1831 size_t save_len;
1915 1832 size_t save_minlen;
1916 1833 size_t save_redzone;
1917 1834 int fast_path = 1;
1918 1835
1919 1836 save_base = *basep;
1920 1837 save_len = *lenp;
1921 1838 save_minlen = minlen;
1922 1839 save_redzone = redzone;
1923 1840
1924 1841 /*
1925 1842 * For the first pass/fast_path, just add align and redzone into
1926 1843 * minlen since if we get an allocation, we can guarantee that it
1927 1844 * will fit the alignment and redzone requested.
1928 1845 * This increases the chance that hibound will be adjusted to
1929 1846 * a_lastgap->s_base which will likely allow us to find an
1930 1847  * acceptable hole in the address space more quickly.
1931 1848 * If we can't find a hole with this fast_path, then we look for
1932 1849 * smaller holes in which the alignment and offset may allow
1933 1850 * the allocation to fit.
1934 1851 */
1935 1852 minlen += align;
1936 1853 minlen += 2 * redzone;
1937 1854 redzone = 0;
1938 1855
1939 1856 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1940 1857 if (AS_SEGFIRST(as) == NULL) {
1941 1858 if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
1942 1859 align, redzone, off)) {
1943 1860 AS_LOCK_EXIT(as, &as->a_lock);
1944 1861 return (0);
1945 1862 } else {
1946 1863 AS_LOCK_EXIT(as, &as->a_lock);
1947 1864 *basep = save_base;
1948 1865 *lenp = save_len;
1949 1866 return (-1);
1950 1867 }
1951 1868 }
1952 1869
1953 1870 retry:
1954 1871 /*
1955 1872 * Set up to iterate over all the inter-segment holes in the given
1956 1873 * direction. lseg is NULL for the lowest-addressed hole and hseg is
1957 1874 * NULL for the highest-addressed hole. If moving backwards, we reset
1958 1875  * hseg to denote the highest-addressed segment.
1959 1876 */
1960 1877 forward = (flags & AH_DIR) == AH_LO;
1961 1878 if (forward) {
1962 1879 hseg = as_findseg(as, lobound, 1);
1963 1880 lseg = AS_SEGPREV(as, hseg);
1964 1881 } else {
1965 1882
1966 1883 /*
1967 1884 * If allocating at least as much as the last allocation,
1968 1885 * use a_lastgap's base as a better estimate of hibound.
1969 1886 */
1970 1887 if (as->a_lastgap &&
1971 1888 minlen >= as->a_lastgap->s_size &&
1972 1889 hibound >= as->a_lastgap->s_base)
1973 1890 hibound = as->a_lastgap->s_base;
1974 1891
1975 1892 hseg = as_findseg(as, hibound, 1);
1976 1893 if (hseg->s_base + hseg->s_size < hibound) {
1977 1894 lseg = hseg;
1978 1895 hseg = NULL;
1979 1896 } else {
1980 1897 lseg = AS_SEGPREV(as, hseg);
1981 1898 }
1982 1899 }
1983 1900
1984 1901 for (;;) {
1985 1902 /*
1986 1903 * Set lo and hi to the hole's boundaries. (We should really
1987 1904 * use MAXADDR in place of hibound in the expression below,
1988 1905 * but can't express it easily; using hibound in its place is
1989 1906 * harmless.)
1990 1907 */
1991 1908 lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
1992 1909 hi = (hseg == NULL) ? hibound : hseg->s_base;
1993 1910 /*
1994 1911 * If the iteration has moved past the interval from lobound
1995 1912 * to hibound it's pointless to continue.
1996 1913 */
1997 1914 if ((forward && lo > hibound) || (!forward && hi < lobound))
1998 1915 break;
1999 1916 else if (lo > hibound || hi < lobound)
2000 1917 goto cont;
2001 1918 /*
2002 1919 * Candidate hole lies at least partially within the allowable
2003 1920 * range. Restrict it to fall completely within that range,
2004 1921 * i.e., to [max(lo, lobound), min(hi, hibound)].
2005 1922 */
2006 1923 if (lo < lobound)
2007 1924 lo = lobound;
2008 1925 if (hi > hibound)
2009 1926 hi = hibound;
2010 1927 /*
2011 1928 * Verify that the candidate hole is big enough and meets
2012 1929 * hardware constraints. If the hole is too small, no need
2013 1930 * to do the further checks since they will fail.
2014 1931 */
2015 1932 *basep = lo;
2016 1933 *lenp = hi - lo;
2017 1934 if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
2018 1935 minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
2019 1936 ((flags & AH_CONTAIN) == 0 ||
2020 1937 (*basep <= addr && *basep + *lenp > addr))) {
2021 1938 if (!forward)
2022 1939 as->a_lastgap = hseg;
2023 1940 if (hseg != NULL)
2024 1941 as->a_lastgaphl = hseg;
2025 1942 else
2026 1943 as->a_lastgaphl = lseg;
2027 1944 AS_LOCK_EXIT(as, &as->a_lock);
2028 1945 return (0);
2029 1946 }
2030 1947 cont:
2031 1948 /*
2032 1949 * Move to the next hole.
2033 1950 */
2034 1951 if (forward) {
2035 1952 lseg = hseg;
2036 1953 if (lseg == NULL)
2037 1954 break;
2038 1955 hseg = AS_SEGNEXT(as, hseg);
2039 1956 } else {
2040 1957 hseg = lseg;
2041 1958 if (hseg == NULL)
2042 1959 break;
2043 1960 lseg = AS_SEGPREV(as, lseg);
2044 1961 }
2045 1962 }
2046 1963 if (fast_path && (align != 0 || save_redzone != 0)) {
2047 1964 fast_path = 0;
2048 1965 minlen = save_minlen;
2049 1966 redzone = save_redzone;
2050 1967 goto retry;
2051 1968 }
2052 1969 *basep = save_base;
2053 1970 *lenp = save_len;
2054 1971 AS_LOCK_EXIT(as, &as->a_lock);
2055 1972 return (-1);
2056 1973 }
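
A worked example of the fast path, with illustrative numbers: for minlen = 1M, align = 4M and redzone = 8K, the first pass searches for a plain hole of 1M + 4M + 2 * 8K = 5M + 16K; any hole that large necessarily contains a 1M range at the requested phase with a redzone on each side. Only if that fails (and align or the saved redzone was nonzero) does the retry pass restore the original minlen and redzone and test each smaller hole against the alignment constraints directly via valid_va_range_aligned().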
2057 1974
2058 1975 /*
2059 1976 * Find a hole of at least size minlen within [*basep, *basep + *lenp).
2060 1977 *
2061 1978 * If flags specifies AH_HI, the hole will have the highest possible address
2062 1979 * in the range. We use the as->a_lastgap field to figure out where to
2063 1980 * start looking for a gap.
2064 1981 *
2065 1982 * Otherwise, the gap will have the lowest possible address.
2066 1983 *
2067 1984 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
2068 1985 *
2069 1986 * If an adequate hole is found, base and len are set to reflect the part of
2070 1987 * the hole that is within range, and 0 is returned, otherwise,
2071 1988 * -1 is returned.
2072 1989 *
2073 1990 * NOTE: This routine is not correct when base+len overflows caddr_t.
2074 1991 */
2075 1992 int
2076 1993 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
2077 1994 caddr_t addr)
2078 1995 {
2079 1996
2080 1997 return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
2081 1998 }
2082 1999
2083 2000 /*
2084 2001 * Return the next range within [base, base + len) that is backed
2085 2002 * with "real memory". Skip holes and non-seg_vn segments.
2086 2003 * We're lazy and only return one segment at a time.
2087 2004 */
2088 2005 int
2089 2006 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2090 2007 {
2091 2008 extern struct seg_ops segspt_shmops; /* needs a header file */
2092 2009 struct seg *seg;
2093 2010 caddr_t addr, eaddr;
2094 2011 caddr_t segend;
2095 2012
2096 2013 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2097 2014
2098 2015 addr = *basep;
2099 2016 eaddr = addr + *lenp;
2100 2017
2101 2018 seg = as_findseg(as, addr, 0);
2102 2019 if (seg != NULL)
2103 2020 addr = MAX(seg->s_base, addr);
2104 2021
2105 2022 for (;;) {
2106 2023 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2107 2024 AS_LOCK_EXIT(as, &as->a_lock);
2108 2025 return (EINVAL);
2109 2026 }
2110 2027
2111 2028 if (seg->s_ops == &segvn_ops) {
2112 2029 segend = seg->s_base + seg->s_size;
2113 2030 break;
2114 2031 }
2115 2032
2116 2033 /*
2117 2034 * We do ISM by looking into the private data
2118 2035 * to determine the real size of the segment.
2119 2036 */
2120 2037 if (seg->s_ops == &segspt_shmops) {
2121 2038 segend = seg->s_base + spt_realsize(seg);
2122 2039 if (addr < segend)
2123 2040 break;
2124 2041 }
2125 2042
2126 2043 seg = AS_SEGNEXT(as, seg);
2127 2044
2128 2045 if (seg != NULL)
2129 2046 addr = seg->s_base;
2130 2047 }
2131 2048
2132 2049 *basep = addr;
2133 2050
2134 2051 if (segend > eaddr)
2135 2052 *lenp = eaddr - addr;
2136 2053 else
2137 2054 *lenp = segend - addr;
2138 2055
2139 2056 AS_LOCK_EXIT(as, &as->a_lock);
2140 2057 return (0);
2141 2058 }
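
Reviewer note: since as_memory() only returns one backed range per call, a caller enumerates a window by looping. A sketch, assuming as, start and winlen from the surrounding context (error handling elided):

	caddr_t base = start;
	size_t len = winlen;

	while (len != 0) {
		caddr_t chunk = base;
		size_t clen = len;

		if (as_memory(as, &chunk, &clen) != 0)
			break;			/* no further backed ranges */
		/* [chunk, chunk + clen) is backed by real memory */
		base = chunk + clen;
		len = (start + winlen) - base;
	}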
2142 2059
2143 2060 /*
2144 2061 * Swap the pages associated with the address space as out to
2145 2062 * secondary storage, returning the number of bytes actually
2146 2063 * swapped.
2147 2064 *
2148 2065 * The value returned is intended to correlate well with the process's
2149 2066 * memory requirements. Its usefulness for this purpose depends on
2150 2067 * how well the segment-level routines do at returning accurate
2151 2068 * information.
2152 2069 */
2153 2070 size_t
2154 2071 as_swapout(struct as *as)
2155 2072 {
2156 2073 struct seg *seg;
2157 2074 size_t swpcnt = 0;
2158 2075
2159 2076 /*
2160 2077 * Kernel-only processes have given up their address
2161 2078 * spaces. Of course, we shouldn't be attempting to
2162 2079 * swap out such processes in the first place...
2163 2080 */
2164 2081 if (as == NULL)
2165 2082 return (0);
2166 2083
2167 2084 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2168 2085
2169 - /* Prevent XHATs from attaching */
2170 - mutex_enter(&as->a_contents);
2171 - AS_SETBUSY(as);
2172 - mutex_exit(&as->a_contents);
2173 -
2174 -
2175 2086 /*
2176 2087 * Free all mapping resources associated with the address
2177 2088 * space. The segment-level swapout routines capitalize
2178 2089  * on this unmapping by scavenging pages that have become
2179 2090 * unmapped here.
2180 2091 */
2181 2092 hat_swapout(as->a_hat);
2182 - if (as->a_xhat != NULL)
2183 - xhat_swapout_all(as);
2184 -
2185 - mutex_enter(&as->a_contents);
2186 - AS_CLRBUSY(as);
2187 - mutex_exit(&as->a_contents);
2188 2093
2189 2094 /*
2190 2095 * Call the swapout routines of all segments in the address
2191 2096 * space to do the actual work, accumulating the amount of
2192 2097 * space reclaimed.
2193 2098 */
2194 2099 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195 2100 struct seg_ops *ov = seg->s_ops;
2196 2101
2197 2102 /*
2198 2103 * We have to check to see if the seg has
2199 2104 * an ops vector because the seg may have
2200 2105 * been in the middle of being set up when
2201 2106 * the process was picked for swapout.
2202 2107 */
2203 2108 if ((ov != NULL) && (ov->swapout != NULL))
2204 2109 swpcnt += SEGOP_SWAPOUT(seg);
2205 2110 }
2206 2111 AS_LOCK_EXIT(as, &as->a_lock);
2207 2112 return (swpcnt);
2208 2113 }
2209 2114
2210 2115 /*
2211 2116 * Determine whether data from the mappings in interval [addr, addr + size)
2212 2117 * are in the primary memory (core) cache.
2213 2118 */
2214 2119 int
2215 2120 as_incore(struct as *as, caddr_t addr,
2216 2121 size_t size, char *vec, size_t *sizep)
2217 2122 {
2218 2123 struct seg *seg;
2219 2124 size_t ssize;
2220 2125 caddr_t raddr; /* rounded down addr */
2221 2126 size_t rsize; /* rounded up size */
2222 2127 size_t isize; /* iteration size */
2223 2128 int error = 0; /* result, assume success */
2224 2129
2225 2130 *sizep = 0;
2226 2131 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2227 2132 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2228 2133 (size_t)raddr;
2229 2134
2230 2135 if (raddr + rsize < raddr) /* check for wraparound */
2231 2136 return (ENOMEM);
2232 2137
2233 2138 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2234 2139 seg = as_segat(as, raddr);
2235 2140 if (seg == NULL) {
2236 2141 AS_LOCK_EXIT(as, &as->a_lock);
2237 2142 return (-1);
2238 2143 }
2239 2144
2240 2145 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2241 2146 if (raddr >= seg->s_base + seg->s_size) {
2242 2147 seg = AS_SEGNEXT(as, seg);
2243 2148 if (seg == NULL || raddr != seg->s_base) {
2244 2149 error = -1;
2245 2150 break;
2246 2151 }
2247 2152 }
2248 2153 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2249 2154 ssize = seg->s_base + seg->s_size - raddr;
2250 2155 else
2251 2156 ssize = rsize;
2252 2157 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
2253 2158 if (isize != ssize) {
2254 2159 error = -1;
2255 2160 break;
2256 2161 }
2257 2162 vec += btopr(ssize);
2258 2163 }
2259 2164 AS_LOCK_EXIT(as, &as->a_lock);
2260 2165 return (error);
2261 2166 }
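
Reviewer note: a mincore(2)-style caller of as_incore() looks roughly like the sketch below (assumed shapes; the real syscall plumbing differs). vec gets one byte per page, and *sizep reports how many bytes of the range were actually examined.

	char *vec = kmem_zalloc(btopr(size), KM_SLEEP);
	size_t examined;

	if (as_incore(as, addr, size, vec, &examined) == 0) {
		/* vec[i] describes page i of the range; whole range examined */
	}
	kmem_free(vec, btopr(size));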
2262 2167
2263 2168 static void
2264 2169 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2265 2170 ulong_t *bitmap, size_t position, size_t npages)
2266 2171 {
2267 2172 caddr_t range_start;
2268 2173 size_t pos1 = position;
2269 2174 size_t pos2;
2270 2175 size_t size;
2271 2176 size_t end_pos = npages + position;
2272 2177
2273 2178 while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2274 2179 size = ptob((pos2 - pos1));
2275 2180 range_start = (caddr_t)((uintptr_t)addr +
2276 2181 ptob(pos1 - position));
2277 2182
2278 2183 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
2279 2184 (ulong_t *)NULL, (size_t)NULL);
2280 2185 pos1 = pos2;
2281 2186 }
2282 2187 }
2283 2188
2284 2189 static void
2285 2190 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2286 2191 caddr_t raddr, size_t rsize)
2287 2192 {
2288 2193 struct seg *seg = as_segat(as, raddr);
2289 2194 size_t ssize;
2290 2195
2291 2196 while (rsize != 0) {
2292 2197 if (raddr >= seg->s_base + seg->s_size)
2293 2198 seg = AS_SEGNEXT(as, seg);
2294 2199
2295 2200 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2296 2201 ssize = seg->s_base + seg->s_size - raddr;
2297 2202 else
2298 2203 ssize = rsize;
2299 2204
2300 2205 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize));
2301 2206
2302 2207 rsize -= ssize;
2303 2208 raddr += ssize;
2304 2209 }
2305 2210 }
2306 2211
2307 2212 /*
2308 2213 * Cache control operations over the interval [addr, addr + size) in
2309 2214 * address space "as".
2310 2215 */
2311 2216 /*ARGSUSED*/
2312 2217 int
2313 2218 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr,
2314 2219 uintptr_t arg, ulong_t *lock_map, size_t pos)
2315 2220 {
2316 2221 struct seg *seg; /* working segment */
2317 2222 caddr_t raddr; /* rounded down addr */
2318 2223 caddr_t initraddr; /* saved initial rounded down addr */
2319 2224 size_t rsize; /* rounded up size */
2320 2225 size_t initrsize; /* saved initial rounded up size */
2321 2226 size_t ssize; /* size of seg */
2322 2227 int error = 0; /* result */
2323 2228 size_t mlock_size; /* size of bitmap */
2324 2229 ulong_t *mlock_map; /* pointer to bitmap used */
2325 2230 /* to represent the locked */
2326 2231 /* pages. */
2327 2232 retry:
2328 2233 if (error == IE_RETRY)
2329 2234 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2330 2235 else
2331 2236 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2332 2237
2333 2238 /*
2334 2239 * If these are address space lock/unlock operations, loop over
2335 2240 * all segments in the address space, as appropriate.
2336 2241 */
2337 2242 if (func == MC_LOCKAS) {
2338 2243 size_t npages, idx;
2339 2244 size_t rlen = 0; /* rounded as length */
2340 2245
2341 2246 idx = pos;
2342 2247
2343 2248 if (arg & MCL_FUTURE) {
2344 2249 mutex_enter(&as->a_contents);
2345 2250 AS_SETPGLCK(as);
2346 2251 mutex_exit(&as->a_contents);
2347 2252 }
2348 2253 if ((arg & MCL_CURRENT) == 0) {
2349 2254 AS_LOCK_EXIT(as, &as->a_lock);
2350 2255 return (0);
2351 2256 }
2352 2257
2353 2258 seg = AS_SEGFIRST(as);
2354 2259 if (seg == NULL) {
2355 2260 AS_LOCK_EXIT(as, &as->a_lock);
2356 2261 return (0);
2357 2262 }
2358 2263
2359 2264 do {
2360 2265 raddr = (caddr_t)((uintptr_t)seg->s_base &
2361 2266 (uintptr_t)PAGEMASK);
2362 2267 rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2363 2268 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2364 2269 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2365 2270
2366 2271 mlock_size = BT_BITOUL(btopr(rlen));
2367 2272 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2368 2273 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2369 2274 AS_LOCK_EXIT(as, &as->a_lock);
2370 2275 return (EAGAIN);
2371 2276 }
2372 2277
2373 2278 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2374 2279 error = SEGOP_LOCKOP(seg, seg->s_base,
2375 2280 seg->s_size, attr, MC_LOCK, mlock_map, pos);
2376 2281 if (error != 0)
2377 2282 break;
2378 2283 pos += seg_pages(seg);
2379 2284 }
2380 2285
2381 2286 if (error) {
2382 2287 for (seg = AS_SEGFIRST(as); seg != NULL;
2383 2288 seg = AS_SEGNEXT(as, seg)) {
2384 2289
2385 2290 raddr = (caddr_t)((uintptr_t)seg->s_base &
2386 2291 (uintptr_t)PAGEMASK);
2387 2292 npages = seg_pages(seg);
2388 2293 as_segunlock(seg, raddr, attr, mlock_map,
2389 2294 idx, npages);
2390 2295 idx += npages;
2391 2296 }
2392 2297 }
2393 2298
2394 2299 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2395 2300 AS_LOCK_EXIT(as, &as->a_lock);
2396 2301 goto lockerr;
2397 2302 } else if (func == MC_UNLOCKAS) {
2398 2303 mutex_enter(&as->a_contents);
2399 2304 AS_CLRPGLCK(as);
2400 2305 mutex_exit(&as->a_contents);
2401 2306
2402 2307 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2403 2308 error = SEGOP_LOCKOP(seg, seg->s_base,
2404 2309 seg->s_size, attr, MC_UNLOCK, NULL, 0);
2405 2310 if (error != 0)
2406 2311 break;
2407 2312 }
2408 2313
2409 2314 AS_LOCK_EXIT(as, &as->a_lock);
2410 2315 goto lockerr;
2411 2316 }
2412 2317
2413 2318 /*
2414 2319 * Normalize addresses and sizes.
2415 2320 */
2416 2321 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2417 2322 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2418 2323 (size_t)raddr;
2419 2324
2420 2325 if (raddr + rsize < raddr) { /* check for wraparound */
2421 2326 AS_LOCK_EXIT(as, &as->a_lock);
2422 2327 return (ENOMEM);
2423 2328 }
2424 2329
2425 2330 /*
2426 2331 * Get initial segment.
2427 2332 */
2428 2333 if ((seg = as_segat(as, raddr)) == NULL) {
2429 2334 AS_LOCK_EXIT(as, &as->a_lock);
2430 2335 return (ENOMEM);
2431 2336 }
2432 2337
2433 2338 if (func == MC_LOCK) {
2434 2339 mlock_size = BT_BITOUL(btopr(rsize));
2435 2340 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2436 2341 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2437 2342 AS_LOCK_EXIT(as, &as->a_lock);
2438 2343 return (EAGAIN);
2439 2344 }
2440 2345 }
2441 2346
2442 2347 /*
2443 2348 * Loop over all segments. If a hole in the address range is
2444 2349 * discovered, then fail. For each segment, perform the appropriate
2445 2350 * control operation.
2446 2351 */
2447 2352 while (rsize != 0) {
2448 2353
2449 2354 /*
2450 2355 * Make sure there's no hole, calculate the portion
2451 2356 * of the next segment to be operated over.
2452 2357 */
2453 2358 if (raddr >= seg->s_base + seg->s_size) {
2454 2359 seg = AS_SEGNEXT(as, seg);
2455 2360 if (seg == NULL || raddr != seg->s_base) {
2456 2361 if (func == MC_LOCK) {
2457 2362 as_unlockerr(as, attr, mlock_map,
2458 2363 initraddr, initrsize - rsize);
2459 2364 kmem_free(mlock_map,
2460 2365 mlock_size * sizeof (ulong_t));
2461 2366 }
2462 2367 AS_LOCK_EXIT(as, &as->a_lock);
2463 2368 return (ENOMEM);
2464 2369 }
2465 2370 }
2466 2371 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2467 2372 ssize = seg->s_base + seg->s_size - raddr;
2468 2373 else
2469 2374 ssize = rsize;
2470 2375
2471 2376 /*
2472 2377 * Dispatch on specific function.
2473 2378 */
2474 2379 switch (func) {
2475 2380
2476 2381 /*
2477 2382 * Synchronize cached data from mappings with backing
2478 2383 * objects.
2479 2384 */
2480 2385 case MC_SYNC:
2481 2386 if (error = SEGOP_SYNC(seg, raddr, ssize,
2482 2387 attr, (uint_t)arg)) {
2483 2388 AS_LOCK_EXIT(as, &as->a_lock);
2484 2389 return (error);
2485 2390 }
2486 2391 break;
2487 2392
2488 2393 /*
2489 2394 * Lock pages in memory.
2490 2395 */
2491 2396 case MC_LOCK:
2492 2397 if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2493 2398 attr, func, mlock_map, pos)) {
2494 2399 as_unlockerr(as, attr, mlock_map, initraddr,
2495 2400 initrsize - rsize + ssize);
2496 2401 kmem_free(mlock_map, mlock_size *
2497 2402 sizeof (ulong_t));
2498 2403 AS_LOCK_EXIT(as, &as->a_lock);
2499 2404 goto lockerr;
2500 2405 }
2501 2406 break;
2502 2407
2503 2408 /*
2504 2409 * Unlock mapped pages.
2505 2410 */
2506 2411 case MC_UNLOCK:
2507 2412 (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2508 2413 (ulong_t *)NULL, (size_t)NULL);
2509 2414 break;
2510 2415
2511 2416 /*
2512 2417 * Store VM advise for mapped pages in segment layer.
2513 2418 */
2514 2419 case MC_ADVISE:
2515 2420 error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2516 2421
2517 2422 /*
2518 2423 * Check for regular errors and special retry error
2519 2424 */
2520 2425 if (error) {
2521 2426 if (error == IE_RETRY) {
2522 2427 /*
2523 2428 * Need to acquire writers lock, so
2524 2429 * have to drop readers lock and start
2525 2430 * all over again
2526 2431 */
2527 2432 AS_LOCK_EXIT(as, &as->a_lock);
2528 2433 goto retry;
2529 2434 } else if (error == IE_REATTACH) {
2530 2435 /*
2531 2436 * Find segment for current address
2532 2437 * because current segment just got
2533 2438 * split or concatenated
2534 2439 */
2535 2440 seg = as_segat(as, raddr);
2536 2441 if (seg == NULL) {
2537 2442 AS_LOCK_EXIT(as, &as->a_lock);
2538 2443 return (ENOMEM);
2539 2444 }
2540 2445 } else {
2541 2446 /*
2542 2447 * Regular error
2543 2448 */
2544 2449 AS_LOCK_EXIT(as, &as->a_lock);
2545 2450 return (error);
2546 2451 }
2547 2452 }
2548 2453 break;
2549 2454
2550 2455 case MC_INHERIT_ZERO:
2551 2456 if (seg->s_ops->inherit == NULL) {
2552 2457 error = ENOTSUP;
2553 2458 } else {
2554 2459 error = SEGOP_INHERIT(seg, raddr, ssize,
2555 2460 SEGP_INH_ZERO);
2556 2461 }
2557 2462 if (error != 0) {
2558 2463 AS_LOCK_EXIT(as, &as->a_lock);
2559 2464 return (error);
2560 2465 }
2561 2466 break;
2562 2467
2563 2468 /*
2564 2469 * Can't happen.
2565 2470 */
2566 2471 default:
2567 2472 panic("as_ctl: bad operation %d", func);
2568 2473 /*NOTREACHED*/
2569 2474 }
2570 2475
2571 2476 rsize -= ssize;
2572 2477 raddr += ssize;
2573 2478 }
2574 2479
2575 2480 if (func == MC_LOCK)
2576 2481 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2577 2482 AS_LOCK_EXIT(as, &as->a_lock);
2578 2483 return (0);
2579 2484 lockerr:
2580 2485
2581 2486 /*
2582 2487 * If the lower levels returned EDEADLK for a segment lockop,
2583 2488 * it means that we should retry the operation. Let's wait
2584 2489 * a bit also to let the deadlock causing condition clear.
2585 2490 * This is part of a gross hack to work around a design flaw
2586 2491 * in the ufs/sds logging code and should go away when the
2587 2492 * logging code is re-designed to fix the problem. See bug
2588 2493 * 4125102 for details of the problem.
2589 2494 */
2590 2495 if (error == EDEADLK) {
2591 2496 delay(deadlk_wait);
2592 2497 error = 0;
2593 2498 goto retry;
2594 2499 }
2595 2500 return (error);
2596 2501 }
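
Reviewer note: the MC_LOCK/MC_LOCKAS bookkeeping above sizes its undo bitmap at one bit per page, packed into ulong_t words. A condensed sketch of the sizing and cleanup contract, taken straight from the code above:

	size_t npages = btopr(rsize);		/* pages covered by the request */
	size_t words = BT_BITOUL(npages);	/* ulong_t words for npages bits */
	ulong_t *map = kmem_zalloc(words * sizeof (ulong_t), KM_NOSLEEP);

	/* each SEGOP_LOCKOP(MC_LOCK) sets the bits for pages it locked; */
	/* on failure, as_unlockerr() walks the bits and unlocks only those */
	kmem_free(map, words * sizeof (ulong_t));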
2597 2502
2598 2503 int
2599 2504 fc_decode(faultcode_t fault_err)
2600 2505 {
2601 2506 int error = 0;
2602 2507
2603 2508 switch (FC_CODE(fault_err)) {
2604 2509 case FC_OBJERR:
2605 2510 error = FC_ERRNO(fault_err);
2606 2511 break;
2607 2512 case FC_PROT:
2608 2513 error = EACCES;
2609 2514 break;
2610 2515 default:
2611 2516 error = EFAULT;
2612 2517 break;
2613 2518 }
2614 2519 return (error);
2615 2520 }
2616 2521
2617 2522 /*
2618 2523 * Pagelock pages from a range that spans more than 1 segment. Obtain shadow
2619 2524 * lists from each segment and copy them to one contiguous shadow list (plist)
2620 2525 * as expected by the caller. Save pointers to per segment shadow lists at
2621 2526 * the tail of plist so that they can be used during as_pageunlock().
2622 2527 */
2623 2528 static int
2624 2529 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2625 2530 caddr_t addr, size_t size, enum seg_rw rw)
2626 2531 {
2627 2532 caddr_t sv_addr = addr;
2628 2533 size_t sv_size = size;
2629 2534 struct seg *sv_seg = seg;
2630 2535 ulong_t segcnt = 1;
2631 2536 ulong_t cnt;
2632 2537 size_t ssize;
2633 2538 pgcnt_t npages = btop(size);
2634 2539 page_t **plist;
2635 2540 page_t **pl;
2636 2541 int error;
2637 2542 caddr_t eaddr;
2638 2543 faultcode_t fault_err = 0;
2639 2544 pgcnt_t pl_off;
2640 2545 extern struct seg_ops segspt_shmops;
2641 2546
2642 2547 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2643 2548 ASSERT(seg != NULL);
2644 2549 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2645 2550 ASSERT(addr + size > seg->s_base + seg->s_size);
2646 2551 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2647 2552 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2648 2553
2649 2554 /*
2650 2555 * Count the number of segments covered by the range we are about to
2651 2556 * lock. The segment count is used to size the shadow list we return
2652 2557 * back to the caller.
2653 2558 */
2654 2559 for (; size != 0; size -= ssize, addr += ssize) {
2655 2560 if (addr >= seg->s_base + seg->s_size) {
2656 2561
2657 2562 seg = AS_SEGNEXT(as, seg);
2658 2563 if (seg == NULL || addr != seg->s_base) {
2659 2564 AS_LOCK_EXIT(as, &as->a_lock);
2660 2565 return (EFAULT);
2661 2566 }
2662 2567 /*
2663 2568 * Do a quick check if subsequent segments
2664 2569 * will most likely support pagelock.
2665 2570 */
2666 2571 if (seg->s_ops == &segvn_ops) {
2667 2572 vnode_t *vp;
2668 2573
2669 2574 if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
2670 2575 vp != NULL) {
2671 2576 AS_LOCK_EXIT(as, &as->a_lock);
2672 2577 goto slow;
2673 2578 }
2674 2579 } else if (seg->s_ops != &segspt_shmops) {
2675 2580 AS_LOCK_EXIT(as, &as->a_lock);
2676 2581 goto slow;
2677 2582 }
2678 2583 segcnt++;
2679 2584 }
2680 2585 if (addr + size > seg->s_base + seg->s_size) {
2681 2586 ssize = seg->s_base + seg->s_size - addr;
2682 2587 } else {
2683 2588 ssize = size;
2684 2589 }
2685 2590 }
2686 2591 ASSERT(segcnt > 1);
2687 2592
2688 2593 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2689 2594
2690 2595 addr = sv_addr;
2691 2596 size = sv_size;
2692 2597 seg = sv_seg;
2693 2598
2694 2599 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2695 2600 if (addr >= seg->s_base + seg->s_size) {
2696 2601 seg = AS_SEGNEXT(as, seg);
2697 2602 ASSERT(seg != NULL && addr == seg->s_base);
2698 2603 cnt++;
2699 2604 ASSERT(cnt < segcnt);
2700 2605 }
2701 2606 if (addr + size > seg->s_base + seg->s_size) {
2702 2607 ssize = seg->s_base + seg->s_size - addr;
2703 2608 } else {
2704 2609 ssize = size;
2705 2610 }
2706 2611 pl = &plist[npages + cnt];
2707 2612 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2708 2613 L_PAGELOCK, rw);
2709 2614 if (error) {
2710 2615 break;
2711 2616 }
2712 2617 ASSERT(plist[npages + cnt] != NULL);
2713 2618 ASSERT(pl_off + btop(ssize) <= npages);
2714 2619 bcopy(plist[npages + cnt], &plist[pl_off],
2715 2620 btop(ssize) * sizeof (page_t *));
2716 2621 pl_off += btop(ssize);
2717 2622 }
2718 2623
2719 2624 if (size == 0) {
2720 2625 AS_LOCK_EXIT(as, &as->a_lock);
2721 2626 ASSERT(cnt == segcnt - 1);
2722 2627 *ppp = plist;
2723 2628 return (0);
2724 2629 }
2725 2630
2726 2631 /*
2727 2632  * One of the pagelock calls failed; the error type is in the error
2728 2633  * variable. Unlock what we've locked so far and retry with F_SOFTLOCK
2729 2634  * if the error type is either EFAULT or ENOTSUP. Otherwise just return
2730 2635  * the error back to the caller.
2731 2636 */
2732 2637
2733 2638 eaddr = addr;
2734 2639 seg = sv_seg;
2735 2640
2736 2641 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2737 2642 if (addr >= seg->s_base + seg->s_size) {
2738 2643 seg = AS_SEGNEXT(as, seg);
2739 2644 ASSERT(seg != NULL && addr == seg->s_base);
2740 2645 cnt++;
2741 2646 ASSERT(cnt < segcnt);
2742 2647 }
2743 2648 if (eaddr > seg->s_base + seg->s_size) {
2744 2649 ssize = seg->s_base + seg->s_size - addr;
2745 2650 } else {
2746 2651 ssize = eaddr - addr;
2747 2652 }
2748 2653 pl = &plist[npages + cnt];
2749 2654 ASSERT(*pl != NULL);
2750 2655 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2751 2656 L_PAGEUNLOCK, rw);
2752 2657 }
2753 2658
2754 2659 AS_LOCK_EXIT(as, &as->a_lock);
2755 2660
2756 2661 kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2757 2662
2758 2663 if (error != ENOTSUP && error != EFAULT) {
2759 2664 return (error);
2760 2665 }
2761 2666
2762 2667 slow:
2763 2668 /*
2764 2669  * If we are here because pagelock failed due to the need to cow-fault
2765 2670  * in the pages we want to lock, F_SOFTLOCK will do this job, and in the
2766 2671  * next as_pagelock() call for this address range pagelock will
2767 2672  * hopefully succeed.
2768 2673 */
2769 2674 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2770 2675 if (fault_err != 0) {
2771 2676 return (fc_decode(fault_err));
2772 2677 }
2773 2678 *ppp = NULL;
2774 2679
2775 2680 return (0);
2776 2681 }
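
Reviewer note: the shadow list built by as_pagelock_segs() has a fixed shape worth spelling out. A sketch of the layout (names illustrative):

	/* npages flat entries for the caller, then one slot per segment */
	page_t **plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *),
	    KM_SLEEP);
	page_t **flat = &plist[0];		/* copied-together pages, in order */
	page_t **perseg = &plist[npages];	/* perseg[i]: segment i's own */
						/* list, replayed at */
						/* L_PAGEUNLOCK time */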
2777 2682
2778 2683 /*
2779 2684 * lock pages in a given address space. Return shadow list. If
2780 2685 * the list is NULL, the MMU mapping is also locked.
2781 2686 */
2782 2687 int
2783 2688 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr,
2784 2689 size_t size, enum seg_rw rw)
2785 2690 {
2786 2691 size_t rsize;
2787 2692 caddr_t raddr;
2788 2693 faultcode_t fault_err;
2789 2694 struct seg *seg;
2790 2695 int err;
2791 2696
2792 2697 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START,
2793 2698 "as_pagelock_start: addr %p size %ld", addr, size);
2794 2699
2795 2700 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2796 2701 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2797 2702 (size_t)raddr;
2798 2703
2799 2704 /*
2800 2705  * if the request crosses two or more segments,
2801 2706  * let as_pagelock_segs() handle it.
2802 2707 */
2803 2708 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2804 2709
2805 2710 seg = as_segat(as, raddr);
2806 2711 if (seg == NULL) {
2807 2712 AS_LOCK_EXIT(as, &as->a_lock);
2808 2713 return (EFAULT);
2809 2714 }
2810 2715 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2811 2716 if (raddr + rsize > seg->s_base + seg->s_size) {
2812 2717 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2813 2718 }
2814 2719 if (raddr + rsize <= raddr) {
2815 2720 AS_LOCK_EXIT(as, &as->a_lock);
2816 2721 return (EFAULT);
2817 2722 }
2818 2723
2819 2724 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2820 2725 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2821 2726
2822 2727 /*
2823 2728 * try to lock pages and pass back shadow list
2824 2729 */
2825 2730 err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2826 2731
2827 2732 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2828 2733
2829 2734 AS_LOCK_EXIT(as, &as->a_lock);
2830 2735
2831 2736 if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2832 2737 return (err);
2833 2738 }
2834 2739
2835 2740 /*
2836 2741  * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2837 2742  * to no pagelock support for this segment or because pages need to be
2838 2743  * cow-faulted in. If a fault is needed, F_SOFTLOCK will do this job for
2839 2744  * this as_pagelock() call, and in the next as_pagelock() call for the
2840 2745  * same address range the pagelock call will hopefully succeed.
2841 2746 */
2842 2747 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2843 2748 if (fault_err != 0) {
2844 2749 return (fc_decode(fault_err));
2845 2750 }
2846 2751 *ppp = NULL;
2847 2752
2848 2753 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end");
2849 2754 return (0);
2850 2755 }
2851 2756
2852 2757 /*
2853 2758 * unlock pages locked by as_pagelock_segs(). Retrieve per segment shadow
2854 2759 * lists from the end of plist and call pageunlock interface for each segment.
2855 2760 * Drop as lock and free plist.
2856 2761 */
2857 2762 static void
2858 2763 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size,
2859 2764 struct page **plist, enum seg_rw rw)
2860 2765 {
2861 2766 ulong_t cnt;
2862 2767 caddr_t eaddr = addr + size;
2863 2768 pgcnt_t npages = btop(size);
2864 2769 size_t ssize;
2865 2770 page_t **pl;
2866 2771
2867 2772 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2868 2773 ASSERT(seg != NULL);
2869 2774 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2870 2775 ASSERT(addr + size > seg->s_base + seg->s_size);
2871 2776 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2872 2777 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2873 2778 ASSERT(plist != NULL);
2874 2779
2875 2780 for (cnt = 0; addr < eaddr; addr += ssize) {
2876 2781 if (addr >= seg->s_base + seg->s_size) {
2877 2782 seg = AS_SEGNEXT(as, seg);
2878 2783 ASSERT(seg != NULL && addr == seg->s_base);
2879 2784 cnt++;
2880 2785 }
2881 2786 if (eaddr > seg->s_base + seg->s_size) {
2882 2787 ssize = seg->s_base + seg->s_size - addr;
2883 2788 } else {
2884 2789 ssize = eaddr - addr;
2885 2790 }
2886 2791 pl = &plist[npages + cnt];
2887 2792 ASSERT(*pl != NULL);
2888 2793 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2889 2794 L_PAGEUNLOCK, rw);
2890 2795 }
2891 2796 ASSERT(cnt > 0);
2892 2797 AS_LOCK_EXIT(as, &as->a_lock);
2893 2798
2894 2799 cnt++;
2895 2800 kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2896 2801 }
2897 2802
2898 2803 /*
2899 2804 * unlock pages in a given address range
2900 2805 */
2901 2806 void
2902 2807 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2903 2808 enum seg_rw rw)
2904 2809 {
2905 2810 struct seg *seg;
2906 2811 size_t rsize;
2907 2812 caddr_t raddr;
2908 2813
2909 2814 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START,
2910 2815 "as_pageunlock_start: addr %p size %ld", addr, size);
2911 2816
2912 2817 /*
2913 2818  * if the shadow list is NULL, as_pagelock()
2914 2819  * fell back to as_fault
2915 2820 */
2916 2821 if (pp == NULL) {
2917 2822 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2918 2823 return;
2919 2824 }
2920 2825
2921 2826 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2922 2827 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2923 2828 (size_t)raddr;
2924 2829
2925 2830 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2926 2831 seg = as_segat(as, raddr);
2927 2832 ASSERT(seg != NULL);
2928 2833
2929 2834 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2930 2835 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2931 2836
2932 2837 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2933 2838 if (raddr + rsize <= seg->s_base + seg->s_size) {
2934 2839 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2935 2840 } else {
2936 2841 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2937 2842 return;
2938 2843 }
2939 2844 AS_LOCK_EXIT(as, &as->a_lock);
2940 2845 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2941 2846 }
2942 2847
2943 2848 int
2944 2849 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2945 2850 boolean_t wait)
2946 2851 {
2947 2852 struct seg *seg;
2948 2853 size_t ssize;
2949 2854 caddr_t raddr; /* rounded down addr */
2950 2855 size_t rsize; /* rounded up size */
2951 2856 int error = 0;
2952 2857 size_t pgsz = page_get_pagesize(szc);
2953 2858
2954 2859 setpgsz_top:
2955 2860 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) {
2956 2861 return (EINVAL);
2957 2862 }
2958 2863
2959 2864 raddr = addr;
2960 2865 rsize = size;
2961 2866
2962 2867 if (raddr + rsize < raddr) /* check for wraparound */
2963 2868 return (ENOMEM);
2964 2869
2965 2870 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2966 2871 as_clearwatchprot(as, raddr, rsize);
2967 2872 seg = as_segat(as, raddr);
2968 2873 if (seg == NULL) {
2969 2874 as_setwatch(as);
2970 2875 AS_LOCK_EXIT(as, &as->a_lock);
2971 2876 return (ENOMEM);
2972 2877 }
2973 2878
2974 2879 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2975 2880 if (raddr >= seg->s_base + seg->s_size) {
2976 2881 seg = AS_SEGNEXT(as, seg);
2977 2882 if (seg == NULL || raddr != seg->s_base) {
2978 2883 error = ENOMEM;
2979 2884 break;
2980 2885 }
2981 2886 }
2982 2887 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2983 2888 ssize = seg->s_base + seg->s_size - raddr;
2984 2889 } else {
2985 2890 ssize = rsize;
2986 2891 }
2987 2892
2988 2893 retry:
2989 2894 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2990 2895
2991 2896 if (error == IE_NOMEM) {
2992 2897 error = EAGAIN;
2993 2898 break;
2994 2899 }
2995 2900
2996 2901 if (error == IE_RETRY) {
2997 2902 AS_LOCK_EXIT(as, &as->a_lock);
2998 2903 goto setpgsz_top;
2999 2904 }
3000 2905
3001 2906 if (error == ENOTSUP) {
3002 2907 error = EINVAL;
3003 2908 break;
3004 2909 }
3005 2910
3006 2911 if (wait && (error == EAGAIN)) {
3007 2912 /*
3008 2913 * Memory is currently locked. It must be unlocked
3009 2914 * before this operation can succeed through a retry.
3010 2915 * The possible reasons for locked memory and
3011 2916 * corresponding strategies for unlocking are:
3012 2917 * (1) Normal I/O
3013 2918 * wait for a signal that the I/O operation
3014 2919 * has completed and the memory is unlocked.
3015 2920 * (2) Asynchronous I/O
3016 2921 * The aio subsystem does not unlock pages when
3017 2922 * the I/O is completed. Those pages are unlocked
3018 2923 * when the application calls aiowait/aioerror.
3019 2924 * So, to prevent blocking forever, cv_broadcast()
3020 2925 * is done to wake up aio_cleanup_thread.
3021 2926 * Subsequently, segvn_reclaim will be called, and
3022 2927 * that will do AS_CLRUNMAPWAIT() and wake us up.
3023 2928 * (3) Long term page locking:
3024 2929 * This is not relevant for as_setpagesize()
3025 2930 * because we cannot change the page size for
3026 2931 * driver memory. The attempt to do so will
3027 2932 * fail with a different error than EAGAIN so
3028 2933 * there's no need to trigger as callbacks like
3029 2934 * as_unmap, as_setprot or as_free would do.
3030 2935 */
3031 2936 mutex_enter(&as->a_contents);
3032 2937 if (!AS_ISNOUNMAPWAIT(as)) {
3033 2938 if (AS_ISUNMAPWAIT(as) == 0) {
3034 2939 cv_broadcast(&as->a_cv);
3035 2940 }
3036 2941 AS_SETUNMAPWAIT(as);
3037 2942 AS_LOCK_EXIT(as, &as->a_lock);
3038 2943 while (AS_ISUNMAPWAIT(as)) {
3039 2944 cv_wait(&as->a_cv, &as->a_contents);
3040 2945 }
3041 2946 } else {
3042 2947 /*
3043 2948 * We may have raced with
3044 2949 * segvn_reclaim()/segspt_reclaim(). In this
3045 2950  * case clear the nounmapwait flag and retry since
3046 2951  * softlockcnt in this segment may already be
3047 2952  * 0. We don't drop the as writer lock so our
3048 2953 * number of retries without sleeping should
3049 2954 * be very small. See segvn_reclaim() for
3050 2955 * more comments.
3051 2956 */
3052 2957 AS_CLRNOUNMAPWAIT(as);
3053 2958 mutex_exit(&as->a_contents);
3054 2959 goto retry;
3055 2960 }
3056 2961 mutex_exit(&as->a_contents);
3057 2962 goto setpgsz_top;
3058 2963 } else if (error != 0) {
3059 2964 break;
3060 2965 }
3061 2966 }
3062 2967 as_setwatch(as);
3063 2968 AS_LOCK_EXIT(as, &as->a_lock);
3064 2969 return (error);
3065 2970 }
3066 2971
3067 2972 /*
3068 2973 * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
3069 2974 * in its chunk where s_szc is less than the szc we want to set.
3070 2975 */
3071 2976 static int
3072 2977 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3073 2978 int *retry)
3074 2979 {
3075 2980 struct seg *seg;
3076 2981 size_t ssize;
3077 2982 int error;
3078 2983
3079 2984 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3080 2985
3081 2986 seg = as_segat(as, raddr);
3082 2987 if (seg == NULL) {
3083 2988 panic("as_iset3_default_lpsize: no seg");
3084 2989 }
3085 2990
3086 2991 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
3087 2992 if (raddr >= seg->s_base + seg->s_size) {
3088 2993 seg = AS_SEGNEXT(as, seg);
3089 2994 if (seg == NULL || raddr != seg->s_base) {
3090 2995 panic("as_iset3_default_lpsize: as changed");
3091 2996 }
3092 2997 }
3093 2998 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3094 2999 ssize = seg->s_base + seg->s_size - raddr;
3095 3000 } else {
3096 3001 ssize = rsize;
3097 3002 }
3098 3003
3099 3004 if (szc > seg->s_szc) {
3100 3005 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
3101 3006 /* Only retry on EINVAL segments that have no vnode. */
3102 3007 if (error == EINVAL) {
3103 3008 vnode_t *vp = NULL;
3104 3009 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) &&
3105 3010 (SEGOP_GETVP(seg, raddr, &vp) != 0 ||
3106 3011 vp == NULL)) {
3107 3012 *retry = 1;
3108 3013 } else {
3109 3014 *retry = 0;
3110 3015 }
3111 3016 }
3112 3017 if (error) {
3113 3018 return (error);
3114 3019 }
3115 3020 }
3116 3021 }
3117 3022 return (0);
3118 3023 }
3119 3024
3120 3025 /*
3121 3026 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
3122 3027 * pagesize on each segment in its range, but if any fails with EINVAL,
3123 3028 * then it reduces the pagesizes to the next size in the bitmap and
3124 3029  * retries as_iset3_default_lpsize(). The code retries smaller
3125 3030  * allowed sizes on EINVAL because (a) the anon offset may not
3126 3031 * match the bigger sizes, and (b) it's hard to get this offset (to begin
3127 3032 * with) to pass to map_pgszcvec().
3128 3033 */
3129 3034 static int
3130 3035 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc,
3131 3036 uint_t szcvec)
3132 3037 {
3133 3038 int error;
3134 3039 int retry;
3135 3040
3136 3041 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3137 3042
3138 3043 for (;;) {
3139 3044 error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
3140 3045 if (error == EINVAL && retry) {
3141 3046 szcvec &= ~(1 << szc);
3142 3047 if (szcvec <= 1) {
3143 3048 return (EINVAL);
3144 3049 }
3145 3050 szc = highbit(szcvec) - 1;
3146 3051 } else {
3147 3052 return (error);
3148 3053 }
3149 3054 }
3150 3055 }
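
A worked example of the retry loop, with hypothetical bits: for szcvec = 0x1a (szc 1, 3 and 4 available) the loop starts at szc = 4; an EINVAL with retry clears bit 4 (szcvec = 0x0a) and retries at szc = 3, then at szc = 1. Once the last large-page bit is cleared, szcvec <= 1 and the EINVAL is final.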
3151 3056
3152 3057 /*
3153 3058 * as_iset1_default_lpsize() breaks its chunk into areas where existing
3154 3059 * segments have a smaller szc than we want to set. For each such area,
3155 3060  * it calls as_iset2_default_lpsize().
3156 3061 */
3157 3062 static int
3158 3063 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3159 3064 uint_t szcvec)
3160 3065 {
3161 3066 struct seg *seg;
3162 3067 size_t ssize;
3163 3068 caddr_t setaddr = raddr;
3164 3069 size_t setsize = 0;
3165 3070 int set;
3166 3071 int error;
3167 3072
3168 3073 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3169 3074
3170 3075 seg = as_segat(as, raddr);
3171 3076 if (seg == NULL) {
3172 3077 panic("as_iset1_default_lpsize: no seg");
3173 3078 }
3174 3079 if (seg->s_szc < szc) {
3175 3080 set = 1;
3176 3081 } else {
3177 3082 set = 0;
3178 3083 }
3179 3084
3180 3085 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3181 3086 if (raddr >= seg->s_base + seg->s_size) {
3182 3087 seg = AS_SEGNEXT(as, seg);
3183 3088 if (seg == NULL || raddr != seg->s_base) {
3184 3089 panic("as_iset1_default_lpsize: as changed");
3185 3090 }
3186 3091 if (seg->s_szc >= szc && set) {
3187 3092 ASSERT(setsize != 0);
3188 3093 error = as_iset2_default_lpsize(as,
3189 3094 setaddr, setsize, szc, szcvec);
3190 3095 if (error) {
3191 3096 return (error);
3192 3097 }
3193 3098 set = 0;
3194 3099 } else if (seg->s_szc < szc && !set) {
3195 3100 setaddr = raddr;
3196 3101 setsize = 0;
3197 3102 set = 1;
3198 3103 }
3199 3104 }
3200 3105 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3201 3106 ssize = seg->s_base + seg->s_size - raddr;
3202 3107 } else {
3203 3108 ssize = rsize;
3204 3109 }
3205 3110 }
3206 3111 error = 0;
3207 3112 if (set) {
3208 3113 ASSERT(setsize != 0);
3209 3114 error = as_iset2_default_lpsize(as, setaddr, setsize,
3210 3115 szc, szcvec);
3211 3116 }
3212 3117 return (error);
3213 3118 }
3214 3119
3215 3120 /*
3216 3121 * as_iset_default_lpsize() breaks its chunk according to the size code bitmap
3217 3122 * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
3218 3123 * chunk to as_iset1_default_lpsize().
3219 3124 */
3220 3125 static int
3221 3126 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags,
3222 3127 int type)
3223 3128 {
3224 3129 int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
3225 3130 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr,
3226 3131 flags, rtype, 1);
3227 3132 uint_t szc;
3228 3133 uint_t nszc;
3229 3134 int error;
3230 3135 caddr_t a;
3231 3136 caddr_t eaddr;
3232 3137 size_t segsize;
3233 3138 size_t pgsz;
3234 3139 uint_t save_szcvec;
3235 3140
3236 3141 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3237 3142 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
3238 3143 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
3239 3144
3240 3145 szcvec &= ~1;
3241 3146 if (szcvec <= 1) { /* skip if base page size */
3242 3147 return (0);
3243 3148 }
3244 3149
3245 3150 /* Get the pagesize of the first larger page size. */
3246 3151 szc = lowbit(szcvec) - 1;
3247 3152 pgsz = page_get_pagesize(szc);
3248 3153 eaddr = addr + size;
3249 3154 addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3250 3155 eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3251 3156
3252 3157 save_szcvec = szcvec;
3253 3158 szcvec >>= (szc + 1);
3254 3159 nszc = szc;
3255 3160 while (szcvec) {
3256 3161 if ((szcvec & 0x1) == 0) {
3257 3162 nszc++;
3258 3163 szcvec >>= 1;
3259 3164 continue;
3260 3165 }
3261 3166 nszc++;
3262 3167 pgsz = page_get_pagesize(nszc);
3263 3168 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3264 3169 if (a != addr) {
3265 3170 ASSERT(szc > 0);
3266 3171 ASSERT(a < eaddr);
3267 3172 segsize = a - addr;
3268 3173 error = as_iset1_default_lpsize(as, addr, segsize, szc,
3269 3174 save_szcvec);
3270 3175 if (error) {
3271 3176 return (error);
3272 3177 }
3273 3178 addr = a;
3274 3179 }
3275 3180 szc = nszc;
3276 3181 szcvec >>= 1;
3277 3182 }
3278 3183
3279 3184 ASSERT(addr < eaddr);
3280 3185 szcvec = save_szcvec;
3281 3186 while (szcvec) {
3282 3187 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3283 3188 ASSERT(a >= addr);
3284 3189 if (a != addr) {
3285 3190 ASSERT(szc > 0);
3286 3191 segsize = a - addr;
3287 3192 error = as_iset1_default_lpsize(as, addr, segsize, szc,
3288 3193 save_szcvec);
3289 3194 if (error) {
3290 3195 return (error);
3291 3196 }
3292 3197 addr = a;
3293 3198 }
3294 3199 szcvec &= ~(1 << szc);
3295 3200 if (szcvec) {
3296 3201 szc = highbit(szcvec) - 1;
3297 3202 pgsz = page_get_pagesize(szc);
3298 3203 }
3299 3204 }
3300 3205 ASSERT(addr == eaddr);
3301 3206
3302 3207 return (0);
3303 3208 }
3304 3209
3305 3210 /*
3306 3211 * Set the default large page size for the range. Called via memcntl with
3307 3212 * page size set to 0. as_set_default_lpsize breaks the range down into
3308 3213  * chunks with the same type/flags, ignores non-segvn segments, and passes
3309 3214 * each chunk to as_iset_default_lpsize().
3310 3215 */
3311 3216 int
3312 3217 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
3313 3218 {
3314 3219 struct seg *seg;
3315 3220 caddr_t raddr;
3316 3221 size_t rsize;
3317 3222 size_t ssize;
3318 3223 int rtype, rflags;
3319 3224 int stype, sflags;
3320 3225 int error;
3321 3226 caddr_t setaddr;
3322 3227 size_t setsize;
3323 3228 int segvn;
3324 3229
3325 3230 if (size == 0)
3326 3231 return (0);
3327 3232
3328 3233 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3329 3234 again:
3330 3235 error = 0;
3331 3236
3332 3237 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3333 3238 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3334 3239 (size_t)raddr;
3335 3240
3336 3241 if (raddr + rsize < raddr) { /* check for wraparound */
3337 3242 AS_LOCK_EXIT(as, &as->a_lock);
3338 3243 return (ENOMEM);
3339 3244 }
3340 3245 as_clearwatchprot(as, raddr, rsize);
3341 3246 seg = as_segat(as, raddr);
3342 3247 if (seg == NULL) {
3343 3248 as_setwatch(as);
3344 3249 AS_LOCK_EXIT(as, &as->a_lock);
3345 3250 return (ENOMEM);
3346 3251 }
3347 3252 if (seg->s_ops == &segvn_ops) {
3348 3253 rtype = SEGOP_GETTYPE(seg, addr);
3349 3254 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3350 3255 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3351 3256 segvn = 1;
3352 3257 } else {
3353 3258 segvn = 0;
3354 3259 }
3355 3260 setaddr = raddr;
3356 3261 setsize = 0;
3357 3262
3358 3263 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3359 3264 if (raddr >= (seg->s_base + seg->s_size)) {
3360 3265 seg = AS_SEGNEXT(as, seg);
3361 3266 if (seg == NULL || raddr != seg->s_base) {
3362 3267 error = ENOMEM;
3363 3268 break;
3364 3269 }
3365 3270 if (seg->s_ops == &segvn_ops) {
3366 3271 stype = SEGOP_GETTYPE(seg, raddr);
3367 3272 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3368 3273 stype &= (MAP_SHARED | MAP_PRIVATE);
3369 3274 if (segvn && (rflags != sflags ||
3370 3275 rtype != stype)) {
3371 3276 /*
3372 3277 * The next segment is also segvn but
3373 3278 * has different flags and/or type.
3374 3279 */
3375 3280 ASSERT(setsize != 0);
3376 3281 error = as_iset_default_lpsize(as,
3377 3282 setaddr, setsize, rflags, rtype);
3378 3283 if (error) {
3379 3284 break;
3380 3285 }
3381 3286 rflags = sflags;
3382 3287 rtype = stype;
3383 3288 setaddr = raddr;
3384 3289 setsize = 0;
3385 3290 } else if (!segvn) {
3386 3291 rflags = sflags;
3387 3292 rtype = stype;
3388 3293 setaddr = raddr;
3389 3294 setsize = 0;
3390 3295 segvn = 1;
3391 3296 }
3392 3297 } else if (segvn) {
3393 3298 /* The next segment is not segvn. */
3394 3299 ASSERT(setsize != 0);
3395 3300 error = as_iset_default_lpsize(as,
3396 3301 setaddr, setsize, rflags, rtype);
3397 3302 if (error) {
3398 3303 break;
3399 3304 }
3400 3305 segvn = 0;
3401 3306 }
3402 3307 }
3403 3308 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3404 3309 ssize = seg->s_base + seg->s_size - raddr;
3405 3310 } else {
3406 3311 ssize = rsize;
3407 3312 }
3408 3313 }
3409 3314 if (error == 0 && segvn) {
3410 3315 /* The last chunk when rsize == 0. */
3411 3316 ASSERT(setsize != 0);
3412 3317 error = as_iset_default_lpsize(as, setaddr, setsize,
3413 3318 rflags, rtype);
3414 3319 }
3415 3320
3416 3321 if (error == IE_RETRY) {
3417 3322 goto again;
3418 3323 } else if (error == IE_NOMEM) {
3419 3324 error = EAGAIN;
3420 3325 } else if (error == ENOTSUP) {
3421 3326 error = EINVAL;
3422 3327 } else if (error == EAGAIN) {
3423 3328 mutex_enter(&as->a_contents);
3424 3329 if (!AS_ISNOUNMAPWAIT(as)) {
3425 3330 if (AS_ISUNMAPWAIT(as) == 0) {
3426 3331 cv_broadcast(&as->a_cv);
3427 3332 }
3428 3333 AS_SETUNMAPWAIT(as);
3429 3334 AS_LOCK_EXIT(as, &as->a_lock);
3430 3335 while (AS_ISUNMAPWAIT(as)) {
3431 3336 cv_wait(&as->a_cv, &as->a_contents);
3432 3337 }
3433 3338 mutex_exit(&as->a_contents);
3434 3339 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3435 3340 } else {
3436 3341 /*
3437 3342 * We may have raced with
3438 3343 * segvn_reclaim()/segspt_reclaim(). In this case
3439 3344 				 * clear the nounmapwait flag and retry, since
3440 3345 				 * softlockcnt in this segment may already be 0.
3441 3346 				 * We don't drop the as writer lock, so the
3442 3347 				 * number of retries without sleeping should be
3443 3348 				 * very small. See segvn_reclaim() for more comments.
3444 3349 */
3445 3350 AS_CLRNOUNMAPWAIT(as);
3446 3351 mutex_exit(&as->a_contents);
3447 3352 }
3448 3353 goto again;
3449 3354 }
3450 3355
3451 3356 as_setwatch(as);
3452 3357 AS_LOCK_EXIT(as, &as->a_lock);
3453 3358 return (error);
3454 3359 }
3455 3360
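As the block comment above notes, this path is entered from memcntl(2) with a page size of zero. A minimal user-level sketch of such a request follows; the MC_HAT_ADVISE/MHA_MAPSIZE_VA usage matches memcntl(2), but the surrounding function is hypothetical and error handling is trimmed.

    #include <sys/types.h>
    #include <sys/mman.h>
    #include <stdio.h>

    /* ask the kernel to pick a default large page size for [addr, addr+len) */
    int
    request_default_lpsize(caddr_t addr, size_t len)
    {
            struct memcntl_mha mha;

            mha.mha_cmd = MHA_MAPSIZE_VA;
            mha.mha_flags = 0;
            mha.mha_pagesize = 0;   /* 0 == let the kernel choose the default */

            if (memcntl(addr, len, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) != 0) {
                    perror("memcntl");
                    return (-1);
            }
            return (0);
    }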
3456 3361 /*
3457 3362  * Set up all of the uninitialized watched pages that we can.
3458 3363 */
3459 3364 void
3460 3365 as_setwatch(struct as *as)
3461 3366 {
3462 3367 struct watched_page *pwp;
3463 3368 struct seg *seg;
3464 3369 caddr_t vaddr;
3465 3370 uint_t prot;
3466 3371 int err, retrycnt;
3467 3372
3468 3373 if (avl_numnodes(&as->a_wpage) == 0)
3469 3374 return;
3470 3375
3471 3376 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3472 3377
3473 3378 for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3474 3379 pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3475 3380 retrycnt = 0;
3476 3381 retry:
3477 3382 vaddr = pwp->wp_vaddr;
3478 3383 if (pwp->wp_oprot != 0 || /* already set up */
3479 3384 (seg = as_segat(as, vaddr)) == NULL ||
3480 3385 SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
3481 3386 continue;
3482 3387
3483 3388 pwp->wp_oprot = prot;
3484 3389 if (pwp->wp_read)
3485 3390 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3486 3391 if (pwp->wp_write)
3487 3392 prot &= ~PROT_WRITE;
3488 3393 if (pwp->wp_exec)
3489 3394 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3490 3395 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3491 3396 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3492 3397 if (err == IE_RETRY) {
3493 3398 pwp->wp_oprot = 0;
3494 3399 ASSERT(retrycnt == 0);
3495 3400 retrycnt++;
3496 3401 goto retry;
3497 3402 }
3498 3403 }
3499 3404 pwp->wp_prot = prot;
3500 3405 }
3501 3406 }
3502 3407
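The a_wpage entries that as_setwatch() walks are planted when a tracer sets a watchpoint through /proc. Here is a sketch of that side, assuming the proc(4) PCWATCH control message and prwatch_t layout; the helper name is made up and error handling is minimal.

    #include <sys/types.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <stdio.h>
    #include <string.h>
    #include <procfs.h>

    /* plant a write watchpoint on [vaddr, vaddr + size) in process pid */
    int
    set_write_watchpoint(pid_t pid, uintptr_t vaddr, size_t size)
    {
            struct {
                    long            cmd;
                    prwatch_t       prw;
            } ctl;
            char path[64];
            int fd;

            (void) snprintf(path, sizeof (path), "/proc/%d/ctl", (int)pid);
            if ((fd = open(path, O_WRONLY)) == -1)
                    return (-1);

            ctl.cmd = PCWATCH;
            (void) memset(&ctl.prw, 0, sizeof (ctl.prw));
            ctl.prw.pr_vaddr = vaddr;
            ctl.prw.pr_size = size;
            ctl.prw.pr_wflags = WA_WRITE;   /* trap on stores into the range */

            if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
                    (void) close(fd);
                    return (-1);
            }
            return (close(fd));
    }

Each watched page created for such a range starts out with wp_oprot == 0, which is exactly the "not yet set up" state the loop above looks for.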
3503 3408 /*
3504 3409 * Clear all of the watched pages in the address space.
3505 3410 */
3506 3411 void
3507 3412 as_clearwatch(struct as *as)
3508 3413 {
3509 3414 struct watched_page *pwp;
3510 3415 struct seg *seg;
3511 3416 caddr_t vaddr;
3512 3417 uint_t prot;
3513 3418 int err, retrycnt;
3514 3419
3515 3420 if (avl_numnodes(&as->a_wpage) == 0)
3516 3421 return;
3517 3422
3518 3423 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3519 3424
3520 3425 for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3521 3426 pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3522 3427 retrycnt = 0;
3523 3428 retry:
3524 3429 vaddr = pwp->wp_vaddr;
3525 3430 if (pwp->wp_oprot == 0 || /* not set up */
3526 3431 (seg = as_segat(as, vaddr)) == NULL)
3527 3432 continue;
3528 3433
3529 3434 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3530 3435 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3531 3436 if (err == IE_RETRY) {
3532 3437 ASSERT(retrycnt == 0);
3533 3438 retrycnt++;
3534 3439 goto retry;
3535 3440 }
3536 3441 }
3537 3442 pwp->wp_oprot = 0;
3538 3443 pwp->wp_prot = 0;
3539 3444 }
3540 3445 }
3541 3446
3542 3447 /*
3543 3448 * Force a new setup for all the watched pages in the range.
3544 3449 */
3545 3450 static void
3546 3451 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3547 3452 {
3548 3453 struct watched_page *pwp;
3549 3454 struct watched_page tpw;
3550 3455 caddr_t eaddr = addr + size;
3551 3456 caddr_t vaddr;
3552 3457 struct seg *seg;
3553 3458 int err, retrycnt;
3554 3459 uint_t wprot;
3555 3460 avl_index_t where;
3556 3461
3557 3462 if (avl_numnodes(&as->a_wpage) == 0)
3558 3463 return;
3559 3464
3560 3465 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3561 3466
3562 3467 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3563 3468 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3564 3469 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3565 3470
3566 3471 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3567 3472 retrycnt = 0;
3568 3473 vaddr = pwp->wp_vaddr;
3569 3474
3570 3475 wprot = prot;
3571 3476 if (pwp->wp_read)
3572 3477 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3573 3478 if (pwp->wp_write)
3574 3479 wprot &= ~PROT_WRITE;
3575 3480 if (pwp->wp_exec)
3576 3481 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3577 3482 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3578 3483 retry:
3579 3484 seg = as_segat(as, vaddr);
3580 3485 if (seg == NULL) {
3581 3486 panic("as_setwatchprot: no seg");
3582 3487 /*NOTREACHED*/
3583 3488 }
3584 3489 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot);
3585 3490 if (err == IE_RETRY) {
3586 3491 ASSERT(retrycnt == 0);
3587 3492 retrycnt++;
3588 3493 goto retry;
3589 3494 }
3590 3495 }
3591 3496 pwp->wp_oprot = prot;
3592 3497 pwp->wp_prot = wprot;
3593 3498
3594 3499 pwp = AVL_NEXT(&as->a_wpage, pwp);
3595 3500 }
3596 3501 }
3597 3502
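as_setwatchprot() recomputes the same mask as as_setwatch(): read and exec watchpoints revoke all access so that any reference to the page faults, while a write watchpoint only needs to revoke write permission. Distilled into a hypothetical stand-alone helper:

    #include <sys/mman.h>   /* PROT_READ, PROT_WRITE, PROT_EXEC */

    /* effective protections for a page given its watchpoint flags */
    static unsigned
    watch_prot(unsigned prot, int wp_read, int wp_write, int wp_exec)
    {
            if (wp_read)
                    prot &= ~(PROT_READ | PROT_WRITE | PROT_EXEC);
            if (wp_write)
                    prot &= ~PROT_WRITE;
            if (wp_exec)
                    prot &= ~(PROT_READ | PROT_WRITE | PROT_EXEC);
            return (prot);
    }

The pwp->wp_oprot/wp_prot pair then records both the original and the watch-reduced protections, so as_clearwatch() can restore the page later.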
3598 3503 /*
3599 3504 * Clear all of the watched pages in the range.
3600 3505 */
3601 3506 static void
3602 3507 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3603 3508 {
3604 3509 caddr_t eaddr = addr + size;
3605 3510 struct watched_page *pwp;
3606 3511 struct watched_page tpw;
3607 3512 uint_t prot;
3608 3513 struct seg *seg;
3609 3514 int err, retrycnt;
3610 3515 avl_index_t where;
3611 3516
3612 3517 if (avl_numnodes(&as->a_wpage) == 0)
3613 3518 return;
3614 3519
3615 3520 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3616 3521 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3617 3522 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3618 3523
3619 3524 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3620 3525
3621 3526 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3622 3527
3623 3528 if ((prot = pwp->wp_oprot) != 0) {
3624 3529 retrycnt = 0;
3625 3530
3626 3531 if (prot != pwp->wp_prot) {
3627 3532 retry:
3628 3533 seg = as_segat(as, pwp->wp_vaddr);
3629 3534 if (seg == NULL)
3630 3535 continue;
3631 3536 err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
3632 3537 PAGESIZE, prot);
3633 3538 if (err == IE_RETRY) {
3634 3539 ASSERT(retrycnt == 0);
3635 3540 retrycnt++;
3636 3541 goto retry;
3637 3542
3638 3543 }
3639 3544 }
3640 3545 pwp->wp_oprot = 0;
3641 3546 pwp->wp_prot = 0;
3642 3547 }
3643 3548
3644 3549 pwp = AVL_NEXT(&as->a_wpage, pwp);
3645 3550 }
3646 3551 }
3647 3552
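/*
 * Post siginfo to every process whose address space is "as". The p_as
 * check is made twice: once under pidlock to find candidates cheaply,
 * and again under p_lock, which is what actually keeps p_as stable
 * while the signal is queued (with KM_NOSLEEP, since both locks are
 * held).
 */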
3648 3553 void
3649 3554 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3650 3555 {
3651 3556 struct proc *p;
3652 3557
3653 3558 mutex_enter(&pidlock);
3654 3559 for (p = practive; p; p = p->p_next) {
3655 3560 if (p->p_as == as) {
3656 3561 mutex_enter(&p->p_lock);
3657 3562 if (p->p_as == as)
3658 3563 sigaddq(p, NULL, siginfo, KM_NOSLEEP);
3659 3564 mutex_exit(&p->p_lock);
3660 3565 }
3661 3566 }
3662 3567 mutex_exit(&pidlock);
3663 3568 }
3664 3569
3665 3570 /*
3666 3571  * Return the memory object ID for the mapping at the given address.
3667 3572 */
3668 3573 int
3669 3574 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3670 3575 {
3671 3576 struct seg *seg;
3672 3577 int sts;
3673 3578
3674 3579 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3675 3580 seg = as_segat(as, addr);
3676 3581 if (seg == NULL) {
3677 3582 AS_LOCK_EXIT(as, &as->a_lock);
3678 3583 return (EFAULT);
3679 3584 }
3680 3585 /*
3681 3586 	 * catch old drivers that may not support getmemid
3682 3587 */
3683 3588 if (seg->s_ops->getmemid == NULL) {
3684 3589 AS_LOCK_EXIT(as, &as->a_lock);
3685 3590 return (ENODEV);
3686 3591 }
3687 3592
3688 3593 sts = SEGOP_GETMEMID(seg, addr, memidp);
3689 3594
3690 3595 AS_LOCK_EXIT(as, &as->a_lock);
3691 3596 return (sts);
3692 3597 }
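The getmemid NULL check above is the usual guard for an optional slot in an ops vector: a segment driver built before the op existed simply leaves it zeroed, and the caller fails soft with ENODEV. The same pattern in miniature, with hypothetical names:

    #include <errno.h>      /* ENODEV */

    /* hypothetical ops vector with an optional entry point */
    struct widget_ops {
            int     (*w_read)(void *);
            int     (*w_getid)(void *, int *);      /* may be NULL in old drivers */
    };

    static int
    widget_getid(const struct widget_ops *ops, void *w, int *idp)
    {
            if (ops->w_getid == NULL)
                    return (ENODEV);        /* op not supported by this driver */
            return (ops->w_getid(w, idp));
    }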
↓ open down ↓ |
1495 lines elided |
↑ open up ↑ |