/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*   All Rights Reserved   */

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - segment of a mapped device.
 *
 * This segment driver is used when mapping character special devices.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/sysmacros.h>
#include <sys/vtrace.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/ddidevmap.h>
#include <sys/ddi_implfuncs.h>
#include <sys/lgrp.h>

#include <vm/page.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_kp.h>
#include <vm/seg_kmem.h>
#include <vm/vpage.h>

#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/fs/snode.h>


#if DEBUG
int segdev_debug;
#define DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
#else
#define DEBUGF(level, args)
#endif

/* Default timeout for devmap context management */
#define CTX_TIMEOUT_VALUE 0

#define HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
        { mutex_enter(&dhp->dh_lock); }

#define RELE_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
        { mutex_exit(&dhp->dh_lock); }

#define round_down_p2(a, s) ((a) & ~((s) - 1))
#define round_up_p2(a, s)   (((a) + (s) - 1) & ~((s) - 1))

/*
 * VA_PA_ALIGNED checks to see if both VA and PA are on pgsize boundary
 * VA_PA_PGSIZE_ALIGNED checks to see if VA is aligned with PA w.r.t.
 * pgsize
 */
#define VA_PA_ALIGNED(uvaddr, paddr, pgsize)    \
    (((uvaddr | paddr) & (pgsize - 1)) == 0)
#define VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize) \
    (((uvaddr ^ paddr) & (pgsize - 1)) == 0)
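/*
 * Worked examples for the helpers above (illustration only, not compiled):
 * with pgsize == 0x2000, round_down_p2(0x3200, 0x2000) == 0x2000 and
 * round_up_p2(0x3200, 0x2000) == 0x4000.  VA_PA_ALIGNED(0x4000, 0x6000,
 * 0x2000) holds because both addresses sit on a 0x2000 boundary, while
 * VA_PA_PGSIZE_ALIGNED(0x4200, 0x6200, 0x2000) holds because VA and PA
 * share the same offset (0x200) within the large page, which is what
 * matters for loading a large-page translation.
 */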
#define vpgtob(n)   ((n) * sizeof (struct vpage))   /* For brevity */

#define VTOCVP(vp)  (VTOS(vp)->s_commonvp)  /* we "know" it's an snode */

static struct devmap_ctx *devmapctx_list = NULL;
static struct devmap_softlock *devmap_slist = NULL;

/*
 * mutex, vnode and page for the page of zeros we use for the trash mappings.
 * One trash page is allocated on the first ddi_umem_setup call that uses it.
 * XXX Eventually, we may want to combine this with what segnf does when all
 * hat layers implement HAT_NOFAULT.
 *
 * The trash page is used when the backing store for a userland mapping is
 * removed but the application semantics do not take kindly to a SIGBUS.
 * In that scenario, the application's pages are mapped to some dummy page
 * which returns garbage on read and writes go into a common place.
 * (Perfect for NO_FAULT semantics.)
 * The device driver is responsible for communicating to the app through some
 * other mechanism that such remapping has happened and that the app should
 * take corrective action.
 * We could also use an anonymous memory page, as there is no requirement to
 * keep the page locked; however, this complicates the fault code.  RFE.
 */
static struct vnode trashvp;
static struct page *trashpp;

/* Non-pageable kernel memory is allocated from the umem_np_arena. */
static vmem_t *umem_np_arena;

/* Set the cookie to a value we know will never be a valid umem_cookie */
#define DEVMAP_DEVMEM_COOKIE    ((ddi_umem_cookie_t)0x1)

/*
 * Macros to check the type of a devmap handle
 */
#define cookie_is_devmem(c) \
    ((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)

#define cookie_is_pmem(c)   \
    ((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)

#define cookie_is_kpmem(c)  (!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
    ((c)->type == KMEM_PAGEABLE))

#define dhp_is_devmem(dhp)  \
    (cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))

#define dhp_is_pmem(dhp)    \
    (cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))

#define dhp_is_kpmem(dhp)   \
    (cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
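/*
 * Classification sketch: dh_cookie is overloaded.  The reserved scalar
 * values DEVMAP_DEVMEM_COOKIE and DEVMAP_PMEM_COOKIE tag device-memory
 * and pmem mappings respectively; any other value is taken to be a
 * pointer to a real struct ddi_umem_cookie, whose ->type field then
 * distinguishes KMEM_PAGEABLE (kpmem) from the other umem flavors.
 */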
/*
 * Private seg op routines.
 */
static int  segdev_dup(struct seg *, struct seg *);
static int  segdev_unmap(struct seg *, caddr_t, size_t);
static void segdev_free(struct seg *);
static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
        enum fault_type, enum seg_rw);
static faultcode_t segdev_faulta(struct seg *, caddr_t);
static int  segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
static int  segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
static void segdev_badop(void);
static int  segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t   segdev_incore(struct seg *, caddr_t, size_t, char *);
static int  segdev_lockop(struct seg *, caddr_t, size_t, int, int,
        ulong_t *, size_t);
static int  segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
static u_offset_t   segdev_getoffset(struct seg *, caddr_t);
static int  segdev_gettype(struct seg *, caddr_t);
static int  segdev_getvp(struct seg *, caddr_t, struct vnode **);
static int  segdev_advise(struct seg *, caddr_t, size_t, uint_t);
static void segdev_dump(struct seg *);
static int  segdev_pagelock(struct seg *, caddr_t, size_t,
        struct page ***, enum lock_type, enum seg_rw);
static int  segdev_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int  segdev_getmemid(struct seg *, caddr_t, memid_t *);

/*
 * XXX this struct is used by rootnex_map_fault to identify
 * the segment it has been passed. So if you make it
 * "static" you'll need to fix rootnex_map_fault.
 */
struct seg_ops segdev_ops = {
    .dup        = segdev_dup,
    .unmap      = segdev_unmap,
    .free       = segdev_free,
    .fault      = segdev_fault,
    .faulta     = segdev_faulta,
    .setprot    = segdev_setprot,
    .checkprot  = segdev_checkprot,
    .kluster    = (int (*)())segdev_badop,
    .sync       = segdev_sync,
    .incore     = segdev_incore,
    .lockop     = segdev_lockop,
    .getprot    = segdev_getprot,
    .getoffset  = segdev_getoffset,
    .gettype    = segdev_gettype,
    .getvp      = segdev_getvp,
    .advise     = segdev_advise,
    .dump       = segdev_dump,
    .pagelock   = segdev_pagelock,
    .setpagesize = segdev_setpagesize,
    .getmemid   = segdev_getmemid,
};

/*
 * Private segdev support routines
 */
static struct segdev_data *sdp_alloc(void);

static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
    size_t, enum seg_rw);

static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
    struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);

static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
    size_t, enum fault_type, enum seg_rw, devmap_handle_t *);

static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
static void devmap_softlock_rele(devmap_handle_t *);
static void devmap_ctx_rele(devmap_handle_t *);

static void devmap_ctxto(void *);

static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
    caddr_t addr);

static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
    ulong_t *opfn, ulong_t *pagesize);

static void free_devmap_handle(devmap_handle_t *dhp);

static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
    struct seg *newseg);

static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
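/*
 * For orientation: a segment created through the devmap framework
 * carries a singly linked chain of devmap handles (dh_next), which
 * together tile the segment's user address range:
 *
 *	sdp->devmap_data --> dhp0 --> dhp1 --> ... --> NULL
 *	the [dh_uvaddr, dh_uvaddr + dh_len) ranges of successive
 *	handles abut one another.
 *
 * The unmap helpers below peel handles off the head or tail of the
 * chain, or split it when a hole is punched in the middle.
 */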
static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);

static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);

static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
    offset_t off, size_t len, uint_t flags);

static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
    caddr_t addr, size_t *llen, caddr_t *laddr);

static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);

static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);

static void *devmap_umem_alloc_np(size_t size, size_t flags);
static void devmap_umem_free_np(void *addr, size_t size);

/*
 * routines to lock and unlock underlying segkp segment for
 * KMEM_PAGEABLE type cookies.
 */
static faultcode_t acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);

/*
 * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
 * drivers with devmap_access callbacks
 */
static int devmap_softlock_enter(struct devmap_softlock *, size_t,
    enum fault_type);
static void devmap_softlock_exit(struct devmap_softlock *, size_t,
    enum fault_type);

static kmutex_t devmapctx_lock;

static kmutex_t devmap_slock;

/*
 * Initialize the thread callbacks and thread private data.
 */
static struct devmap_ctx *
devmap_ctxinit(dev_t dev, ulong_t id)
{
    struct devmap_ctx *devctx;
    struct devmap_ctx *tmp;
    dev_info_t *dip;

    tmp = kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);

    mutex_enter(&devmapctx_lock);

    dip = e_ddi_hold_devi_by_dev(dev, 0);
    ASSERT(dip != NULL);
    ddi_release_devi(dip);

    for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
        if ((devctx->dip == dip) && (devctx->id == id))
            break;

    if (devctx == NULL) {
        devctx = tmp;
        devctx->dip = dip;
        devctx->id = id;
        mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
        devctx->next = devmapctx_list;
        devmapctx_list = devctx;
    } else
        kmem_free(tmp, sizeof (struct devmap_ctx));

    mutex_enter(&devctx->lock);
    devctx->refcnt++;
    mutex_exit(&devctx->lock);
    mutex_exit(&devmapctx_lock);

    return (devctx);
}

/*
 * Timeout callback called if a CPU has not given up the device context
 * within dhp->dh_timeout_length ticks
 */
static void
devmap_ctxto(void *data)
{
    struct devmap_ctx *devctx = data;

    TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_CTXTO,
        "devmap_ctxto:timeout expired, devctx=%p", (void *)devctx);
    mutex_enter(&devctx->lock);
    /*
     * Set oncpu = 0 so the next mapping trying to get the device context
     * can.
     */
    devctx->oncpu = 0;
    devctx->timeout = 0;
    cv_signal(&devctx->cv);
    mutex_exit(&devctx->lock);
}
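/*
 * Sketch of the context handoff (the context-management entry points
 * live elsewhere in this file): the thread that owns the device context
 * sets devctx->oncpu and arms a timeout(devmap_ctxto, devctx, ticks)
 * based on dhp->dh_timeout_length; if it fails to give the context up
 * in time, devmap_ctxto above clears oncpu and cv_signal()s the next
 * waiter sleeping on devctx->cv.
 */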
/*
 * Create a device segment.
 */
int
segdev_create(struct seg *seg, void *argsp)
{
    struct segdev_data *sdp;
    struct segdev_crargs *a = (struct segdev_crargs *)argsp;
    devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
    int error;

    /*
     * Since the address space is "write" locked, we
     * don't need the segment lock to protect "segdev" data.
     */
    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);

    sdp = sdp_alloc();

    sdp->mapfunc = a->mapfunc;
    sdp->offset = a->offset;
    sdp->prot = a->prot;
    sdp->maxprot = a->maxprot;
    sdp->type = a->type;
    sdp->pageprot = 0;
    sdp->softlockcnt = 0;
    sdp->vpage = NULL;

    if (sdp->mapfunc == NULL)
        sdp->devmap_data = dhp;
    else
        sdp->devmap_data = dhp = NULL;

    sdp->hat_flags = a->hat_flags;
    sdp->hat_attr = a->hat_attr;

    /*
     * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
     */
    ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));

    /*
     * Hold shadow vnode -- segdev only deals with
     * character (VCHR) devices.  We use the common
     * vp to hang pages on.
     */
    sdp->vp = specfind(a->dev, VCHR);
    ASSERT(sdp->vp != NULL);

    seg->s_ops = &segdev_ops;
    seg->s_data = sdp;

    while (dhp != NULL) {
        dhp->dh_seg = seg;
        dhp = dhp->dh_next;
    }

    /*
     * Inform the vnode of the new mapping.
     */
    /*
     * It is ok to pass sdp->maxprot to ADDMAP rather than the
     * dhp-specific maxprot because spec_addmap does not use maxprot.
     */
    error = VOP_ADDMAP(VTOCVP(sdp->vp), sdp->offset,
        seg->s_as, seg->s_base, seg->s_size,
        sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);

    if (error != 0) {
        sdp->devmap_data = NULL;
        hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
            HAT_UNLOAD_UNMAP);
    } else {
        /*
         * Mappings of /dev/null don't count towards the VSZ of a
         * process.  Mappings of /dev/null have no mapping type.
         */
        if ((segop_gettype(seg, seg->s_base) & (MAP_SHARED |
            MAP_PRIVATE)) == 0) {
            seg->s_as->a_resvsize -= seg->s_size;
        }
    }

    return (error);
}

static struct segdev_data *
sdp_alloc(void)
{
    struct segdev_data *sdp;

    sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
    rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);

    return (sdp);
}

/*
 * Duplicate seg and return new segment in newseg.
 */
static int
segdev_dup(struct seg *seg, struct seg *newseg)
{
    struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    struct segdev_data *newsdp;
    devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
    size_t npages;
    int ret;

    TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DUP,
        "segdev_dup:start dhp=%p, seg=%p", (void *)dhp, (void *)seg);

    DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
        (void *)dhp, (void *)seg));

    /*
     * Since the address space is "write" locked, we
     * don't need the segment lock to protect "segdev" data.
     */
    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    newsdp = sdp_alloc();

    newseg->s_ops = seg->s_ops;
    newseg->s_data = (void *)newsdp;

    VN_HOLD(sdp->vp);
    newsdp->vp = sdp->vp;
    newsdp->mapfunc = sdp->mapfunc;
    newsdp->offset = sdp->offset;
    newsdp->pageprot = sdp->pageprot;
    newsdp->prot = sdp->prot;
    newsdp->maxprot = sdp->maxprot;
    newsdp->type = sdp->type;
    newsdp->hat_attr = sdp->hat_attr;
    newsdp->hat_flags = sdp->hat_flags;
    newsdp->softlockcnt = 0;

    /*
     * Initialize per page data if the segment we are
     * dup'ing has per page information.
     */
    npages = seg_pages(newseg);

    if (sdp->vpage != NULL) {
        size_t nbytes = vpgtob(npages);

        newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
        bcopy(sdp->vpage, newsdp->vpage, nbytes);
    } else
        newsdp->vpage = NULL;

    /*
     * duplicate devmap handles
     */
    if (dhp != NULL) {
        ret = devmap_handle_dup(dhp,
            (devmap_handle_t **)&newsdp->devmap_data, newseg);
        if (ret != 0) {
            TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DUP_CK1,
                "segdev_dup:ret1 ret=%x, dhp=%p seg=%p",
                ret, (void *)dhp, (void *)seg);
            DEBUGF(1, (CE_CONT,
                "segdev_dup: ret %x dhp %p seg %p\n",
                ret, (void *)dhp, (void *)seg));
            return (ret);
        }
    }

    /*
     * Inform the common vnode of the new mapping.
     */
    return (VOP_ADDMAP(VTOCVP(newsdp->vp),
        newsdp->offset, newseg->s_as,
        newseg->s_base, newseg->s_size, newsdp->prot,
        newsdp->maxprot, sdp->type, CRED(), NULL));
}

/*
 * duplicate devmap handles
 */
static int
devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
    struct seg *newseg)
{
    devmap_handle_t *newdhp_save = NULL;
    devmap_handle_t *newdhp = NULL;
    struct devmap_callback_ctl *callbackops;

    while (dhp != NULL) {
        newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);

        /* Need to lock the original dhp while copying if REMAP */
        HOLD_DHP_LOCK(dhp);
        bcopy(dhp, newdhp, sizeof (devmap_handle_t));
        RELE_DHP_LOCK(dhp);
        newdhp->dh_seg = newseg;
        newdhp->dh_next = NULL;
        if (newdhp_save != NULL)
            newdhp_save->dh_next = newdhp;
        else
            *new_dhp = newdhp;
        newdhp_save = newdhp;

        callbackops = &newdhp->dh_callbackops;

        if (dhp->dh_softlock != NULL)
            newdhp->dh_softlock = devmap_softlock_init(
                newdhp->dh_dev,
                (ulong_t)callbackops->devmap_access);
        if (dhp->dh_ctx != NULL)
            newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
                (ulong_t)callbackops->devmap_access);

        /*
         * Initialize dh_lock if we want to do remap.
         */
        if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
            mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
            newdhp->dh_flags |= DEVMAP_LOCK_INITED;
        }

        if (callbackops->devmap_dup != NULL) {
            int ret;

            /*
             * Call the dup callback so that the driver can
             * duplicate its private data.
             */
            ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
                (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);

            if (ret != 0) {
                /*
                 * We want to free up this segment as the driver
                 * has indicated that we can't dup it.  But we
                 * don't want to call the driver's devmap_unmap
                 * callback function as the driver does not
                 * think this segment exists. The caller of
                 * devmap_dup will call seg_free on newseg
                 * as it was the caller that allocated the
                 * segment.
                 */
                DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
                    "newdhp %p dhp %p\n", (void *)newdhp,
                    (void *)dhp));
                callbackops->devmap_unmap = NULL;
                return (ret);
            }
        }

        dhp = dhp->dh_next;
    }

    return (0);
}
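/*
 * For reference, the driver-side dup callback invoked above has this
 * shape (hypothetical driver code, for illustration only; the signature
 * is inferred from the call site above):
 *
 *	static int
 *	xx_devmap_dup(devmap_cookie_t dhp, void *pvtp,
 *	    devmap_cookie_t new_dhp, void **new_pvtp)
 *	{
 *		struct xx_pvt *p = kmem_zalloc(sizeof (*p), KM_SLEEP);
 *		bcopy(pvtp, p, sizeof (*p));
 *		*new_pvtp = p;
 *		return (0);
 *	}
 *
 * A nonzero return makes devmap_handle_dup() fail as handled above.
 */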
/*
 * Split a segment at addr for length len.
 */
/*ARGSUSED*/
static int
segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
{
    register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    register struct segdev_data *nsdp;
    register struct seg *nseg;
    register size_t opages;     /* old segment size in pages */
    register size_t npages;     /* new segment size in pages */
    register size_t dpages;     /* pages being deleted (unmapped) */
    register size_t nbytes;
    devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
    devmap_handle_t *dhpp;
    devmap_handle_t *newdhp;
    struct devmap_callback_ctl *callbackops;
    caddr_t nbase;
    offset_t off;
    ulong_t nsize;
    size_t mlen, sz;

    TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP,
        "segdev_unmap:start dhp=%p, seg=%p addr=%p len=%lx",
        (void *)dhp, (void *)seg, (void *)addr, len);

    DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
        (void *)dhp, (void *)seg, (void *)addr, len));

    /*
     * Since the address space is "write" locked, we
     * don't need the segment lock to protect "segdev" data.
     */
    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    if ((sz = sdp->softlockcnt) > 0) {
        /*
         * Fail the unmap if pages are SOFTLOCKed through this mapping.
         * softlockcnt is protected from change by the as write lock.
         */
        TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK1,
            "segdev_unmap:error softlockcnt = %ld", sz);
        DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
        return (EAGAIN);
    }

    /*
     * Check for bad sizes
     */
    if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
        (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
        panic("segdev_unmap");

    if (dhp != NULL) {
        devmap_handle_t *tdhp;
        /*
         * If large page size was used in hat_devload(),
         * the same page size must be used in hat_unload().
         */
        dhpp = tdhp = devmap_find_handle(dhp, addr);
        while (tdhp != NULL) {
            if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
                break;
            }
            tdhp = tdhp->dh_next;
        }
        if (tdhp != NULL) { /* found a dhp using large pages */
            size_t slen = len;
            size_t mlen;
            size_t soff;

            soff = (ulong_t)(addr - dhpp->dh_uvaddr);
            while (slen != 0) {
                mlen = MIN(slen, (dhpp->dh_len - soff));
                hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
                    dhpp->dh_len, HAT_UNLOAD_UNMAP);
                dhpp = dhpp->dh_next;
                ASSERT(slen >= mlen);
                slen -= mlen;
                soff = 0;
            }
        } else
            hat_unload(seg->s_as->a_hat, addr, len,
                HAT_UNLOAD_UNMAP);
    } else {
        /*
         * Unload any hardware translations in the range
         * to be taken out.
         */
        hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
    }

    /*
     * get the user offset which will be used in the driver callbacks
     */
    off = sdp->offset + (offset_t)(addr - seg->s_base);

    /*
     * Inform the vnode of the unmapping.
     */
    ASSERT(sdp->vp != NULL);
    (void) VOP_DELMAP(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
        sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);

    /*
     * Check for entire segment
     */
    if (addr == seg->s_base && len == seg->s_size) {
        seg_free(seg);
        return (0);
    }

    opages = seg_pages(seg);
    dpages = btop(len);
    npages = opages - dpages;

    /*
     * Check for beginning of segment
     */
    if (addr == seg->s_base) {
        if (sdp->vpage != NULL) {
            register struct vpage *ovpage;

            ovpage = sdp->vpage;    /* keep pointer to vpage */

            nbytes = vpgtob(npages);
            sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
            bcopy(&ovpage[dpages], sdp->vpage, nbytes);

            /* free up old vpage */
            kmem_free(ovpage, vpgtob(opages));
        }

        /*
         * free devmap handles from the beginning of the mapping.
         */
        if (dhp != NULL)
            devmap_handle_unmap_head(dhp, len);

        sdp->offset += (offset_t)len;

        seg->s_base += len;
        seg->s_size -= len;

        return (0);
    }

    /*
     * Check for end of segment
     */
    if (addr + len == seg->s_base + seg->s_size) {
        if (sdp->vpage != NULL) {
            register struct vpage *ovpage;

            ovpage = sdp->vpage;    /* keep pointer to vpage */

            nbytes = vpgtob(npages);
            sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
            bcopy(ovpage, sdp->vpage, nbytes);

            /* free up old vpage */
            kmem_free(ovpage, vpgtob(opages));
        }
        seg->s_size -= len;

        /*
         * free devmap handles from addr to the end of the mapping.
         */
        if (dhp != NULL)
            devmap_handle_unmap_tail(dhp, addr);

        return (0);
    }
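    /*
     * Middle unmap, before and after (sketch):
     *
     *	before:	seg   [s_base ........................... s_base+s_size)
     *	unmap:	             [addr ......... addr+len)
     *	after:	seg   [s_base ... addr)    nseg  [addr+len ... old end)
     */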
    /*
     * The section to go is in the middle of the segment,
     * have to make it into two segments.  nseg is made for
     * the high end while seg is cut down at the low end.
     */
    nbase = addr + len;             /* new seg base */
    nsize = (seg->s_base + seg->s_size) - nbase;    /* new seg size */
    seg->s_size = addr - seg->s_base;   /* shrink old seg */
    nseg = seg_alloc(seg->s_as, nbase, nsize);
    if (nseg == NULL)
        panic("segdev_unmap seg_alloc");

    TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK2,
        "segdev_unmap: seg=%p nseg=%p", (void *)seg, (void *)nseg);
    DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
        (void *)seg, (void *)nseg));
    nsdp = sdp_alloc();

    nseg->s_ops = seg->s_ops;
    nseg->s_data = (void *)nsdp;

    VN_HOLD(sdp->vp);
    nsdp->mapfunc = sdp->mapfunc;
    nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
    nsdp->vp = sdp->vp;
    nsdp->pageprot = sdp->pageprot;
    nsdp->prot = sdp->prot;
    nsdp->maxprot = sdp->maxprot;
    nsdp->type = sdp->type;
    nsdp->hat_attr = sdp->hat_attr;
    nsdp->hat_flags = sdp->hat_flags;
    nsdp->softlockcnt = 0;

    /*
     * Initialize per page data if the segment we are
     * dup'ing has per page information.
     */
    if (sdp->vpage != NULL) {
        /* need to split vpage into two arrays */
        register size_t nnbytes;
        register size_t nnpages;
        register struct vpage *ovpage;

        ovpage = sdp->vpage;    /* keep pointer to vpage */

        npages = seg_pages(seg);    /* seg has shrunk */
        nbytes = vpgtob(npages);
        nnpages = seg_pages(nseg);
        nnbytes = vpgtob(nnpages);

        sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
        bcopy(ovpage, sdp->vpage, nbytes);

        nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
        bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);

        /* free up old vpage */
        kmem_free(ovpage, vpgtob(opages));
    } else
        nsdp->vpage = NULL;

    /*
     * unmap dhps.
     */
    if (dhp == NULL) {
        nsdp->devmap_data = NULL;
        return (0);
    }
    while (dhp != NULL) {
        callbackops = &dhp->dh_callbackops;
        TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK3,
            "segdev_unmap: dhp=%p addr=%p", dhp, addr);
        DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
            (void *)dhp, (void *)addr,
            (void *)dhp->dh_uvaddr, dhp->dh_len));

        if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
            dhpp = dhp->dh_next;
            dhp->dh_next = NULL;
            dhp = dhpp;
        } else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
            dhp = dhp->dh_next;
        } else if (addr > dhp->dh_uvaddr &&
            (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
            /*
             * <addr, addr+len> is enclosed by dhp.
             * create a newdhp that begins at addr+len and
             * ends at dhp->dh_uvaddr+dhp->dh_len.
             */
            newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
            HOLD_DHP_LOCK(dhp);
            bcopy(dhp, newdhp, sizeof (devmap_handle_t));
            RELE_DHP_LOCK(dhp);
            newdhp->dh_seg = nseg;
            newdhp->dh_next = dhp->dh_next;
            if (dhp->dh_softlock != NULL)
                newdhp->dh_softlock = devmap_softlock_init(
                    newdhp->dh_dev,
                    (ulong_t)callbackops->devmap_access);
            if (dhp->dh_ctx != NULL)
                newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
                    (ulong_t)callbackops->devmap_access);
            if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
                mutex_init(&newdhp->dh_lock,
                    NULL, MUTEX_DEFAULT, NULL);
            }
            if (callbackops->devmap_unmap != NULL)
                (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
                    off, len, dhp, &dhp->dh_pvtp,
                    newdhp, &newdhp->dh_pvtp);
            mlen = len + (addr - dhp->dh_uvaddr);
            devmap_handle_reduce_len(newdhp, mlen);
            nsdp->devmap_data = newdhp;
            /* XX Changing len should recalculate LARGE flag */
            dhp->dh_len = addr - dhp->dh_uvaddr;
            dhpp = dhp->dh_next;
            dhp->dh_next = NULL;
            dhp = dhpp;
        } else if ((addr > dhp->dh_uvaddr) &&
            ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
            mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
            /*
             * <addr, addr+len> spans over dhps.
             */
            if (callbackops->devmap_unmap != NULL)
                (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
                    off, mlen, (devmap_cookie_t *)dhp,
                    &dhp->dh_pvtp, NULL, NULL);
            /* XX Changing len should recalculate LARGE flag */
            dhp->dh_len = addr - dhp->dh_uvaddr;
            dhpp = dhp->dh_next;
            dhp->dh_next = NULL;
            dhp = dhpp;
            nsdp->devmap_data = dhp;
        } else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
            /*
             * dhp is enclosed by <addr, addr+len>.
             */
            dhp->dh_seg = nseg;
            nsdp->devmap_data = dhp;
            dhp = devmap_handle_unmap(dhp);
            nsdp->devmap_data = dhp; /* XX redundant? */
        } else if (((addr + len) > dhp->dh_uvaddr) &&
            ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
            mlen = addr + len - dhp->dh_uvaddr;
            if (callbackops->devmap_unmap != NULL)
                (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
                    dhp->dh_uoff, mlen, NULL,
                    NULL, dhp, &dhp->dh_pvtp);
            devmap_handle_reduce_len(dhp, mlen);
            nsdp->devmap_data = dhp;
            dhp->dh_seg = nseg;
            dhp = dhp->dh_next;
        } else {
            dhp->dh_seg = nseg;
            dhp = dhp->dh_next;
        }
    }
    return (0);
}

/*
 * Utility function that handles reducing the length of a devmap handle
 * during unmap.  Note that this is only used for unmapping the front
 * portion of the handle, i.e., we are bumping the offset/pfn etc. up by len.
 * Do not use if reducing length at the tail.
 */
static void
devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
{
    struct ddi_umem_cookie *cp;
    struct devmap_pmem_cookie *pcp;
    /*
     * adjust devmap handle fields
     */
    ASSERT(len < dhp->dh_len);

    /* Make sure only page-aligned changes are done */
    ASSERT((len & PAGEOFFSET) == 0);

    dhp->dh_len -= len;
    dhp->dh_uoff += (offset_t)len;
    dhp->dh_roff += (offset_t)len;
    dhp->dh_uvaddr += len;
    /* Need to grab dhp lock if REMAP */
    HOLD_DHP_LOCK(dhp);
    cp = dhp->dh_cookie;
    if (!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)) {
        if (cookie_is_devmem(cp)) {
            dhp->dh_pfn += btop(len);
        } else if (cookie_is_pmem(cp)) {
            pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
            ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
                dhp->dh_roff < ptob(pcp->dp_npages));
        } else {
            ASSERT(dhp->dh_roff < cp->size);
            ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
                dhp->dh_cvaddr < (cp->cvaddr + cp->size));
            ASSERT((dhp->dh_cvaddr + len) <=
                (cp->cvaddr + cp->size));

            dhp->dh_cvaddr += len;
        }
    }
    /* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
    RELE_DHP_LOCK(dhp);
}
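/*
 * Example of the adjustment above (hypothetical values): a handle with
 * dh_uvaddr == 0x10000, dh_len == 0x6000 and dh_uoff == 0 that loses its
 * first 0x2000 bytes ends up with dh_uvaddr == 0x12000, dh_len == 0x4000
 * and dh_uoff == 0x2000; for device memory, dh_pfn is also advanced by
 * btop(0x2000) pages.
 */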
/*
 * Free devmap handle, dhp.
 * Return the next devmap handle on the linked list.
 */
static devmap_handle_t *
devmap_handle_unmap(devmap_handle_t *dhp)
{
    struct devmap_callback_ctl *callbackops = &dhp->dh_callbackops;
    struct segdev_data *sdp = (struct segdev_data *)dhp->dh_seg->s_data;
    devmap_handle_t *dhpp = (devmap_handle_t *)sdp->devmap_data;

    ASSERT(dhp != NULL);

    /*
     * before we free up dhp, call the driver's devmap_unmap entry point
     * to free resources allocated for this dhp.
     */
    if (callbackops->devmap_unmap != NULL) {
        (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp, dhp->dh_uoff,
            dhp->dh_len, NULL, NULL, NULL, NULL);
    }

    if (dhpp == dhp) {  /* releasing first dhp, change sdp data */
        sdp->devmap_data = dhp->dh_next;
    } else {
        while (dhpp->dh_next != dhp) {
            dhpp = dhpp->dh_next;
        }
        dhpp->dh_next = dhp->dh_next;
    }
    dhpp = dhp->dh_next;    /* return value is next dhp in chain */

    if (dhp->dh_softlock != NULL)
        devmap_softlock_rele(dhp);

    if (dhp->dh_ctx != NULL)
        devmap_ctx_rele(dhp);

    if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
        mutex_destroy(&dhp->dh_lock);
    }
    kmem_free(dhp, sizeof (devmap_handle_t));

    return (dhpp);
}

/*
 * Free complete devmap handles from dhp for len bytes
 * dhp can be either the first handle or a subsequent handle
 */
static void
devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len)
{
    struct devmap_callback_ctl *callbackops;

    /*
     * free the devmap handles covered by len.
     */
    while (len >= dhp->dh_len) {
        len -= dhp->dh_len;
        dhp = devmap_handle_unmap(dhp);
    }
    if (len != 0) { /* partial unmap at head of first remaining dhp */
        callbackops = &dhp->dh_callbackops;

        /*
         * Call the unmap callback so the driver can make
         * adjustments to its private data.
         */
        if (callbackops->devmap_unmap != NULL)
            (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
                dhp->dh_uoff, len, NULL, NULL, dhp, &dhp->dh_pvtp);
        devmap_handle_reduce_len(dhp, len);
    }
}

/*
 * Free devmap handles to truncate the mapping after addr
 * RFE: Simpler to pass in dhp pointing at correct dhp (avoid find again)
 *	Also could then use the routine in middle unmap case too
 */
static void
devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr)
{
    register struct seg *seg = dhp->dh_seg;
    register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    register devmap_handle_t *dhph = (devmap_handle_t *)sdp->devmap_data;
    struct devmap_callback_ctl *callbackops;
    register devmap_handle_t *dhpp;
    size_t maplen;
    ulong_t off;
    size_t len;

    maplen = (size_t)(addr - dhp->dh_uvaddr);
    dhph = devmap_find_handle(dhph, addr);

    while (dhph != NULL) {
        if (maplen == 0) {
            dhph = devmap_handle_unmap(dhph);
        } else {
            callbackops = &dhph->dh_callbackops;
            len = dhph->dh_len - maplen;
            off = (ulong_t)sdp->offset + (addr - seg->s_base);
            /*
             * Call the unmap callback so the driver
             * can make adjustments to its private data.
             */
            if (callbackops->devmap_unmap != NULL)
                (*callbackops->devmap_unmap)(dhph,
                    dhph->dh_pvtp, off, len,
                    (devmap_cookie_t *)dhph,
                    &dhph->dh_pvtp, NULL, NULL);
            /* XXX Reducing len needs to recalculate LARGE flag */
            dhph->dh_len = maplen;
            maplen = 0;
            dhpp = dhph->dh_next;
            dhph->dh_next = NULL;
            dhph = dhpp;
        }
    } /* end while */
}
/*
 * Free a segment.
 */
static void
segdev_free(struct seg *seg)
{
    register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;

    TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FREE,
        "segdev_free: dhp=%p seg=%p", (void *)dhp, (void *)seg);
    DEBUGF(3, (CE_CONT, "segdev_free: dhp %p seg %p\n",
        (void *)dhp, (void *)seg));

    /*
     * Since the address space is "write" locked, we
     * don't need the segment lock to protect "segdev" data.
     */
    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    while (dhp != NULL)
        dhp = devmap_handle_unmap(dhp);

    VN_RELE(sdp->vp);
    if (sdp->vpage != NULL)
        kmem_free(sdp->vpage, vpgtob(seg_pages(seg)));

    rw_destroy(&sdp->lock);
    kmem_free(sdp, sizeof (*sdp));
}

static void
free_devmap_handle(devmap_handle_t *dhp)
{
    register devmap_handle_t *dhpp;

    /*
     * free up devmap handle
     */
    while (dhp != NULL) {
        dhpp = dhp->dh_next;
        if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
            mutex_destroy(&dhp->dh_lock);
        }

        if (dhp->dh_softlock != NULL)
            devmap_softlock_rele(dhp);

        if (dhp->dh_ctx != NULL)
            devmap_ctx_rele(dhp);

        kmem_free(dhp, sizeof (devmap_handle_t));
        dhp = dhpp;
    }
}
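/*
 * Note: unlike devmap_handle_unmap() above, free_devmap_handle() tears
 * down the whole chain without invoking the driver's devmap_unmap
 * callback; it appears intended for paths (e.g. setup failures) where
 * the driver should not see an unmap for a mapping it never completed.
 */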
/*
 * routines to lock and unlock underlying segkp segment for
 * KMEM_PAGEABLE type cookies.
 * segkp only allows a single pending F_SOFTLOCK;
 * we keep track of the number of locks in the cookie so we can
 * have multiple pending faults and manage the calls to segkp.
 * RFE: if segkp supports either pagelock or can support multiple
 * calls to F_SOFTLOCK, then these routines can go away.
 *	If pagelock, segdev_faultpage can fault on a page-by-page basis
 *	and the code simplifies quite a bit.
 *	If multiple calls are allowed but not partial ranges, then the
 *	need for cookie->lock and the locked count goes away and the code
 *	can call as_fault directly
 */
static faultcode_t
acquire_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
{
    int err = 0;
    ASSERT(cookie_is_kpmem(cookie));
    /*
     * Fault in pages in segkp with F_SOFTLOCK.
     * We want to hold the lock until all pages have been loaded.
     * segkp only allows a single caller to hold SOFTLOCK, so the cookie
     * holds a count so we don't call into segkp multiple times
     */
    mutex_enter(&cookie->lock);

    /*
     * Check for overflow in locked field
     */
    if ((UINT32_MAX - cookie->locked) < npages) {
        err = FC_MAKE_ERR(ENOMEM);
    } else if (cookie->locked == 0) {
        /* First time locking */
        err = as_fault(kas.a_hat, &kas, cookie->cvaddr,
            cookie->size, F_SOFTLOCK, PROT_READ|PROT_WRITE);
    }
    if (!err) {
        cookie->locked += npages;
    }
    mutex_exit(&cookie->lock);
    return (err);
}

static void
release_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
{
    mutex_enter(&cookie->lock);
    ASSERT(cookie_is_kpmem(cookie));
    ASSERT(cookie->locked >= npages);
    cookie->locked -= (uint_t)npages;
    if (cookie->locked == 0) {
        /* Last unlock */
        if (as_fault(kas.a_hat, &kas, cookie->cvaddr,
            cookie->size, F_SOFTUNLOCK, PROT_READ|PROT_WRITE))
            panic("segdev releasing kpmem lock %p", (void *)cookie);
    }
    mutex_exit(&cookie->lock);
}

/*
 * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
 * drivers with devmap_access callbacks.
 * slock->softlocked basically works like a rw lock:
 *	-ve counts => F_SOFTLOCK in progress
 *	+ve counts => F_INVAL/F_PROT in progress
 * We allow only one F_SOFTLOCK at a time
 * but can have multiple pending F_INVAL/F_PROT calls
 *
 * This routine waits using cv_wait_sig so killing processes is more graceful
 * Returns EINTR if coming out of this routine due to a signal, 0 otherwise
 */
static int devmap_softlock_enter(
    struct devmap_softlock *slock,
    size_t npages,
    enum fault_type type)
{
    if (npages == 0)
        return (0);
    mutex_enter(&(slock->lock));
    switch (type) {
    case F_SOFTLOCK :
        while (slock->softlocked) {
            if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
                /* signalled */
                mutex_exit(&(slock->lock));
                return (EINTR);
            }
        }
        slock->softlocked -= npages; /* -ve count => locked */
        break;
    case F_INVAL :
    case F_PROT :
        while (slock->softlocked < 0)
            if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
                /* signalled */
                mutex_exit(&(slock->lock));
                return (EINTR);
            }
        slock->softlocked += npages; /* +ve count => f_invals */
        break;
    default:
        ASSERT(0);
    }
    mutex_exit(&(slock->lock));
    return (0);
}

static void devmap_softlock_exit(
    struct devmap_softlock *slock,
    size_t npages,
    enum fault_type type)
{
    if (slock == NULL)
        return;
    mutex_enter(&(slock->lock));
    switch (type) {
    case F_SOFTLOCK :
        ASSERT(-slock->softlocked >= npages);
        slock->softlocked += npages;    /* -ve count is softlocked */
        if (slock->softlocked == 0)
            cv_signal(&slock->cv);
        break;
    case F_INVAL :
    case F_PROT:
        ASSERT(slock->softlocked >= npages);
        slock->softlocked -= npages;
        if (slock->softlocked == 0)
            cv_signal(&slock->cv);
        break;
    default:
        ASSERT(0);
    }
    mutex_exit(&(slock->lock));
}
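/*
 * Worked example of the softlocked protocol above: two concurrent
 * F_INVAL faults of 4 and 2 pages drive the count 0 -> 4 -> 6 -> 2 -> 0.
 * An F_SOFTLOCK of 3 pages must first wait for the count to reach 0,
 * then drives it to -3, blocking all other fault types until the
 * matching F_SOFTUNLOCK brings it back to 0 and cv_signal()s a waiter.
 */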
/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 * The segment lock should be held, (but not the segment private lock?)
 * The softunlock code below does not adjust for large page sizes; it
 * assumes the caller already did any addr/len adjustments for
 * pagesize mappings before calling.
 */
/*ARGSUSED*/
static void
segdev_softunlock(
    struct hat *hat,        /* the hat */
    struct seg *seg,        /* seg_dev of interest */
    caddr_t addr,           /* base address of range */
    size_t len,             /* number of bytes */
    enum seg_rw rw)         /* type of access at fault */
{
    struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;

    TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SOFTUNLOCK,
        "segdev_softunlock:dhp_head=%p sdp=%p addr=%p len=%lx",
        dhp_head, sdp, addr, len);
    DEBUGF(3, (CE_CONT, "segdev_softunlock: dhp %p lockcnt %lx "
        "addr %p len %lx\n",
        (void *)dhp_head, sdp->softlockcnt, (void *)addr, len));

    hat_unlock(hat, addr, len);

    if (dhp_head != NULL) {
        devmap_handle_t *dhp;
        size_t mlen;
        size_t tlen = len;
        ulong_t off;

        dhp = devmap_find_handle(dhp_head, addr);
        ASSERT(dhp != NULL);

        off = (ulong_t)(addr - dhp->dh_uvaddr);
        while (tlen != 0) {
            mlen = MIN(tlen, (dhp->dh_len - off));

            /*
             * unlock segkp memory, locked during F_SOFTLOCK
             */
            if (dhp_is_kpmem(dhp)) {
                release_kpmem_lock(
                    (struct ddi_umem_cookie *)dhp->dh_cookie,
                    btopr(mlen));
            }

            /*
             * Do the softlock accounting for devmap_access
             */
            if (dhp->dh_callbackops.devmap_access != NULL) {
                devmap_softlock_exit(dhp->dh_softlock,
                    btopr(mlen), F_SOFTLOCK);
            }

            tlen -= mlen;
            dhp = dhp->dh_next;
            off = 0;
        }
    }

    mutex_enter(&freemem_lock);
    ASSERT(sdp->softlockcnt >= btopr(len));
    sdp->softlockcnt -= btopr(len);
    mutex_exit(&freemem_lock);
    if (sdp->softlockcnt == 0) {
        /*
         * All SOFTLOCKS are gone.  Wakeup any waiting
         * unmappers so they can try again to unmap.
         * Check for waiters first without the mutex
         * held so we don't always grab the mutex on
         * softunlocks.
         */
        if (AS_ISUNMAPWAIT(seg->s_as)) {
            mutex_enter(&seg->s_as->a_contents);
            if (AS_ISUNMAPWAIT(seg->s_as)) {
                AS_CLRUNMAPWAIT(seg->s_as);
                cv_broadcast(&seg->s_as->a_cv);
            }
            mutex_exit(&seg->s_as->a_contents);
        }
    }
}
/*
 * Handle fault for a single page.
 * Done in a separate routine so we can handle errors more easily.
 * This routine is called only from segdev_faultpages()
 * when looping over the range of addresses requested.  The segment
 * lock is held.
 */
static faultcode_t
segdev_faultpage(
    struct hat *hat,        /* the hat */
    struct seg *seg,        /* seg_dev of interest */
    caddr_t addr,           /* address in as */
    struct vpage *vpage,    /* pointer to vpage for seg, addr */
    enum fault_type type,   /* type of fault */
    enum seg_rw rw,         /* type of access at fault */
    devmap_handle_t *dhp)   /* devmap handle if any for this page */
{
    struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    uint_t prot;
    pfn_t pfnum = PFN_INVALID;
    u_offset_t offset;
    uint_t hat_flags;
    dev_info_t *dip;

    TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE,
        "segdev_faultpage: dhp=%p seg=%p addr=%p", dhp, seg, addr);
    DEBUGF(8, (CE_CONT, "segdev_faultpage: dhp %p seg %p addr %p \n",
        (void *)dhp, (void *)seg, (void *)addr));

    /*
     * Initialize protection value for this page.
     * If we have per page protection values check it now.
     */
    if (sdp->pageprot) {
        uint_t protchk;

        switch (rw) {
        case S_READ:
            protchk = PROT_READ;
            break;
        case S_WRITE:
            protchk = PROT_WRITE;
            break;
        case S_EXEC:
            protchk = PROT_EXEC;
            break;
        case S_OTHER:
        default:
            protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
            break;
        }

        prot = VPP_PROT(vpage);
        if ((prot & protchk) == 0)
            return (FC_PROT);   /* illegal access type */
    } else {
        prot = sdp->prot;
        /* caller has already done segment level protection check */
    }

    if (type == F_SOFTLOCK) {
        mutex_enter(&freemem_lock);
        sdp->softlockcnt++;
        mutex_exit(&freemem_lock);
    }

    hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
    offset = sdp->offset + (u_offset_t)(addr - seg->s_base);
    /*
     * In the devmap framework, sdp->mapfunc is set to NULL.  We can get
     * pfnum from dhp->dh_pfn (at beginning of segment) and offset from
     * seg->s_base.
     */
    if (dhp == NULL) {
        /* If segment has devmap_data, then dhp should be non-NULL */
        ASSERT(sdp->devmap_data == NULL);
        pfnum = (pfn_t)cdev_mmap(sdp->mapfunc, sdp->vp->v_rdev,
            (off_t)offset, prot);
        prot |= sdp->hat_attr;
    } else {
        ulong_t off;
        struct ddi_umem_cookie *cp;
        struct devmap_pmem_cookie *pcp;

        /* ensure the dhp passed in contains addr. */
        ASSERT(dhp == devmap_find_handle(
            (devmap_handle_t *)sdp->devmap_data, addr));

        off = addr - dhp->dh_uvaddr;

        /*
         * This routine assumes that the caller makes sure that the
         * fields in dhp used below are unchanged due to remap during
         * this call.
         * Caller does HOLD_DHP_LOCK if needed.
         */
        cp = dhp->dh_cookie;
        if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
            pfnum = PFN_INVALID;
        } else if (cookie_is_devmem(cp)) {
            pfnum = dhp->dh_pfn + btop(off);
        } else if (cookie_is_pmem(cp)) {
            pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
            ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
                dhp->dh_roff < ptob(pcp->dp_npages));
            pfnum = page_pptonum(
                pcp->dp_pparray[btop(off + dhp->dh_roff)]);
        } else {
            ASSERT(dhp->dh_roff < cp->size);
            ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
                dhp->dh_cvaddr < (cp->cvaddr + cp->size));
            ASSERT((dhp->dh_cvaddr + off) <=
                (cp->cvaddr + cp->size));
            ASSERT((dhp->dh_cvaddr + off + PAGESIZE) <=
                (cp->cvaddr + cp->size));

            switch (cp->type) {
            case UMEM_LOCKED :
                if (cp->pparray != NULL) {
                    ASSERT((dhp->dh_roff &
                        PAGEOFFSET) == 0);
                    pfnum = page_pptonum(
                        cp->pparray[btop(off +
                        dhp->dh_roff)]);
                } else {
                    pfnum = hat_getpfnum(
                        ((proc_t *)cp->procp)->p_as->a_hat,
                        cp->cvaddr + off);
                }
                break;
            case UMEM_TRASH :
                pfnum = page_pptonum(trashpp);
                /*
                 * We should set hat_flags to HAT_NOFAULT also.
                 * However, not all hat layers implement this.
                 */
                break;
            case KMEM_PAGEABLE:
            case KMEM_NON_PAGEABLE:
                pfnum = hat_getpfnum(kas.a_hat,
                    dhp->dh_cvaddr + off);
                break;
            default :
                pfnum = PFN_INVALID;
                break;
            }
        }
        prot |= dhp->dh_hat_attr;
    }
    if (pfnum == PFN_INVALID) {
        return (FC_MAKE_ERR(EFAULT));
    }
    /* prot should already be OR'ed in with hat_attributes if needed */

    TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE_CK1,
        "segdev_faultpage: pfnum=%lx memory=%x prot=%x flags=%x",
        pfnum, pf_is_memory(pfnum), prot, hat_flags);
    DEBUGF(9, (CE_CONT, "segdev_faultpage: pfnum %lx memory %x "
        "prot %x flags %x\n", pfnum, pf_is_memory(pfnum), prot, hat_flags));

    if (pf_is_memory(pfnum) || (dhp != NULL)) {
        /*
         * It's not _really_ required here to pass sdp->hat_flags
         * to hat_devload even though we do it.
         * This is because the hat figures out that DEVMEM mappings
         * are non-consistent anyway.
         */
        hat_devload(hat, addr, PAGESIZE, pfnum,
            prot, hat_flags | sdp->hat_flags);
        return (0);
    }

    /*
     * Fall through to the case where devmap is not used and we need to
     * call up the device tree to set up the mapping
     */

    dip = VTOS(VTOCVP(sdp->vp))->s_dip;
    ASSERT(dip);

    /*
     * When calling ddi_map_fault, we do not OR in sdp->hat_attr
     * This is because this calls drivers which may not expect
     * prot to have any other values than PROT_ALL
     * The root nexus driver has a hack to peek into the segment
     * structure and then OR in sdp->hat_attr.
     * XX In case the bus_ops interfaces are ever revisited
     * we need to fix this.
     * prot should include other hat attributes
     */
    if (ddi_map_fault(dip, hat, seg, addr, NULL, pfnum, prot & PROT_ALL,
        (uint_t)(type == F_SOFTLOCK)) != DDI_SUCCESS) {
        return (FC_MAKE_ERR(EFAULT));
    }
    return (0);
}

static faultcode_t
segdev_fault(
    struct hat *hat,        /* the hat */
    struct seg *seg,        /* the seg_dev of interest */
    caddr_t addr,           /* the address of the fault */
    size_t len,             /* the length of the range */
    enum fault_type type,   /* type of fault */
    enum seg_rw rw)         /* type of access at fault */
{
    struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
    devmap_handle_t *dhp;
    struct devmap_softlock *slock = NULL;
    ulong_t slpage = 0;
    ulong_t off;
    caddr_t maddr = addr;
    int err;
    int err_is_faultcode = 0;

    TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_FAULT,
        "segdev_fault: dhp_head=%p seg=%p addr=%p len=%lx type=%x",
        (void *)dhp_head, (void *)seg, (void *)addr, len, type);
    DEBUGF(7, (CE_CONT, "segdev_fault: dhp_head %p seg %p "
        "addr %p len %lx type %x\n",
        (void *)dhp_head, (void *)seg, (void *)addr, len, type));

    ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

    /* Handle non-devmap case */
    if (dhp_head == NULL)
        return (segdev_faultpages(hat, seg, addr, len, type, rw, NULL));

    /* Find devmap handle */
    if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
        return (FC_NOMAP);

    /*
     * The seg_dev driver does not implement copy-on-write,
     * and always loads translations with maximal allowed permissions,
     * but we got a fault trying to access the device.
     * Servicing the fault is not going to give a better result.
     * RFE: If we want devmap_access callbacks to be involved in F_PROT
     *	faults, then the code below is written for that.
     *	Pending resolution of the following:
     *	- determine if the F_INVAL/F_SOFTLOCK syncing
     *	is needed for F_PROT also or not. The code below assumes it does
     *	- If driver sees F_PROT and calls devmap_load with same type,
     *	then segdev_faultpages will fail with FC_PROT anyway, need to
     *	change that so calls from devmap_load to segdev_faultpages for
     *	F_PROT type are retagged to F_INVAL.
     * RFE: Today we don't have drivers that use devmap and want to handle
     *	F_PROT calls. The code in segdev_fault* is written to allow
     *	this case but is not tested. A driver that needs this capability
     *	should be able to remove the short-circuit case; resolve the
     *	above issues and "should" work.
     */
    if (type == F_PROT) {
        return (FC_PROT);
    }

    /*
     * Loop through dhp list calling devmap_access or segdev_faultpages for
     * each devmap handle.
     * drivers which implement devmap_access can interpose on faults and do
     * device-appropriate special actions before calling devmap_load.
     */

    /*
     * Unfortunately, this simple loop has turned out to expose a variety
     * of complex problems which results in the following convoluted code.
     *
     * First, a desire to handle a serialization of F_SOFTLOCK calls
     * to the driver within the framework.
     *	This results in a dh_softlock structure that is on a per device
     *	(or device instance) basis and serializes devmap_access calls.
     *	Ideally we would need to do this for underlying
     *	memory/device regions that are being faulted on,
     *	but that is hard to identify, and with REMAP, harder.
     * Second, a desire to serialize F_INVAL (and F_PROT) calls w.r.t.
     *	F_SOFTLOCK calls to the driver.
     * These serializations are to simplify the driver programmer model.
     * To support these two features, the code first goes through the
     *	devmap handles and counts the pages (slpage) that are covered
     *	by devmap_access callbacks.
     * This part ends with a devmap_softlock_enter call
     *	which allows only one F_SOFTLOCK active on a device instance,
     *	but multiple F_INVAL/F_PROTs can be active except when a
     *	F_SOFTLOCK is active
     *
     * Next, we don't short-circuit the fault code upfront to call
     *	segdev_softunlock for F_SOFTUNLOCK, because we must use
     *	the same length when we softlock and softunlock.
     *
     *	-Hat layers may not support softunlocking lengths less than the
     *	original length when there is large page support.
     *	-kpmem locking is dependent on keeping the lengths same.
     *	-if drivers handled F_SOFTLOCK, they probably also expect to
     *	see an F_SOFTUNLOCK of the same length
     *	Hence, if extending lengths during softlock,
     *	softunlock has to make the same adjustments and goes through
     *	the same loop calling segdev_faultpages/segdev_softunlock
     *	But some of the synchronization and error handling is different
     */

    if (type != F_SOFTUNLOCK) {
        devmap_handle_t *dhpp = dhp;
        size_t slen = len;

        /*
         * Calculate count of pages that are :
         * a) within the (potentially extended) fault region
         * b) AND covered by devmap handle with devmap_access
         */
        off = (ulong_t)(addr - dhpp->dh_uvaddr);
        while (slen != 0) {
            size_t mlen;

            /*
             * Softlocking on a region that allows remap is
             * unsupported due to unresolved locking issues
             * XXX: unclear what these are?
             *	One potential is that if there is a pending
             *	softlock, then a remap should not be allowed
             *	until the unlock is done. This is easily
             *	fixed by returning error in devmap*remap on
             *	checking the dh->dh_softlock->softlocked value
             */
            if ((type == F_SOFTLOCK) &&
                (dhpp->dh_flags & DEVMAP_ALLOW_REMAP)) {
                return (FC_NOSUPPORT);
            }

            mlen = MIN(slen, (dhpp->dh_len - off));
            if (dhpp->dh_callbackops.devmap_access) {
                size_t llen;
                caddr_t laddr;
                /*
                 * use extended length for large page mappings
                 */
                HOLD_DHP_LOCK(dhpp);
                if ((sdp->pageprot == 0) &&
                    (dhpp->dh_flags & DEVMAP_FLAG_LARGE)) {
                    devmap_get_large_pgsize(dhpp,
                        mlen, maddr, &llen, &laddr);
                } else {
                    llen = mlen;
                }
                RELE_DHP_LOCK(dhpp);

                slpage += btopr(llen);
                slock = dhpp->dh_softlock;
            }
            maddr += mlen;
            ASSERT(slen >= mlen);
            slen -= mlen;
            dhpp = dhpp->dh_next;
            off = 0;
        }
        /*
         * synchronize with other faulting threads and wait till safe
         * devmap_softlock_enter might return due to signal in cv_wait
         *
         * devmap_softlock_enter has to be called outside of while loop
         * to prevent a deadlock if len spans over multiple dhps.
         * dh_softlock is based on device instance and if multiple dhps
         * use the same device instance, the second dhp's LOCK call
         * will hang waiting on the first to complete.
         * devmap_setup verifies that slocks in a dhp_chain are same.
         * RFE: this deadlock only holds true for F_SOFTLOCK. For
         *	F_INVAL/F_PROT, since we now allow multiple in parallel,
         *	we could have done the softlock_enter inside the loop
         *	and supported multi-dhp mappings with dissimilar devices
         */
        if (err = devmap_softlock_enter(slock, slpage, type))
            return (FC_MAKE_ERR(err));
    }

    /* reset 'maddr' to the start addr of the range of fault. */
    maddr = addr;

    /* calculate the offset corresponding to 'addr' in the first dhp. */
    off = (ulong_t)(addr - dhp->dh_uvaddr);

    /*
     * The fault length may span over multiple dhps.
     * Loop until the total length is satisfied.
     */
    while (len != 0) {
        size_t llen;
        size_t mlen;
        caddr_t laddr;

        /*
         * mlen is the smaller of 'len' and the length
         * from addr to the end of mapping defined by dhp.
         */
        mlen = MIN(len, (dhp->dh_len - off));

        HOLD_DHP_LOCK(dhp);
        /*
         * Pass the extended length and address to devmap_access
         * if large pagesize is used for loading address translations.
         */
        if ((sdp->pageprot == 0) &&
            (dhp->dh_flags & DEVMAP_FLAG_LARGE)) {
            devmap_get_large_pgsize(dhp, mlen, maddr,
                &llen, &laddr);
            ASSERT(maddr == addr || laddr == maddr);
        } else {
            llen = mlen;
            laddr = maddr;
        }

        if (dhp->dh_callbackops.devmap_access != NULL) {
            offset_t aoff;

            aoff = sdp->offset + (offset_t)(laddr - seg->s_base);

            /*
             * call driver's devmap_access entry point which will
             * call devmap_load/contextmgmt to load the translations
             *
             * We drop the dhp_lock before calling access so
             * drivers can call devmap_*_remap within access
             */
            RELE_DHP_LOCK(dhp);

            err = (*dhp->dh_callbackops.devmap_access)(
                dhp, (void *)dhp->dh_pvtp, aoff, llen, type, rw);
        } else {
            /*
             * If no devmap_access entry point, then load mappings
             * hold dhp_lock across faultpages if REMAP
             */
            err = segdev_faultpages(hat, seg, laddr, llen,
                type, rw, dhp);
            err_is_faultcode = 1;
            RELE_DHP_LOCK(dhp);
        }

        if (err) {
            if ((type == F_SOFTLOCK) && (maddr > addr)) {
                /*
                 * If not first dhp, use
                 * segdev_fault(F_SOFTUNLOCK) for prior dhps.
                 * While this is recursion, it is incorrect to
                 * call just segdev_softunlock
                 * if we are using either large pages
                 * or devmap_access.  It is more correct
                 * to go through the same loop as above
                 * rather than call segdev_softunlock directly.
                 * It will use the right lengths as well as
                 * call into the driver devmap_access routines.
                 */
                size_t done = (size_t)(maddr - addr);
                (void) segdev_fault(hat, seg, addr, done,
                    F_SOFTUNLOCK, S_OTHER);
                /*
                 * reduce slpage by number of pages
                 * released by segdev_softunlock
                 */
                ASSERT(slpage >= btopr(done));
                devmap_softlock_exit(slock,
                    slpage - btopr(done), type);
            } else {
                devmap_softlock_exit(slock, slpage, type);
            }

            /*
             * segdev_faultpages() already returns a faultcode,
             * hence, result from segdev_faultpages() should be
             * returned directly.
1861              */
1862             if (err_is_faultcode)
1863                 return (err);
1864             return (FC_MAKE_ERR(err));
1865         }
1866
1867         maddr += mlen;
1868         ASSERT(len >= mlen);
1869         len -= mlen;
1870         dhp = dhp->dh_next;
1871         off = 0;
1872
1873         ASSERT(!dhp || len == 0 || maddr == dhp->dh_uvaddr);
1874     }
1875     /*
1876      * Release the softlock count at the end of the fault.
1877      * For F_SOFTLOCK this is done in the later F_SOFTUNLOCK.
1878      */
1879     if ((type == F_INVAL) || (type == F_PROT))
1880         devmap_softlock_exit(slock, slpage, type);
1881     return (0);
1882 }
1883
1884 /*
1885  * segdev_faultpages
1886  *
1887  * Used to fault in seg_dev segment pages. Called by segdev_fault
1888  * or devmap_load. This routine assumes that the caller makes sure
1889  * that the fields in dhp used below are not changed due to remap
1890  * during this call. Caller does HOLD_DHP_LOCK if needed.
1891  * This routine returns a faultcode_t as a return value for segdev_fault.
1892  */
1893 static faultcode_t
1894 segdev_faultpages(
1895     struct hat *hat,            /* the hat */
1896     struct seg *seg,            /* the seg_dev of interest */
1897     caddr_t addr,               /* the address of the fault */
1898     size_t len,                 /* the length of the range */
1899     enum fault_type type,       /* type of fault */
1900     enum seg_rw rw,             /* type of access at fault */
1901     devmap_handle_t *dhp)       /* devmap handle */
1902 {
1903     register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1904     register caddr_t a;
1905     struct vpage *vpage;
1906     struct ddi_umem_cookie *kpmem_cookie = NULL;
1907     int err;
1908
1909     TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGES,
1910         "segdev_faultpages: dhp=%p seg=%p addr=%p len=%lx",
1911         (void *)dhp, (void *)seg, (void *)addr, len);
1912     DEBUGF(5, (CE_CONT, "segdev_faultpages: "
1913         "dhp %p seg %p addr %p len %lx\n",
1914         (void *)dhp, (void *)seg, (void *)addr, len));
1915
1916     /*
1917      * The seg_dev driver does not implement copy-on-write,
1918      * and always loads translations with the maximal allowed
1919      * permissions, but we still got a fault trying to access the
1920      * device. Servicing the fault is not going to produce a better
1921      * result. XXX: If we want devmap_access to handle F_PROT calls,
1922      * this code should be removed to let the normal fault handling
1923      * take care of finding the error.
1924      */
1925     if (type == F_PROT) {
1926         return (FC_PROT);
1927     }
1928
1929     if (type == F_SOFTUNLOCK) {
1930         segdev_softunlock(hat, seg, addr, len, rw);
1931         return (0);
1932     }
1933
1934     /*
1935      * For kernel pageable memory, fault in and lock the segkp pages.
1936      * We hold this lock until the completion of this
1937      * fault (INVAL/PROT) or till unlock (SOFTLOCK).
1938      */
1939     if ((dhp != NULL) && dhp_is_kpmem(dhp)) {
1940         kpmem_cookie = (struct ddi_umem_cookie *)dhp->dh_cookie;
1941         if (err = acquire_kpmem_lock(kpmem_cookie, btopr(len)))
1942             return (err);
1943     }
1944
1945     /*
1946      * If we have the same protections for the entire segment,
1947      * ensure that the access being attempted is legitimate.
1948 */ 1949 rw_enter(&sdp->lock, RW_READER); 1950 if (sdp->pageprot == 0) { 1951 uint_t protchk; 1952 1953 switch (rw) { 1954 case S_READ: 1955 protchk = PROT_READ; 1956 break; 1957 case S_WRITE: 1958 protchk = PROT_WRITE; 1959 break; 1960 case S_EXEC: 1961 protchk = PROT_EXEC; 1962 break; 1963 case S_OTHER: 1964 default: 1965 protchk = PROT_READ | PROT_WRITE | PROT_EXEC; 1966 break; 1967 } 1968 1969 if ((sdp->prot & protchk) == 0) { 1970 rw_exit(&sdp->lock); 1971 /* undo kpmem locking */ 1972 if (kpmem_cookie != NULL) { 1973 release_kpmem_lock(kpmem_cookie, btopr(len)); 1974 } 1975 return (FC_PROT); /* illegal access type */ 1976 } 1977 } 1978 1979 /* 1980 * we do a single hat_devload for the range if 1981 * - devmap framework (dhp is not NULL), 1982 * - pageprot == 0, i.e., no per-page protection set and 1983 * - is device pages, irrespective of whether we are using large pages 1984 */ 1985 if ((sdp->pageprot == 0) && (dhp != NULL) && dhp_is_devmem(dhp)) { 1986 pfn_t pfnum; 1987 uint_t hat_flags; 1988 1989 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) { 1990 rw_exit(&sdp->lock); 1991 return (FC_NOMAP); 1992 } 1993 1994 if (type == F_SOFTLOCK) { 1995 mutex_enter(&freemem_lock); 1996 sdp->softlockcnt += btopr(len); 1997 mutex_exit(&freemem_lock); 1998 } 1999 2000 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD); 2001 pfnum = dhp->dh_pfn + btop((uintptr_t)(addr - dhp->dh_uvaddr)); 2002 ASSERT(!pf_is_memory(pfnum)); 2003 2004 hat_devload(hat, addr, len, pfnum, sdp->prot | dhp->dh_hat_attr, 2005 hat_flags | sdp->hat_flags); 2006 rw_exit(&sdp->lock); 2007 return (0); 2008 } 2009 2010 /* Handle cases where we have to loop through fault handling per-page */ 2011 2012 if (sdp->vpage == NULL) 2013 vpage = NULL; 2014 else 2015 vpage = &sdp->vpage[seg_page(seg, addr)]; 2016 2017 /* loop over the address range handling each fault */ 2018 for (a = addr; a < addr + len; a += PAGESIZE) { 2019 if (err = segdev_faultpage(hat, seg, a, vpage, type, rw, dhp)) { 2020 break; 2021 } 2022 if (vpage != NULL) 2023 vpage++; 2024 } 2025 rw_exit(&sdp->lock); 2026 if (err && (type == F_SOFTLOCK)) { /* error handling for F_SOFTLOCK */ 2027 size_t done = (size_t)(a - addr); /* pages fault successfully */ 2028 if (done > 0) { 2029 /* use softunlock for those pages */ 2030 segdev_softunlock(hat, seg, addr, done, S_OTHER); 2031 } 2032 if (kpmem_cookie != NULL) { 2033 /* release kpmem lock for rest of pages */ 2034 ASSERT(len >= done); 2035 release_kpmem_lock(kpmem_cookie, btopr(len - done)); 2036 } 2037 } else if ((kpmem_cookie != NULL) && (type != F_SOFTLOCK)) { 2038 /* for non-SOFTLOCK cases, release kpmem */ 2039 release_kpmem_lock(kpmem_cookie, btopr(len)); 2040 } 2041 return (err); 2042 } 2043 2044 /* 2045 * Asynchronous page fault. We simply do nothing since this 2046 * entry point is not supposed to load up the translation. 
2047 */ 2048 /*ARGSUSED*/ 2049 static faultcode_t 2050 segdev_faulta(struct seg *seg, caddr_t addr) 2051 { 2052 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FAULTA, 2053 "segdev_faulta: seg=%p addr=%p", (void *)seg, (void *)addr); 2054 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 2055 2056 return (0); 2057 } 2058 2059 static int 2060 segdev_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 2061 { 2062 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data; 2063 register devmap_handle_t *dhp; 2064 register struct vpage *vp, *evp; 2065 devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data; 2066 ulong_t off; 2067 size_t mlen, sz; 2068 2069 TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT, 2070 "segdev_setprot:start seg=%p addr=%p len=%lx prot=%x", 2071 (void *)seg, (void *)addr, len, prot); 2072 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 2073 2074 if ((sz = sdp->softlockcnt) > 0 && dhp_head != NULL) { 2075 /* 2076 * Fail the setprot if pages are SOFTLOCKed through this 2077 * mapping. 2078 * Softlockcnt is protected from change by the as read lock. 2079 */ 2080 TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT_CK1, 2081 "segdev_setprot:error softlockcnt=%lx", sz); 2082 DEBUGF(1, (CE_CONT, "segdev_setprot: softlockcnt %ld\n", sz)); 2083 return (EAGAIN); 2084 } 2085 2086 if (dhp_head != NULL) { 2087 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL) 2088 return (EINVAL); 2089 2090 /* 2091 * check if violate maxprot. 2092 */ 2093 off = (ulong_t)(addr - dhp->dh_uvaddr); 2094 mlen = len; 2095 while (dhp) { 2096 if ((dhp->dh_maxprot & prot) != prot) 2097 return (EACCES); /* violated maxprot */ 2098 2099 if (mlen > (dhp->dh_len - off)) { 2100 mlen -= dhp->dh_len - off; 2101 dhp = dhp->dh_next; 2102 off = 0; 2103 } else 2104 break; 2105 } 2106 } else { 2107 if ((sdp->maxprot & prot) != prot) 2108 return (EACCES); 2109 } 2110 2111 rw_enter(&sdp->lock, RW_WRITER); 2112 if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) { 2113 if (sdp->prot == prot) { 2114 rw_exit(&sdp->lock); 2115 return (0); /* all done */ 2116 } 2117 sdp->prot = (uchar_t)prot; 2118 } else { 2119 sdp->pageprot = 1; 2120 if (sdp->vpage == NULL) { 2121 /* 2122 * First time through setting per page permissions, 2123 * initialize all the vpage structures to prot 2124 */ 2125 sdp->vpage = kmem_zalloc(vpgtob(seg_pages(seg)), 2126 KM_SLEEP); 2127 evp = &sdp->vpage[seg_pages(seg)]; 2128 for (vp = sdp->vpage; vp < evp; vp++) 2129 VPP_SETPROT(vp, sdp->prot); 2130 } 2131 /* 2132 * Now go change the needed vpages protections. 2133 */ 2134 evp = &sdp->vpage[seg_page(seg, addr + len)]; 2135 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) 2136 VPP_SETPROT(vp, prot); 2137 } 2138 rw_exit(&sdp->lock); 2139 2140 if (dhp_head != NULL) { 2141 devmap_handle_t *tdhp; 2142 /* 2143 * If large page size was used in hat_devload(), 2144 * the same page size must be used in hat_unload(). 
2145 */ 2146 dhp = tdhp = devmap_find_handle(dhp_head, addr); 2147 while (tdhp != NULL) { 2148 if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) { 2149 break; 2150 } 2151 tdhp = tdhp->dh_next; 2152 } 2153 if (tdhp) { 2154 size_t slen = len; 2155 size_t mlen; 2156 size_t soff; 2157 2158 soff = (ulong_t)(addr - dhp->dh_uvaddr); 2159 while (slen != 0) { 2160 mlen = MIN(slen, (dhp->dh_len - soff)); 2161 hat_unload(seg->s_as->a_hat, dhp->dh_uvaddr, 2162 dhp->dh_len, HAT_UNLOAD); 2163 dhp = dhp->dh_next; 2164 ASSERT(slen >= mlen); 2165 slen -= mlen; 2166 soff = 0; 2167 } 2168 return (0); 2169 } 2170 } 2171 2172 if ((prot & ~PROT_USER) == PROT_NONE) { 2173 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD); 2174 } else { 2175 /* 2176 * RFE: the segment should keep track of all attributes 2177 * allowing us to remove the deprecated hat_chgprot 2178 * and use hat_chgattr. 2179 */ 2180 hat_chgprot(seg->s_as->a_hat, addr, len, prot); 2181 } 2182 2183 return (0); 2184 } 2185 2186 static int 2187 segdev_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 2188 { 2189 struct segdev_data *sdp = (struct segdev_data *)seg->s_data; 2190 struct vpage *vp, *evp; 2191 2192 TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_CHECKPROT, 2193 "segdev_checkprot:start seg=%p addr=%p len=%lx prot=%x", 2194 (void *)seg, (void *)addr, len, prot); 2195 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 2196 2197 /* 2198 * If segment protection can be used, simply check against them 2199 */ 2200 rw_enter(&sdp->lock, RW_READER); 2201 if (sdp->pageprot == 0) { 2202 register int err; 2203 2204 err = ((sdp->prot & prot) != prot) ? EACCES : 0; 2205 rw_exit(&sdp->lock); 2206 return (err); 2207 } 2208 2209 /* 2210 * Have to check down to the vpage level 2211 */ 2212 evp = &sdp->vpage[seg_page(seg, addr + len)]; 2213 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) { 2214 if ((VPP_PROT(vp) & prot) != prot) { 2215 rw_exit(&sdp->lock); 2216 return (EACCES); 2217 } 2218 } 2219 rw_exit(&sdp->lock); 2220 return (0); 2221 } 2222 2223 static int 2224 segdev_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 2225 { 2226 struct segdev_data *sdp = (struct segdev_data *)seg->s_data; 2227 size_t pgno; 2228 2229 TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_GETPROT, 2230 "segdev_getprot:start seg=%p addr=%p len=%lx protv=%p", 2231 (void *)seg, (void *)addr, len, (void *)protv); 2232 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 2233 2234 pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 2235 if (pgno != 0) { 2236 rw_enter(&sdp->lock, RW_READER); 2237 if (sdp->pageprot == 0) { 2238 do { 2239 protv[--pgno] = sdp->prot; 2240 } while (pgno != 0); 2241 } else { 2242 size_t pgoff = seg_page(seg, addr); 2243 2244 do { 2245 pgno--; 2246 protv[pgno] = 2247 VPP_PROT(&sdp->vpage[pgno + pgoff]); 2248 } while (pgno != 0); 2249 } 2250 rw_exit(&sdp->lock); 2251 } 2252 return (0); 2253 } 2254 2255 static u_offset_t 2256 segdev_getoffset(register struct seg *seg, caddr_t addr) 2257 { 2258 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data; 2259 2260 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETOFFSET, 2261 "segdev_getoffset:start seg=%p addr=%p", (void *)seg, (void *)addr); 2262 2263 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 2264 2265 return ((u_offset_t)sdp->offset + (addr - seg->s_base)); 2266 } 2267 2268 /*ARGSUSED*/ 2269 static int 2270 segdev_gettype(register struct seg *seg, caddr_t addr) 2271 { 2272 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data; 2273 
2274     TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETTYPE,
2275         "segdev_gettype:start seg=%p addr=%p", (void *)seg, (void *)addr);
2276
2277     ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2278
2279     return (sdp->type);
2280 }
2281
2282
2283 /*ARGSUSED*/
2284 static int
2285 segdev_getvp(register struct seg *seg, caddr_t addr, struct vnode **vpp)
2286 {
2287     register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2288
2289     TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETVP,
2290         "segdev_getvp:start seg=%p addr=%p", (void *)seg, (void *)addr);
2291
2292     ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2293
2294     /*
2295      * Note that this vp is the common_vp of the device, where the
2296      * pages are hung.
2297      */
2298     *vpp = VTOCVP(sdp->vp);
2299
2300     return (0);
2301 }
2302
2303 static void
2304 segdev_badop(void)
2305 {
2306     TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGDEV_BADOP,
2307         "segdev_badop:start");
2308     panic("segdev_badop");
2309     /*NOTREACHED*/
2310 }
2311
2312 /*
2313  * segdev pages are not in the cache, and thus can't really be controlled.
2314  * Hence, syncs are simply always successful.
2315  */
2316 /*ARGSUSED*/
2317 static int
2318 segdev_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
2319 {
2320     TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SYNC, "segdev_sync:start");
2321
2322     ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2323
2324     return (0);
2325 }
2326
2327 /*
2328  * segdev pages are always "in core".
2329  */
2330 /*ARGSUSED*/
2331 static size_t
2332 segdev_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
2333 {
2334     size_t v = 0;
2335
2336     TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_INCORE, "segdev_incore:start");
2337
2338     ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2339
2340     for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
2341         v += PAGESIZE)
2342         *vec++ = 1;
2343     return (v);
2344 }
2345
2346 /*
2347  * segdev pages are not in the cache, and thus can't really be controlled.
2348  * Hence, locks are simply always successful.
2349  */
2350 /*ARGSUSED*/
2351 static int
2352 segdev_lockop(struct seg *seg, caddr_t addr,
2353     size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
2354 {
2355     TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_LOCKOP, "segdev_lockop:start");
2356
2357     ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2358
2359     return (0);
2360 }
2361
2362 /*
2363  * segdev pages are not in the cache, and thus can't really be controlled.
2364  * Hence, advise is simply always successful.
2365  */
2366 /*ARGSUSED*/
2367 static int
2368 segdev_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2369 {
2370     TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_ADVISE, "segdev_advise:start");
2371
2372     ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2373
2374     return (0);
2375 }
2376
2377 /*
2378  * segdev pages are not dumped, so we just return.
2379  */
2380 /*ARGSUSED*/
2381 static void
2382 segdev_dump(struct seg *seg)
2383 {}
2384
2385 /*
2386  * ddi_segmap_setup:    Used by drivers that wish to specify mapping
2387  *                      attributes for a segment. Called from a driver's
2388  *                      segmap(9E) routine.
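 *
 * A minimal usage sketch (the xx_ names are hypothetical, not part of
 * this file): a driver's segmap(9E) entry point typically just forwards
 * its arguments here, adding its access attributes and register number:
 *
 *	static int
 *	xx_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
 *	    off_t len, uint_t prot, uint_t maxprot, uint_t flags,
 *	    cred_t *cred)
 *	{
 *		return (ddi_segmap_setup(dev, off, as, addrp, len, prot,
 *		    maxprot, flags, cred, &xx_acc_attr, XX_REGNUM));
 *	}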
2389 */ 2390 /*ARGSUSED*/ 2391 int 2392 ddi_segmap_setup(dev_t dev, off_t offset, struct as *as, caddr_t *addrp, 2393 off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred, 2394 ddi_device_acc_attr_t *accattrp, uint_t rnumber) 2395 { 2396 struct segdev_crargs dev_a; 2397 int (*mapfunc)(dev_t dev, off_t off, int prot); 2398 uint_t hat_attr; 2399 pfn_t pfn; 2400 int error, i; 2401 2402 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP_SETUP, 2403 "ddi_segmap_setup:start"); 2404 2405 if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev) 2406 return (ENODEV); 2407 2408 /* 2409 * Character devices that support the d_mmap 2410 * interface can only be mmap'ed shared. 2411 */ 2412 if ((flags & MAP_TYPE) != MAP_SHARED) 2413 return (EINVAL); 2414 2415 /* 2416 * Check that this region is indeed mappable on this platform. 2417 * Use the mapping function. 2418 */ 2419 if (ddi_device_mapping_check(dev, accattrp, rnumber, &hat_attr) == -1) 2420 return (ENXIO); 2421 2422 /* 2423 * Check to ensure that the entire range is 2424 * legal and we are not trying to map in 2425 * more than the device will let us. 2426 */ 2427 for (i = 0; i < len; i += PAGESIZE) { 2428 if (i == 0) { 2429 /* 2430 * Save the pfn at offset here. This pfn will be 2431 * used later to get user address. 2432 */ 2433 if ((pfn = (pfn_t)cdev_mmap(mapfunc, dev, offset, 2434 maxprot)) == PFN_INVALID) 2435 return (ENXIO); 2436 } else { 2437 if (cdev_mmap(mapfunc, dev, offset + i, maxprot) == 2438 PFN_INVALID) 2439 return (ENXIO); 2440 } 2441 } 2442 2443 as_rangelock(as); 2444 /* Pick an address w/o worrying about any vac alignment constraints. */ 2445 error = choose_addr(as, addrp, len, ptob(pfn), ADDR_NOVACALIGN, flags); 2446 if (error != 0) { 2447 as_rangeunlock(as); 2448 return (error); 2449 } 2450 2451 dev_a.mapfunc = mapfunc; 2452 dev_a.dev = dev; 2453 dev_a.offset = (offset_t)offset; 2454 dev_a.type = flags & MAP_TYPE; 2455 dev_a.prot = (uchar_t)prot; 2456 dev_a.maxprot = (uchar_t)maxprot; 2457 dev_a.hat_attr = hat_attr; 2458 dev_a.hat_flags = 0; 2459 dev_a.devmap_data = NULL; 2460 2461 error = as_map(as, *addrp, len, segdev_create, &dev_a); 2462 as_rangeunlock(as); 2463 return (error); 2464 2465 } 2466 2467 /*ARGSUSED*/ 2468 static int 2469 segdev_pagelock(struct seg *seg, caddr_t addr, size_t len, 2470 struct page ***ppp, enum lock_type type, enum seg_rw rw) 2471 { 2472 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_PAGELOCK, 2473 "segdev_pagelock:start"); 2474 return (ENOTSUP); 2475 } 2476 2477 /*ARGSUSED*/ 2478 static int 2479 segdev_setpagesize(struct seg *seg, caddr_t addr, size_t len, 2480 uint_t szc) 2481 { 2482 return (ENOTSUP); 2483 } 2484 2485 /* 2486 * devmap_device: Used by devmap framework to establish mapping 2487 * called by devmap_seup(9F) during map setup time. 
2488 */ 2489 /*ARGSUSED*/ 2490 static int 2491 devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr, 2492 offset_t off, size_t len, uint_t flags) 2493 { 2494 devmap_handle_t *rdhp, *maxdhp; 2495 struct segdev_crargs dev_a; 2496 int err; 2497 uint_t maxprot = PROT_ALL; 2498 offset_t offset = 0; 2499 pfn_t pfn; 2500 struct devmap_pmem_cookie *pcp; 2501 2502 TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVICE, 2503 "devmap_device:start dhp=%p addr=%p off=%llx, len=%lx", 2504 (void *)dhp, (void *)addr, off, len); 2505 2506 DEBUGF(2, (CE_CONT, "devmap_device: dhp %p addr %p off %llx len %lx\n", 2507 (void *)dhp, (void *)addr, off, len)); 2508 2509 as_rangelock(as); 2510 if ((flags & MAP_FIXED) == 0) { 2511 offset_t aligned_off; 2512 2513 rdhp = maxdhp = dhp; 2514 while (rdhp != NULL) { 2515 maxdhp = (maxdhp->dh_len > rdhp->dh_len) ? 2516 maxdhp : rdhp; 2517 rdhp = rdhp->dh_next; 2518 maxprot |= dhp->dh_maxprot; 2519 } 2520 offset = maxdhp->dh_uoff - dhp->dh_uoff; 2521 2522 /* 2523 * Use the dhp that has the 2524 * largest len to get user address. 2525 */ 2526 /* 2527 * If MAPPING_INVALID, cannot use dh_pfn/dh_cvaddr, 2528 * use 0 which is as good as any other. 2529 */ 2530 if (maxdhp->dh_flags & DEVMAP_MAPPING_INVALID) { 2531 aligned_off = (offset_t)0; 2532 } else if (dhp_is_devmem(maxdhp)) { 2533 aligned_off = (offset_t)ptob(maxdhp->dh_pfn) - offset; 2534 } else if (dhp_is_pmem(maxdhp)) { 2535 pcp = (struct devmap_pmem_cookie *)maxdhp->dh_pcookie; 2536 pfn = page_pptonum( 2537 pcp->dp_pparray[btop(maxdhp->dh_roff)]); 2538 aligned_off = (offset_t)ptob(pfn) - offset; 2539 } else { 2540 aligned_off = (offset_t)(uintptr_t)maxdhp->dh_cvaddr - 2541 offset; 2542 } 2543 2544 /* 2545 * Pick an address aligned to dh_cookie. 2546 * for kernel memory/user memory, cookie is cvaddr. 2547 * for device memory, cookie is physical address. 2548 */ 2549 map_addr(addr, len, aligned_off, 1, flags); 2550 if (*addr == NULL) { 2551 as_rangeunlock(as); 2552 return (ENOMEM); 2553 } 2554 } else { 2555 /* 2556 * User-specified address; blow away any previous mappings. 2557 */ 2558 (void) as_unmap(as, *addr, len); 2559 } 2560 2561 dev_a.mapfunc = NULL; 2562 dev_a.dev = dhp->dh_dev; 2563 dev_a.type = flags & MAP_TYPE; 2564 dev_a.offset = off; 2565 /* 2566 * sdp->maxprot has the least restrict protection of all dhps. 2567 */ 2568 dev_a.maxprot = maxprot; 2569 dev_a.prot = dhp->dh_prot; 2570 /* 2571 * devmap uses dhp->dh_hat_attr for hat. 
2572 */ 2573 dev_a.hat_flags = 0; 2574 dev_a.hat_attr = 0; 2575 dev_a.devmap_data = (void *)dhp; 2576 2577 err = as_map(as, *addr, len, segdev_create, &dev_a); 2578 as_rangeunlock(as); 2579 return (err); 2580 } 2581 2582 int 2583 devmap_do_ctxmgt(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len, 2584 uint_t type, uint_t rw, int (*ctxmgt)(devmap_cookie_t, void *, offset_t, 2585 size_t, uint_t, uint_t)) 2586 { 2587 register devmap_handle_t *dhp = (devmap_handle_t *)dhc; 2588 struct devmap_ctx *devctx; 2589 int do_timeout = 0; 2590 int ret; 2591 2592 #ifdef lint 2593 pvtp = pvtp; 2594 #endif 2595 2596 TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT, 2597 "devmap_do_ctxmgt:start dhp=%p off=%llx, len=%lx", 2598 (void *)dhp, off, len); 2599 DEBUGF(7, (CE_CONT, "devmap_do_ctxmgt: dhp %p off %llx len %lx\n", 2600 (void *)dhp, off, len)); 2601 2602 if (ctxmgt == NULL) 2603 return (FC_HWERR); 2604 2605 devctx = dhp->dh_ctx; 2606 2607 /* 2608 * If we are on an MP system with more than one cpu running 2609 * and if a thread on some CPU already has the context, wait 2610 * for it to finish if there is a hysteresis timeout. 2611 * 2612 * We call cv_wait() instead of cv_wait_sig() because 2613 * it does not matter much if it returned due to a signal 2614 * or due to a cv_signal() or cv_broadcast(). In either event 2615 * we need to complete the mapping otherwise the processes 2616 * will die with a SEGV. 2617 */ 2618 if ((dhp->dh_timeout_length > 0) && (ncpus > 1)) { 2619 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK1, 2620 "devmap_do_ctxmgt:doing hysteresis, devctl %p dhp %p", 2621 devctx, dhp); 2622 do_timeout = 1; 2623 mutex_enter(&devctx->lock); 2624 while (devctx->oncpu) 2625 cv_wait(&devctx->cv, &devctx->lock); 2626 devctx->oncpu = 1; 2627 mutex_exit(&devctx->lock); 2628 } 2629 2630 /* 2631 * Call the contextmgt callback so that the driver can handle 2632 * the fault. 2633 */ 2634 ret = (*ctxmgt)(dhp, dhp->dh_pvtp, off, len, type, rw); 2635 2636 /* 2637 * If devmap_access() returned -1, then there was a hardware 2638 * error so we need to convert the return value to something 2639 * that trap() will understand. Otherwise, the return value 2640 * is already a fault code generated by devmap_unload() 2641 * or devmap_load(). 2642 */ 2643 if (ret) { 2644 TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK2, 2645 "devmap_do_ctxmgt: ret=%x dhp=%p devctx=%p", 2646 ret, dhp, devctx); 2647 DEBUGF(1, (CE_CONT, "devmap_do_ctxmgt: ret %x dhp %p\n", 2648 ret, (void *)dhp)); 2649 if (devctx->oncpu) { 2650 mutex_enter(&devctx->lock); 2651 devctx->oncpu = 0; 2652 cv_signal(&devctx->cv); 2653 mutex_exit(&devctx->lock); 2654 } 2655 return (FC_HWERR); 2656 } 2657 2658 /* 2659 * Setup the timeout if we need to 2660 */ 2661 if (do_timeout) { 2662 mutex_enter(&devctx->lock); 2663 if (dhp->dh_timeout_length > 0) { 2664 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK3, 2665 "devmap_do_ctxmgt:timeout set"); 2666 devctx->timeout = timeout(devmap_ctxto, 2667 devctx, dhp->dh_timeout_length); 2668 } else { 2669 /* 2670 * We don't want to wait so set oncpu to 2671 * 0 and wake up anyone waiting. 
2672              */
2673             TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK4,
2674                 "devmap_do_ctxmgt:timeout not set");
2675             devctx->oncpu = 0;
2676             cv_signal(&devctx->cv);
2677         }
2678         mutex_exit(&devctx->lock);
2679     }
2680
2681     return (DDI_SUCCESS);
2682 }
2683
2684 /*
2685  *                                   end of mapping
2686  *         poff    fault_offset           |
2687  * base    |       |                      |
2688  * |       |       |                      |
2689  * V       V       V                      V
2690  * +-----------+---------------+-------+---------+-------+
2691  * ^               ^       ^         ^
2692  * |<--- offset--->|<-len->|         |
2693  * |<--- dh_len(size of mapping) --->|
2694  *         |<-- pg -->|
2695  *                 -->|rlen|<--
2696  */
2697 static ulong_t
2698 devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
2699     ulong_t *opfn, ulong_t *pagesize)
2700 {
2701     register int level;
2702     ulong_t pg;
2703     ulong_t poff;
2704     ulong_t base;
2705     caddr_t uvaddr;
2706     long rlen;
2707
2708     TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP,
2709         "devmap_roundup:start dhp=%p off=%lx len=%lx",
2710         (void *)dhp, offset, len);
2711     DEBUGF(2, (CE_CONT, "devmap_roundup: dhp %p off %lx len %lx\n",
2712         (void *)dhp, offset, len));
2713
2714     /*
2715      * get the max. pagesize that is aligned within the range
2716      * <dh_pfn, dh_pfn+offset>.
2717      *
2718      * The calculations below use the physical address to determine
2719      * the page size to use. The same calculations could equally use
2720      * the virtual address to determine the page size.
2721      */
2722     base = (ulong_t)ptob(dhp->dh_pfn);
2723     for (level = dhp->dh_mmulevel; level >= 0; level--) {
2724         pg = page_get_pagesize(level);
2725         poff = ((base + offset) & ~(pg - 1));
2726         uvaddr = dhp->dh_uvaddr + (poff - base);
2727         if ((poff >= base) &&
2728             ((poff + pg) <= (base + dhp->dh_len)) &&
2729             VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg))
2730             break;
2731     }
2732
2733     TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK1,
2734         "devmap_roundup: base=%lx poff=%lx dhp=%p",
2735         base, poff, dhp);
2736     DEBUGF(2, (CE_CONT, "devmap_roundup: base %lx poff %lx pfn %lx\n",
2737         base, poff, dhp->dh_pfn));
2738
2739     ASSERT(VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg));
2740     ASSERT(level >= 0);
2741
2742     *pagesize = pg;
2743     *opfn = dhp->dh_pfn + btop(poff - base);
2744
2745     rlen = len + offset - (poff - base + pg);
2746
2747     ASSERT(rlen < (long)len);
2748
2749     TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK2,
2750         "devmap_roundup:ret dhp=%p level=%x rlen=%lx psiz=%p opfn=%p",
2751         (void *)dhp, level, rlen, pagesize, opfn);
2752     DEBUGF(1, (CE_CONT, "devmap_roundup: dhp %p "
2753         "level %x rlen %lx psize %lx opfn %lx\n",
2754         (void *)dhp, level, rlen, *pagesize, *opfn));
2755
2756     return ((ulong_t)((rlen > 0) ? rlen : 0));
2757 }
2758
2759 /*
2760  * Find the dhp that contains addr.
2761  */
2762 static devmap_handle_t *
2763 devmap_find_handle(devmap_handle_t *dhp_head, caddr_t addr)
2764 {
2765     devmap_handle_t *dhp;
2766
2767     TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_FIND_HANDLE,
2768         "devmap_find_handle:start");
2769
2770     dhp = dhp_head;
2771     while (dhp) {
2772         if (addr >= dhp->dh_uvaddr &&
2773             addr < (dhp->dh_uvaddr + dhp->dh_len))
2774             return (dhp);
2775         dhp = dhp->dh_next;
2776     }
2777
2778     return ((devmap_handle_t *)NULL);
2779 }
2780
2781 /*
2782  * devmap_unload:
2783  *	Marks the segdev segment, or just the pages if offset->offset+len
2784  *	is not the entire segment, as intercept and unloads the
2785  *	pages in the range offset -> offset+len.
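 *
 *	A hedged sketch of the typical use (names are hypothetical): a
 *	driver's context management callback revokes the translations of
 *	the handle that currently owns the device context before
 *	validating the faulting handle:
 *
 *		if (devmap_unload(xx_ctx->owner_dhp, off, len) != 0)
 *			return (-1);
 *		err = devmap_load(dhp, off, len, type, rw);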
2786 */ 2787 int 2788 devmap_unload(devmap_cookie_t dhc, offset_t offset, size_t len) 2789 { 2790 register devmap_handle_t *dhp = (devmap_handle_t *)dhc; 2791 caddr_t addr; 2792 ulong_t size; 2793 ssize_t soff; 2794 2795 TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_UNLOAD, 2796 "devmap_unload:start dhp=%p offset=%llx len=%lx", 2797 (void *)dhp, offset, len); 2798 DEBUGF(7, (CE_CONT, "devmap_unload: dhp %p offset %llx len %lx\n", 2799 (void *)dhp, offset, len)); 2800 2801 soff = (ssize_t)(offset - dhp->dh_uoff); 2802 soff = round_down_p2(soff, PAGESIZE); 2803 if (soff < 0 || soff >= dhp->dh_len) 2804 return (FC_MAKE_ERR(EINVAL)); 2805 2806 /* 2807 * Address and size must be page aligned. Len is set to the 2808 * number of bytes in the number of pages that are required to 2809 * support len. Offset is set to the byte offset of the first byte 2810 * of the page that contains offset. 2811 */ 2812 len = round_up_p2(len, PAGESIZE); 2813 2814 /* 2815 * If len is == 0, then calculate the size by getting 2816 * the number of bytes from offset to the end of the segment. 2817 */ 2818 if (len == 0) 2819 size = dhp->dh_len - soff; 2820 else { 2821 size = len; 2822 if ((soff + size) > dhp->dh_len) 2823 return (FC_MAKE_ERR(EINVAL)); 2824 } 2825 2826 /* 2827 * The address is offset bytes from the base address of 2828 * the dhp. 2829 */ 2830 addr = (caddr_t)(soff + dhp->dh_uvaddr); 2831 2832 /* 2833 * If large page size was used in hat_devload(), 2834 * the same page size must be used in hat_unload(). 2835 */ 2836 if (dhp->dh_flags & DEVMAP_FLAG_LARGE) { 2837 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr, 2838 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER); 2839 } else { 2840 hat_unload(dhp->dh_seg->s_as->a_hat, addr, size, 2841 HAT_UNLOAD|HAT_UNLOAD_OTHER); 2842 } 2843 2844 return (0); 2845 } 2846 2847 /* 2848 * calculates the optimal page size that will be used for hat_devload(). 2849 */ 2850 static void 2851 devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len, caddr_t addr, 2852 size_t *llen, caddr_t *laddr) 2853 { 2854 ulong_t off; 2855 ulong_t pfn; 2856 ulong_t pgsize; 2857 uint_t first = 1; 2858 2859 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GET_LARGE_PGSIZE, 2860 "devmap_get_large_pgsize:start"); 2861 2862 /* 2863 * RFE - Code only supports large page mappings for devmem 2864 * This code could be changed in future if we want to support 2865 * large page mappings for kernel exported memory. 2866 */ 2867 ASSERT(dhp_is_devmem(dhp)); 2868 ASSERT(!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)); 2869 2870 *llen = 0; 2871 off = (ulong_t)(addr - dhp->dh_uvaddr); 2872 while ((long)len > 0) { 2873 /* 2874 * get the optimal pfn to minimize address translations. 2875 * devmap_roundup() returns residue bytes for next round 2876 * calculations. 2877 */ 2878 len = devmap_roundup(dhp, off, len, &pfn, &pgsize); 2879 2880 if (first) { 2881 *laddr = dhp->dh_uvaddr + ptob(pfn - dhp->dh_pfn); 2882 first = 0; 2883 } 2884 2885 *llen += pgsize; 2886 off = ptob(pfn - dhp->dh_pfn) + pgsize; 2887 } 2888 /* Large page mapping len/addr cover more range than original fault */ 2889 ASSERT(*llen >= len && *laddr <= addr); 2890 ASSERT((*laddr + *llen) >= (addr + len)); 2891 } 2892 2893 /* 2894 * Initialize the devmap_softlock structure. 
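 *
 * Softlock structures are keyed by (dev, id) and shared: if an entry
 * for the same device and id already exists on devmap_slist, it is
 * reused and only its reference count is bumped.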
2895 */ 2896 static struct devmap_softlock * 2897 devmap_softlock_init(dev_t dev, ulong_t id) 2898 { 2899 struct devmap_softlock *slock; 2900 struct devmap_softlock *tmp; 2901 2902 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_INIT, 2903 "devmap_softlock_init:start"); 2904 2905 tmp = kmem_zalloc(sizeof (struct devmap_softlock), KM_SLEEP); 2906 mutex_enter(&devmap_slock); 2907 2908 for (slock = devmap_slist; slock != NULL; slock = slock->next) 2909 if ((slock->dev == dev) && (slock->id == id)) 2910 break; 2911 2912 if (slock == NULL) { 2913 slock = tmp; 2914 slock->dev = dev; 2915 slock->id = id; 2916 mutex_init(&slock->lock, NULL, MUTEX_DEFAULT, NULL); 2917 cv_init(&slock->cv, NULL, CV_DEFAULT, NULL); 2918 slock->next = devmap_slist; 2919 devmap_slist = slock; 2920 } else 2921 kmem_free(tmp, sizeof (struct devmap_softlock)); 2922 2923 mutex_enter(&slock->lock); 2924 slock->refcnt++; 2925 mutex_exit(&slock->lock); 2926 mutex_exit(&devmap_slock); 2927 2928 return (slock); 2929 } 2930 2931 /* 2932 * Wake up processes that sleep on softlocked. 2933 * Free dh_softlock if refcnt is 0. 2934 */ 2935 static void 2936 devmap_softlock_rele(devmap_handle_t *dhp) 2937 { 2938 struct devmap_softlock *slock = dhp->dh_softlock; 2939 struct devmap_softlock *tmp; 2940 struct devmap_softlock *parent; 2941 2942 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_RELE, 2943 "devmap_softlock_rele:start"); 2944 2945 mutex_enter(&devmap_slock); 2946 mutex_enter(&slock->lock); 2947 2948 ASSERT(slock->refcnt > 0); 2949 2950 slock->refcnt--; 2951 2952 /* 2953 * If no one is using the device, free up the slock data. 2954 */ 2955 if (slock->refcnt == 0) { 2956 slock->softlocked = 0; 2957 cv_signal(&slock->cv); 2958 2959 if (devmap_slist == slock) 2960 devmap_slist = slock->next; 2961 else { 2962 parent = devmap_slist; 2963 for (tmp = devmap_slist->next; tmp != NULL; 2964 tmp = tmp->next) { 2965 if (tmp == slock) { 2966 parent->next = tmp->next; 2967 break; 2968 } 2969 parent = tmp; 2970 } 2971 } 2972 mutex_exit(&slock->lock); 2973 mutex_destroy(&slock->lock); 2974 cv_destroy(&slock->cv); 2975 kmem_free(slock, sizeof (struct devmap_softlock)); 2976 } else 2977 mutex_exit(&slock->lock); 2978 2979 mutex_exit(&devmap_slock); 2980 } 2981 2982 /* 2983 * Wake up processes that sleep on dh_ctx->locked. 2984 * Free dh_ctx if refcnt is 0. 2985 */ 2986 static void 2987 devmap_ctx_rele(devmap_handle_t *dhp) 2988 { 2989 struct devmap_ctx *devctx = dhp->dh_ctx; 2990 struct devmap_ctx *tmp; 2991 struct devmap_ctx *parent; 2992 timeout_id_t tid; 2993 2994 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE, 2995 "devmap_ctx_rele:start"); 2996 2997 mutex_enter(&devmapctx_lock); 2998 mutex_enter(&devctx->lock); 2999 3000 ASSERT(devctx->refcnt > 0); 3001 3002 devctx->refcnt--; 3003 3004 /* 3005 * If no one is using the device, free up the devctx data. 3006 */ 3007 if (devctx->refcnt == 0) { 3008 /* 3009 * Untimeout any threads using this mapping as they are about 3010 * to go away. 
3011 */ 3012 if (devctx->timeout != 0) { 3013 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE_CK1, 3014 "devmap_ctx_rele:untimeout ctx->timeout"); 3015 3016 tid = devctx->timeout; 3017 mutex_exit(&devctx->lock); 3018 (void) untimeout(tid); 3019 mutex_enter(&devctx->lock); 3020 } 3021 3022 devctx->oncpu = 0; 3023 cv_signal(&devctx->cv); 3024 3025 if (devmapctx_list == devctx) 3026 devmapctx_list = devctx->next; 3027 else { 3028 parent = devmapctx_list; 3029 for (tmp = devmapctx_list->next; tmp != NULL; 3030 tmp = tmp->next) { 3031 if (tmp == devctx) { 3032 parent->next = tmp->next; 3033 break; 3034 } 3035 parent = tmp; 3036 } 3037 } 3038 mutex_exit(&devctx->lock); 3039 mutex_destroy(&devctx->lock); 3040 cv_destroy(&devctx->cv); 3041 kmem_free(devctx, sizeof (struct devmap_ctx)); 3042 } else 3043 mutex_exit(&devctx->lock); 3044 3045 mutex_exit(&devmapctx_lock); 3046 } 3047 3048 /* 3049 * devmap_load: 3050 * Marks a segdev segment or pages if offset->offset+len 3051 * is not the entire segment as nointercept and faults in 3052 * the pages in the range offset -> offset+len. 3053 */ 3054 int 3055 devmap_load(devmap_cookie_t dhc, offset_t offset, size_t len, uint_t type, 3056 uint_t rw) 3057 { 3058 devmap_handle_t *dhp = (devmap_handle_t *)dhc; 3059 struct as *asp = dhp->dh_seg->s_as; 3060 caddr_t addr; 3061 ulong_t size; 3062 ssize_t soff; /* offset from the beginning of the segment */ 3063 int rc; 3064 3065 TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_LOAD, 3066 "devmap_load:start dhp=%p offset=%llx len=%lx", 3067 (void *)dhp, offset, len); 3068 3069 DEBUGF(7, (CE_CONT, "devmap_load: dhp %p offset %llx len %lx\n", 3070 (void *)dhp, offset, len)); 3071 3072 /* 3073 * Hat layer only supports devload to process' context for which 3074 * the as lock is held. Verify here and return error if drivers 3075 * inadvertently call devmap_load on a wrong devmap handle. 3076 */ 3077 if ((asp != &kas) && !AS_LOCK_HELD(asp, &asp->a_lock)) 3078 return (FC_MAKE_ERR(EINVAL)); 3079 3080 soff = (ssize_t)(offset - dhp->dh_uoff); 3081 soff = round_down_p2(soff, PAGESIZE); 3082 if (soff < 0 || soff >= dhp->dh_len) 3083 return (FC_MAKE_ERR(EINVAL)); 3084 3085 /* 3086 * Address and size must be page aligned. Len is set to the 3087 * number of bytes in the number of pages that are required to 3088 * support len. Offset is set to the byte offset of the first byte 3089 * of the page that contains offset. 3090 */ 3091 len = round_up_p2(len, PAGESIZE); 3092 3093 /* 3094 * If len == 0, then calculate the size by getting 3095 * the number of bytes from offset to the end of the segment. 3096 */ 3097 if (len == 0) 3098 size = dhp->dh_len - soff; 3099 else { 3100 size = len; 3101 if ((soff + size) > dhp->dh_len) 3102 return (FC_MAKE_ERR(EINVAL)); 3103 } 3104 3105 /* 3106 * The address is offset bytes from the base address of 3107 * the segment. 
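 *
 * A worked example with illustrative values (assuming 4 Kbyte pages and
 * a sufficiently large dh_len): for dh_uoff = 0x2000, dh_uvaddr =
 * 0x10000 and offset = 0x3010, soff is 0x1010 rounded down to 0x1000,
 * and addr becomes 0x11000, the start of the page containing the
 * requested offset.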
3108 */ 3109 addr = (caddr_t)(soff + dhp->dh_uvaddr); 3110 3111 HOLD_DHP_LOCK(dhp); 3112 rc = segdev_faultpages(asp->a_hat, 3113 dhp->dh_seg, addr, size, type, rw, dhp); 3114 RELE_DHP_LOCK(dhp); 3115 return (rc); 3116 } 3117 3118 int 3119 devmap_setup(dev_t dev, offset_t off, struct as *as, caddr_t *addrp, 3120 size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred) 3121 { 3122 register devmap_handle_t *dhp; 3123 int (*devmap)(dev_t, devmap_cookie_t, offset_t, size_t, 3124 size_t *, uint_t); 3125 int (*mmap)(dev_t, off_t, int); 3126 struct devmap_callback_ctl *callbackops; 3127 devmap_handle_t *dhp_head = NULL; 3128 devmap_handle_t *dhp_prev = NULL; 3129 devmap_handle_t *dhp_curr; 3130 caddr_t addr; 3131 int map_flag; 3132 int ret; 3133 ulong_t total_len; 3134 size_t map_len; 3135 size_t resid_len = len; 3136 offset_t map_off = off; 3137 struct devmap_softlock *slock = NULL; 3138 3139 #ifdef lint 3140 cred = cred; 3141 #endif 3142 3143 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SETUP, 3144 "devmap_setup:start off=%llx len=%lx", off, len); 3145 DEBUGF(3, (CE_CONT, "devmap_setup: off %llx len %lx\n", 3146 off, len)); 3147 3148 devmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_devmap; 3149 mmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap; 3150 3151 /* 3152 * driver must provide devmap(9E) entry point in cb_ops to use the 3153 * devmap framework. 3154 */ 3155 if (devmap == NULL || devmap == nulldev || devmap == nodev) 3156 return (EINVAL); 3157 3158 /* 3159 * To protect from an inadvertent entry because the devmap entry point 3160 * is not NULL, return error if D_DEVMAP bit is not set in cb_flag and 3161 * mmap is NULL. 3162 */ 3163 map_flag = devopsp[getmajor(dev)]->devo_cb_ops->cb_flag; 3164 if ((map_flag & D_DEVMAP) == 0 && (mmap == NULL || mmap == nulldev)) 3165 return (EINVAL); 3166 3167 /* 3168 * devmap allows mmap(2) to map multiple registers. 3169 * one devmap_handle is created for each register mapped. 3170 */ 3171 for (total_len = 0; total_len < len; total_len += map_len) { 3172 dhp = kmem_zalloc(sizeof (devmap_handle_t), KM_SLEEP); 3173 3174 if (dhp_prev != NULL) 3175 dhp_prev->dh_next = dhp; 3176 else 3177 dhp_head = dhp; 3178 dhp_prev = dhp; 3179 3180 dhp->dh_prot = prot; 3181 dhp->dh_orig_maxprot = dhp->dh_maxprot = maxprot; 3182 dhp->dh_dev = dev; 3183 dhp->dh_timeout_length = CTX_TIMEOUT_VALUE; 3184 dhp->dh_uoff = map_off; 3185 3186 /* 3187 * Get mapping specific info from 3188 * the driver, such as rnumber, roff, len, callbackops, 3189 * accattrp and, if the mapping is for kernel memory, 3190 * ddi_umem_cookie. 3191 */ 3192 if ((ret = cdev_devmap(dev, dhp, map_off, 3193 resid_len, &map_len, get_udatamodel())) != 0) { 3194 free_devmap_handle(dhp_head); 3195 return (ENXIO); 3196 } 3197 3198 if (map_len & PAGEOFFSET) { 3199 free_devmap_handle(dhp_head); 3200 return (EINVAL); 3201 } 3202 3203 callbackops = &dhp->dh_callbackops; 3204 3205 if ((callbackops->devmap_access == NULL) || 3206 (callbackops->devmap_access == nulldev) || 3207 (callbackops->devmap_access == nodev)) { 3208 /* 3209 * Normally devmap does not support MAP_PRIVATE unless 3210 * the drivers provide a valid devmap_access routine. 3211 */ 3212 if ((flags & MAP_PRIVATE) != 0) { 3213 free_devmap_handle(dhp_head); 3214 return (EINVAL); 3215 } 3216 } else { 3217 /* 3218 * Initialize dhp_softlock and dh_ctx if the drivers 3219 * provide devmap_access. 
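 * Note that both are keyed on the address of the devmap_access
 * routine, so all handles of a mapping that share one callback also
 * share one softlock and one context.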
3220 */ 3221 dhp->dh_softlock = devmap_softlock_init(dev, 3222 (ulong_t)callbackops->devmap_access); 3223 dhp->dh_ctx = devmap_ctxinit(dev, 3224 (ulong_t)callbackops->devmap_access); 3225 3226 /* 3227 * segdev_fault can only work when all 3228 * dh_softlock in a multi-dhp mapping 3229 * are same. see comments in segdev_fault 3230 * This code keeps track of the first 3231 * dh_softlock allocated in slock and 3232 * compares all later allocations and if 3233 * not similar, returns an error. 3234 */ 3235 if (slock == NULL) 3236 slock = dhp->dh_softlock; 3237 if (slock != dhp->dh_softlock) { 3238 free_devmap_handle(dhp_head); 3239 return (ENOTSUP); 3240 } 3241 } 3242 3243 map_off += map_len; 3244 resid_len -= map_len; 3245 } 3246 3247 /* 3248 * get the user virtual address and establish the mapping between 3249 * uvaddr and device physical address. 3250 */ 3251 if ((ret = devmap_device(dhp_head, as, addrp, off, len, flags)) 3252 != 0) { 3253 /* 3254 * free devmap handles if error during the mapping. 3255 */ 3256 free_devmap_handle(dhp_head); 3257 3258 return (ret); 3259 } 3260 3261 /* 3262 * call the driver's devmap_map callback to do more after the mapping, 3263 * such as to allocate driver private data for context management. 3264 */ 3265 dhp = dhp_head; 3266 map_off = off; 3267 addr = *addrp; 3268 while (dhp != NULL) { 3269 callbackops = &dhp->dh_callbackops; 3270 dhp->dh_uvaddr = addr; 3271 dhp_curr = dhp; 3272 if (callbackops->devmap_map != NULL) { 3273 ret = (*callbackops->devmap_map)((devmap_cookie_t)dhp, 3274 dev, flags, map_off, 3275 dhp->dh_len, &dhp->dh_pvtp); 3276 if (ret != 0) { 3277 struct segdev_data *sdp; 3278 3279 /* 3280 * call driver's devmap_unmap entry point 3281 * to free driver resources. 3282 */ 3283 dhp = dhp_head; 3284 map_off = off; 3285 while (dhp != dhp_curr) { 3286 callbackops = &dhp->dh_callbackops; 3287 if (callbackops->devmap_unmap != NULL) { 3288 (*callbackops->devmap_unmap)( 3289 dhp, dhp->dh_pvtp, 3290 map_off, dhp->dh_len, 3291 NULL, NULL, NULL, NULL); 3292 } 3293 map_off += dhp->dh_len; 3294 dhp = dhp->dh_next; 3295 } 3296 sdp = dhp_head->dh_seg->s_data; 3297 sdp->devmap_data = NULL; 3298 free_devmap_handle(dhp_head); 3299 return (ENXIO); 3300 } 3301 } 3302 map_off += dhp->dh_len; 3303 addr += dhp->dh_len; 3304 dhp = dhp->dh_next; 3305 } 3306 3307 return (0); 3308 } 3309 3310 int 3311 ddi_devmap_segmap(dev_t dev, off_t off, ddi_as_handle_t as, caddr_t *addrp, 3312 off_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred) 3313 { 3314 TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP, 3315 "devmap_segmap:start"); 3316 return (devmap_setup(dev, (offset_t)off, (struct as *)as, addrp, 3317 (size_t)len, prot, maxprot, flags, cred)); 3318 } 3319 3320 /* 3321 * Called from devmap_devmem_setup/remap to see if can use large pages for 3322 * this device mapping. 3323 * Also calculate the max. page size for this mapping. 3324 * this page size will be used in fault routine for 3325 * optimal page size calculations. 3326 */ 3327 static void 3328 devmap_devmem_large_page_setup(devmap_handle_t *dhp) 3329 { 3330 ASSERT(dhp_is_devmem(dhp)); 3331 dhp->dh_mmulevel = 0; 3332 3333 /* 3334 * use large page size only if: 3335 * 1. device memory. 3336 * 2. mmu supports multiple page sizes, 3337 * 3. Driver did not disallow it 3338 * 4. dhp length is at least as big as the large pagesize 3339 * 5. 
the uvaddr and pfn are large pagesize aligned 3340 */ 3341 if (page_num_pagesizes() > 1 && 3342 !(dhp->dh_flags & (DEVMAP_USE_PAGESIZE | DEVMAP_MAPPING_INVALID))) { 3343 ulong_t base; 3344 int level; 3345 3346 base = (ulong_t)ptob(dhp->dh_pfn); 3347 for (level = 1; level < page_num_pagesizes(); level++) { 3348 size_t pgsize = page_get_pagesize(level); 3349 if ((dhp->dh_len < pgsize) || 3350 (!VA_PA_PGSIZE_ALIGNED((uintptr_t)dhp->dh_uvaddr, 3351 base, pgsize))) { 3352 break; 3353 } 3354 } 3355 dhp->dh_mmulevel = level - 1; 3356 } 3357 if (dhp->dh_mmulevel > 0) { 3358 dhp->dh_flags |= DEVMAP_FLAG_LARGE; 3359 } else { 3360 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE; 3361 } 3362 } 3363 3364 /* 3365 * Called by driver devmap routine to pass device specific info to 3366 * the framework. used for device memory mapping only. 3367 */ 3368 int 3369 devmap_devmem_setup(devmap_cookie_t dhc, dev_info_t *dip, 3370 struct devmap_callback_ctl *callbackops, uint_t rnumber, offset_t roff, 3371 size_t len, uint_t maxprot, uint_t flags, ddi_device_acc_attr_t *accattrp) 3372 { 3373 devmap_handle_t *dhp = (devmap_handle_t *)dhc; 3374 ddi_acc_handle_t handle; 3375 ddi_map_req_t mr; 3376 ddi_acc_hdl_t *hp; 3377 int err; 3378 3379 TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_SETUP, 3380 "devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx", 3381 (void *)dhp, roff, rnumber, (uint_t)len); 3382 DEBUGF(2, (CE_CONT, "devmap_devmem_setup: dhp %p offset %llx " 3383 "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len)); 3384 3385 /* 3386 * First to check if this function has been called for this dhp. 3387 */ 3388 if (dhp->dh_flags & DEVMAP_SETUP_DONE) 3389 return (DDI_FAILURE); 3390 3391 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot) 3392 return (DDI_FAILURE); 3393 3394 if (flags & DEVMAP_MAPPING_INVALID) { 3395 /* 3396 * Don't go up the tree to get pfn if the driver specifies 3397 * DEVMAP_MAPPING_INVALID in flags. 3398 * 3399 * If DEVMAP_MAPPING_INVALID is specified, we have to grant 3400 * remap permission. 3401 */ 3402 if (!(flags & DEVMAP_ALLOW_REMAP)) { 3403 return (DDI_FAILURE); 3404 } 3405 dhp->dh_pfn = PFN_INVALID; 3406 } else { 3407 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL); 3408 if (handle == NULL) 3409 return (DDI_FAILURE); 3410 3411 hp = impl_acc_hdl_get(handle); 3412 hp->ah_vers = VERS_ACCHDL; 3413 hp->ah_dip = dip; 3414 hp->ah_rnumber = rnumber; 3415 hp->ah_offset = roff; 3416 hp->ah_len = len; 3417 if (accattrp != NULL) 3418 hp->ah_acc = *accattrp; 3419 3420 mr.map_op = DDI_MO_MAP_LOCKED; 3421 mr.map_type = DDI_MT_RNUMBER; 3422 mr.map_obj.rnumber = rnumber; 3423 mr.map_prot = maxprot & dhp->dh_orig_maxprot; 3424 mr.map_flags = DDI_MF_DEVICE_MAPPING; 3425 mr.map_handlep = hp; 3426 mr.map_vers = DDI_MAP_VERSION; 3427 3428 /* 3429 * up the device tree to get pfn. 3430 * The rootnex_map_regspec() routine in nexus drivers has been 3431 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING. 
3432 */ 3433 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&dhp->dh_pfn); 3434 dhp->dh_hat_attr = hp->ah_hat_flags; 3435 impl_acc_hdl_free(handle); 3436 3437 if (err) 3438 return (DDI_FAILURE); 3439 } 3440 /* Should not be using devmem setup for memory pages */ 3441 ASSERT(!pf_is_memory(dhp->dh_pfn)); 3442 3443 /* Only some of the flags bits are settable by the driver */ 3444 dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS); 3445 dhp->dh_len = ptob(btopr(len)); 3446 3447 dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE; 3448 dhp->dh_roff = ptob(btop(roff)); 3449 3450 /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */ 3451 devmap_devmem_large_page_setup(dhp); 3452 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot; 3453 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot); 3454 3455 3456 if (callbackops != NULL) { 3457 bcopy(callbackops, &dhp->dh_callbackops, 3458 sizeof (struct devmap_callback_ctl)); 3459 } 3460 3461 /* 3462 * Initialize dh_lock if we want to do remap. 3463 */ 3464 if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) { 3465 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL); 3466 dhp->dh_flags |= DEVMAP_LOCK_INITED; 3467 } 3468 3469 dhp->dh_flags |= DEVMAP_SETUP_DONE; 3470 3471 return (DDI_SUCCESS); 3472 } 3473 3474 int 3475 devmap_devmem_remap(devmap_cookie_t dhc, dev_info_t *dip, 3476 uint_t rnumber, offset_t roff, size_t len, uint_t maxprot, 3477 uint_t flags, ddi_device_acc_attr_t *accattrp) 3478 { 3479 devmap_handle_t *dhp = (devmap_handle_t *)dhc; 3480 ddi_acc_handle_t handle; 3481 ddi_map_req_t mr; 3482 ddi_acc_hdl_t *hp; 3483 pfn_t pfn; 3484 uint_t hat_flags; 3485 int err; 3486 3487 TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_REMAP, 3488 "devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx", 3489 (void *)dhp, roff, rnumber, (uint_t)len); 3490 DEBUGF(2, (CE_CONT, "devmap_devmem_remap: dhp %p offset %llx " 3491 "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len)); 3492 3493 /* 3494 * Return failure if setup has not been done or no remap permission 3495 * has been granted during the setup. 
3496 */ 3497 if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 || 3498 (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0) 3499 return (DDI_FAILURE); 3500 3501 /* Only DEVMAP_MAPPING_INVALID flag supported for remap */ 3502 if ((flags != 0) && (flags != DEVMAP_MAPPING_INVALID)) 3503 return (DDI_FAILURE); 3504 3505 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot) 3506 return (DDI_FAILURE); 3507 3508 if (!(flags & DEVMAP_MAPPING_INVALID)) { 3509 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL); 3510 if (handle == NULL) 3511 return (DDI_FAILURE); 3512 } 3513 3514 HOLD_DHP_LOCK(dhp); 3515 3516 /* 3517 * Unload the old mapping, so next fault will setup the new mappings 3518 * Do this while holding the dhp lock so other faults dont reestablish 3519 * the mappings 3520 */ 3521 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr, 3522 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER); 3523 3524 if (flags & DEVMAP_MAPPING_INVALID) { 3525 dhp->dh_flags |= DEVMAP_MAPPING_INVALID; 3526 dhp->dh_pfn = PFN_INVALID; 3527 } else { 3528 /* clear any prior DEVMAP_MAPPING_INVALID flag */ 3529 dhp->dh_flags &= ~DEVMAP_MAPPING_INVALID; 3530 hp = impl_acc_hdl_get(handle); 3531 hp->ah_vers = VERS_ACCHDL; 3532 hp->ah_dip = dip; 3533 hp->ah_rnumber = rnumber; 3534 hp->ah_offset = roff; 3535 hp->ah_len = len; 3536 if (accattrp != NULL) 3537 hp->ah_acc = *accattrp; 3538 3539 mr.map_op = DDI_MO_MAP_LOCKED; 3540 mr.map_type = DDI_MT_RNUMBER; 3541 mr.map_obj.rnumber = rnumber; 3542 mr.map_prot = maxprot & dhp->dh_orig_maxprot; 3543 mr.map_flags = DDI_MF_DEVICE_MAPPING; 3544 mr.map_handlep = hp; 3545 mr.map_vers = DDI_MAP_VERSION; 3546 3547 /* 3548 * up the device tree to get pfn. 3549 * The rootnex_map_regspec() routine in nexus drivers has been 3550 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING. 3551 */ 3552 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&pfn); 3553 hat_flags = hp->ah_hat_flags; 3554 impl_acc_hdl_free(handle); 3555 if (err) { 3556 RELE_DHP_LOCK(dhp); 3557 return (DDI_FAILURE); 3558 } 3559 /* 3560 * Store result of ddi_map first in local variables, as we do 3561 * not want to overwrite the existing dhp with wrong data. 3562 */ 3563 dhp->dh_pfn = pfn; 3564 dhp->dh_hat_attr = hat_flags; 3565 } 3566 3567 /* clear the large page size flag */ 3568 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE; 3569 3570 dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE; 3571 dhp->dh_roff = ptob(btop(roff)); 3572 3573 /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */ 3574 devmap_devmem_large_page_setup(dhp); 3575 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot; 3576 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot); 3577 3578 RELE_DHP_LOCK(dhp); 3579 return (DDI_SUCCESS); 3580 } 3581 3582 /* 3583 * called by driver devmap routine to pass kernel virtual address mapping 3584 * info to the framework. used only for kernel memory 3585 * allocated from ddi_umem_alloc(). 
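 *
 * A minimal usage sketch (the xx_ names are hypothetical, not part of
 * this file): from a driver's devmap(9E) entry point, exporting memory
 * that was allocated earlier with ddi_umem_alloc():
 *
 *	error = devmap_umem_setup(dhp, xx_dip, NULL, xx_state->cookie,
 *	    off, len, PROT_ALL, DEVMAP_DEFAULTS, &xx_acc_attr);
 *	*maplen = ptob(btopr(len));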
3586  */
3587 int
3588 devmap_umem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3589     struct devmap_callback_ctl *callbackops, ddi_umem_cookie_t cookie,
3590     offset_t off, size_t len, uint_t maxprot, uint_t flags,
3591     ddi_device_acc_attr_t *accattrp)
3592 {
3593     devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3594     struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3595
3596 #ifdef lint
3597     dip = dip;
3598 #endif
3599
3600     TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_SETUP,
3601         "devmap_umem_setup:start dhp=%p offset=%llx cookie=%p len=%lx",
3602         (void *)dhp, off, cookie, len);
3603     DEBUGF(2, (CE_CONT, "devmap_umem_setup: dhp %p offset %llx "
3604         "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3605
3606     if (cookie == NULL)
3607         return (DDI_FAILURE);
3608
3609     /* For UMEM_TRASH, this restriction is not needed */
3610     if ((off + len) > cp->size)
3611         return (DDI_FAILURE);
3612
3613     /* check if the cache attributes are supported */
3614     if (i_ddi_check_cache_attr(flags) == B_FALSE)
3615         return (DDI_FAILURE);
3616
3617     /*
3618      * First check whether this function has already been called for
3619      * this dhp.
3620      */
3621     if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3622         return (DDI_FAILURE);
3623
3624     if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3625         return (DDI_FAILURE);
3626
3627     if (flags & DEVMAP_MAPPING_INVALID) {
3628         /*
3629          * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3630          * remap permission.
3631          */
3632         if (!(flags & DEVMAP_ALLOW_REMAP)) {
3633             return (DDI_FAILURE);
3634         }
3635     } else {
3636         dhp->dh_cookie = cookie;
3637         dhp->dh_roff = ptob(btop(off));
3638         dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3639         /* set HAT cache attributes */
3640         i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3641         /* set HAT endianness attributes */
3642         i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3643     }
3644
3645     /*
3646      * The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
3647      * we pass HAT_LOAD_NOCONSIST _only_ in cases where the hat tries to
3648      * create consistent mappings but our intention was to create
3649      * non-consistent mappings.
3650      *
3651      * DEVMEM: the hat figures out it's DEVMEM and creates
3652      * non-consistent mappings.
3653      *
3654      * kernel exported memory: the hat figures out it's memory and
3655      * always creates consistent mappings.
3656      *
3657      * /dev/mem: non-consistent mappings. See comments in common/io/mem.c
3658      *
3659      * /dev/kmem: consistent mappings are created unless they are
3660      * MAP_FIXED. We _explicitly_ tell the hat to create non-consistent
3661      * mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
3662      * mappings of /dev/kmem. See common/io/mem.c
3663      */
3664
3665     /* Only some of the flags bits are settable by the driver */
3666     dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3667
3668     dhp->dh_len = ptob(btopr(len));
3669     dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3670     ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3671
3672     if (callbackops != NULL) {
3673         bcopy(callbackops, &dhp->dh_callbackops,
3674             sizeof (struct devmap_callback_ctl));
3675     }
3676     /*
3677      * Initialize dh_lock if we want to do remap.
3678      */
3679     if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3680         mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3681         dhp->dh_flags |= DEVMAP_LOCK_INITED;
3682     }
3683
3684     dhp->dh_flags |= DEVMAP_SETUP_DONE;
3685
3686     return (DDI_SUCCESS);
3687 }
3688
3689 int
3690 devmap_umem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3691     ddi_umem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
3692     uint_t flags, ddi_device_acc_attr_t *accattrp)
3693 {
3694     devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3695     struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3696
3697     TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_REMAP,
3698         "devmap_umem_remap:start dhp=%p offset=%llx cookie=%p len=%lx",
3699         (void *)dhp, off, cookie, len);
3700     DEBUGF(2, (CE_CONT, "devmap_umem_remap: dhp %p offset %llx "
3701         "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3702
3703 #ifdef lint
3704     dip = dip;
3705     accattrp = accattrp;
3706 #endif
3707     /*
3708      * Return failure if setup has not been done or no remap permission
3709      * has been granted during the setup.
3710      */
3711     if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3712         (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3713         return (DDI_FAILURE);
3714
3715     /* No flags supported for remap yet */
3716     if (flags != 0)
3717         return (DDI_FAILURE);
3718
3719     /* check if the cache attributes are supported */
3720     if (i_ddi_check_cache_attr(flags) == B_FALSE)
3721         return (DDI_FAILURE);
3722
3723     if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3724         return (DDI_FAILURE);
3725
3726     /* For UMEM_TRASH, this restriction is not needed */
3727     if ((off + len) > cp->size)
3728         return (DDI_FAILURE);
3729
3730     HOLD_DHP_LOCK(dhp);
3731     /*
3732      * Unload the old mapping, so the next fault will set up the new
3733      * mappings. Do this while holding the dhp lock so other faults
3734      * don't reestablish the mappings.
3735      */
3736     hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3737         dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3738
3739     dhp->dh_cookie = cookie;
3740     dhp->dh_roff = ptob(btop(off));
3741     dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3742     /* set HAT cache attributes */
3743     i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3744     /* set HAT endianness attributes */
3745     i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3746
3747     /* clear the large page size flag */
3748     dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3749
3750     dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3751     ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3752     RELE_DHP_LOCK(dhp);
3753     return (DDI_SUCCESS);
3754 }
3755
3756 /*
3757  * Set the timeout value for the driver's context management callback,
3758  * e.g. devmap_access().
3759  */
3760 void
3761 devmap_set_ctx_timeout(devmap_cookie_t dhc, clock_t ticks)
3762 {
3763     devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3764
3765     TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SET_CTX_TIMEOUT,
3766         "devmap_set_ctx_timeout:start dhp=%p ticks=%x",
3767         (void *)dhp, ticks);
3768     dhp->dh_timeout_length = ticks;
3769 }
3770
3771 int
3772 devmap_default_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
3773     size_t len, uint_t type, uint_t rw)
3774 {
3775 #ifdef lint
3776     pvtp = pvtp;
3777 #endif
3778
3779     TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DEFAULT_ACCESS,
3780         "devmap_default_access:start");
3781     return (devmap_load(dhp, off, len, type, rw));
3782 }
3783
3784 /*
3785  * segkmem_alloc() wrapper to allocate memory which is both
3786  * non-relocatable (for DR) and sharelocked, since the rest
3787  * of this segment driver requires it.
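 *
 * (Presumably this routine and devmap_free_pages() below are wired up
 * as the import/release functions of umem_np_arena when that arena is
 * created, so every allocation from it picks up the
 * VM_NORELOC | SEGKMEM_SHARELOCKED flags added here.)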
/*
 * segkmem_alloc() wrapper to allocate memory which is both
 * non-relocatable (for DR) and sharelocked, since the rest
 * of this segment driver requires it.
 */
static void *
devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag)
{
	ASSERT(vmp != NULL);
	ASSERT(kvseg.s_base != NULL);
	vmflag |= (VM_NORELOC | SEGKMEM_SHARELOCKED);
	return (segkmem_alloc(vmp, size, vmflag));
}

/*
 * This is where things are a bit incestuous with seg_kmem: unlike
 * seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
 * we need to do a bit of a dance around that to prevent duplication of
 * code until we decide to bite the bullet and implement a new kernel
 * segment for driver-allocated memory that is exported to user space.
 */
static void
devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size)
{
	page_t *pp;
	caddr_t addr = inaddr;
	caddr_t eaddr;
	pgcnt_t npages = btopr(size);

	ASSERT(vmp != NULL);
	ASSERT(kvseg.s_base != NULL);
	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);

	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
		/*
		 * Use page_find() instead of page_lookup() to find the page
		 * since we know that it is hashed and has a shared lock.
		 */
		pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);

		if (pp == NULL)
			panic("devmap_free_pages: page not found");
		if (!page_tryupgrade(pp)) {
			page_unlock(pp);
			pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr,
			    SE_EXCL);
			if (pp == NULL)
				panic("devmap_free_pages: page already freed");
		}
		/* Clear p_lckcnt so page_destroy() doesn't update availrmem */
		pp->p_lckcnt = 0;
		page_destroy(pp, 0);
	}
	page_unresv(npages);

	if (vmp != NULL)
		vmem_free(vmp, inaddr, size);
}

/*
 * devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
 * allocating non-pageable kmem in response to a ddi_umem_alloc()
 * default request. For now we allocate our own pages and we keep
 * them long-term sharelocked, since: A) the fault routines expect the
 * memory to already be locked; B) pageable umem is already long-term
 * locked; C) it's a lot of work to make it otherwise, particularly
 * since the nexus layer expects the pages to never fault. An RFE is to
 * not keep the pages long-term locked, but instead to be able to
 * take faults on them and simply look them up in kvp in case we
 * fault on them. Even then, we must take care not to let pageout
 * steal them from us since the data must remain resident; if we
 * do this we must come up with some way to pin the pages to prevent
 * faults while a driver is doing DMA to/from them.
 */
static void *
devmap_umem_alloc_np(size_t size, size_t flags)
{
	void *buf;
	int vmflags = (flags & DDI_UMEM_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;

	buf = vmem_alloc(umem_np_arena, size, vmflags);
	if (buf != NULL)
		bzero(buf, size);
	return (buf);
}

static void
devmap_umem_free_np(void *addr, size_t size)
{
	vmem_free(umem_np_arena, addr, size);
}
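/*
 * Illustrative sketch only: within this file a non-pageable allocation
 * and its release pair up as
 *
 *	void *buf = devmap_umem_alloc_np(ptob(1), 0);
 *	...
 *	devmap_umem_free_np(buf, ptob(1));
 *
 * where passing DDI_UMEM_NOSLEEP in the flags turns the underlying
 * vmem_alloc() into a VM_NOSLEEP request that may return NULL.
 */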
/*
 * Allocate page-aligned kernel memory for exporting to user land.
 * The devmap framework will use the cookie allocated by ddi_umem_alloc()
 * to find a user virtual address that is in the same color as the
 * address allocated here.
 */
void *
ddi_umem_alloc(size_t size, int flags, ddi_umem_cookie_t *cookie)
{
	register size_t len = ptob(btopr(size));
	void *buf = NULL;
	struct ddi_umem_cookie *cp;
	int iflags = 0;

	*cookie = NULL;

	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_ALLOC,
	    "devmap_umem_alloc:start");
	if (len == 0)
		return ((void *)NULL);

	/*
	 * allocate cookie
	 */
	if ((cp = kmem_zalloc(sizeof (struct ddi_umem_cookie),
	    flags & DDI_UMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP)) == NULL) {
		ASSERT(flags & DDI_UMEM_NOSLEEP);
		return ((void *)NULL);
	}

	if (flags & DDI_UMEM_PAGEABLE) {
		/* Only one of the flags is allowed */
		ASSERT(!(flags & DDI_UMEM_TRASH));
		/* initialize resource with 0 */
		iflags = KPD_ZERO;

		/*
		 * To allocate unlocked pageable memory, use segkp_get() to
		 * create a segkp segment. Since segkp can only service kas,
		 * other segment drivers such as segdev have to do
		 * as_fault(segkp, SOFTLOCK) in their fault routines.
		 */
		if (flags & DDI_UMEM_NOSLEEP)
			iflags |= KPD_NOWAIT;

		if ((buf = segkp_get(segkp, len, iflags)) == NULL) {
			kmem_free(cp, sizeof (struct ddi_umem_cookie));
			return ((void *)NULL);
		}
		cp->type = KMEM_PAGEABLE;
		mutex_init(&cp->lock, NULL, MUTEX_DEFAULT, NULL);
		cp->locked = 0;
	} else if (flags & DDI_UMEM_TRASH) {
		/* Only one of the flags is allowed */
		ASSERT(!(flags & DDI_UMEM_PAGEABLE));
		cp->type = UMEM_TRASH;
		buf = NULL;
	} else {
		if ((buf = devmap_umem_alloc_np(len, flags)) == NULL) {
			kmem_free(cp, sizeof (struct ddi_umem_cookie));
			return ((void *)NULL);
		}

		cp->type = KMEM_NON_PAGEABLE;
	}

	/*
	 * Save the size here; it is needed when the memory is freed.
	 */
	cp->size = len;
	cp->cvaddr = (caddr_t)buf;

	*cookie = (void *)cp;
	return (buf);
}
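/*
 * Illustrative sketch only (hypothetical driver code): allocating one
 * page of pageable umem and releasing it. DDI_UMEM_PAGEABLE and
 * DDI_UMEM_TRASH are mutually exclusive, as asserted above.
 *
 *	ddi_umem_cookie_t cookie;
 *	void *kva;
 *
 *	kva = ddi_umem_alloc(PAGESIZE,
 *	    DDI_UMEM_SLEEP | DDI_UMEM_PAGEABLE, &cookie);
 *	if (kva != NULL) {
 *		... export via devmap_umem_setup(), then later ...
 *		ddi_umem_free(cookie);
 *	}
 */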
void
ddi_umem_free(ddi_umem_cookie_t cookie)
{
	struct ddi_umem_cookie *cp;

	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_FREE,
	    "devmap_umem_free:start");

	/*
	 * If cookie is NULL, this call has no effect.
	 */
	if (cookie == NULL)
		return;

	cp = (struct ddi_umem_cookie *)cookie;

	switch (cp->type) {
	case KMEM_PAGEABLE :
		ASSERT(cp->cvaddr != NULL && cp->size != 0);
		/*
		 * Check if there are still any pending faults on the cookie
		 * while the driver is deleting it.
		 * XXX - could change to an ASSERT, but that won't catch
		 * errant drivers.
		 */
		mutex_enter(&cp->lock);
		if (cp->locked) {
			mutex_exit(&cp->lock);
			panic("ddi_umem_free for cookie with pending faults %p",
			    (void *)cp);
			return;
		}

		segkp_release(segkp, cp->cvaddr);

		/*
		 * Release the mutex associated with this cookie.
		 */
		mutex_destroy(&cp->lock);
		break;
	case KMEM_NON_PAGEABLE :
		ASSERT(cp->cvaddr != NULL && cp->size != 0);
		devmap_umem_free_np(cp->cvaddr, cp->size);
		break;
	case UMEM_TRASH :
		break;
	case UMEM_LOCKED :
		/* Callers should use ddi_umem_unlock for this type */
		ddi_umem_unlock(cookie);
		/* Frees the cookie too */
		return;
	default:
		/* panic so we can diagnose the underlying cause */
		panic("ddi_umem_free: illegal cookie type 0x%x\n",
		    cp->type);
	}

	kmem_free(cookie, sizeof (struct ddi_umem_cookie));
}


static int
segdev_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;

	/*
	 * It looks as if it is always mapped shared
	 */
	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GETMEMID,
	    "segdev_getmemid:start");
	memidp->val[0] = (uintptr_t)VTOCVP(sdp->vp);
	memidp->val[1] = sdp->offset + (uintptr_t)(addr - seg->s_base);
	return (0);
}

/*
 * ddi_umem_alloc() non-pageable quantum cache max size.
 * This is just a SWAG.
 */
#define	DEVMAP_UMEM_QUANTUM	(8*PAGESIZE)

/*
 * Initialize seg_dev from boot. This routine sets up the trash page
 * and creates the umem_np_arena used to back non-pageable memory
 * requests.
 */
void
segdev_init(void)
{
	struct seg kseg;

	umem_np_arena = vmem_create("umem_np", NULL, 0, PAGESIZE,
	    devmap_alloc_pages, devmap_free_pages, heap_arena,
	    DEVMAP_UMEM_QUANTUM, VM_SLEEP);

	kseg.s_as = &kas;
	trashpp = page_create_va(&trashvp, 0, PAGESIZE,
	    PG_NORELOC | PG_EXCL | PG_WAIT, &kseg, NULL);
	if (trashpp == NULL)
		panic("segdev_init: failed to create trash page");
	pagezero(trashpp, 0, PAGESIZE);
	page_downgrade(trashpp);
}

/*
 * Invoke platform-dependent support routines so that /proc can have
 * the platform code deal with curious hardware.
 */
int
segdev_copyfrom(struct seg *seg,
    caddr_t uaddr, const void *devaddr, void *kaddr, size_t len)
{
	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
	struct snode *sp = VTOS(VTOCVP(sdp->vp));

	return (e_ddi_copyfromdev(sp->s_dip,
	    (off_t)(uaddr - seg->s_base), devaddr, kaddr, len));
}

int
segdev_copyto(struct seg *seg,
    caddr_t uaddr, const void *kaddr, void *devaddr, size_t len)
{
	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
	struct snode *sp = VTOS(VTOCVP(sdp->vp));

	return (e_ddi_copytodev(sp->s_dip,
	    (off_t)(uaddr - seg->s_base), kaddr, devaddr, len));
}