patch lower-case-segops
--- old/usr/src/uts/common/io/mem.c
+++ new/usr/src/uts/common/io/mem.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Memory special file
28 28 */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/user.h>
33 33 #include <sys/buf.h>
34 34 #include <sys/systm.h>
35 35 #include <sys/cred.h>
36 36 #include <sys/vm.h>
37 37 #include <sys/uio.h>
38 38 #include <sys/mman.h>
39 39 #include <sys/kmem.h>
40 40 #include <vm/seg.h>
41 41 #include <vm/page.h>
42 42 #include <sys/stat.h>
43 43 #include <sys/vmem.h>
44 44 #include <sys/memlist.h>
45 45 #include <sys/bootconf.h>
46 46
47 47 #include <vm/seg_vn.h>
48 48 #include <vm/seg_dev.h>
49 49 #include <vm/seg_kmem.h>
50 50 #include <vm/seg_kp.h>
51 51 #include <vm/seg_kpm.h>
52 52 #include <vm/hat.h>
53 53
54 54 #include <sys/conf.h>
55 55 #include <sys/mem.h>
56 56 #include <sys/types.h>
57 57 #include <sys/conf.h>
58 58 #include <sys/param.h>
59 59 #include <sys/systm.h>
60 60 #include <sys/errno.h>
61 61 #include <sys/modctl.h>
62 62 #include <sys/memlist.h>
63 63 #include <sys/ddi.h>
64 64 #include <sys/sunddi.h>
65 65 #include <sys/debug.h>
66 66 #include <sys/fm/protocol.h>
67 67
68 68 #if defined(__sparc)
69 69 extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
70 70 extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
71 71 uint64_t *, int *, int *, int *);
72 72 extern size_t cpu_get_name_bufsize(void);
73 73 extern int cpu_get_mem_sid(char *, char *, int, int *);
74 74 extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
75 75 #elif defined(__x86)
76 76 #include <sys/cpu_module.h>
77 77 #endif /* __sparc */
78 78
79 79 /*
80 80 * Turn a byte length into a pagecount. The DDI btop takes a
81 81 * 32-bit size on 32-bit machines, this handles 64-bit sizes for
82 82 * large physical-memory 32-bit machines.
83 83 */
84 84 #define BTOP(x) ((pgcnt_t)((x) >> _pageshift))
85 85
86 86 static kmutex_t mm_lock;
87 87 static caddr_t mm_map;
88 88
89 89 static dev_info_t *mm_dip; /* private copy of devinfo pointer */
90 90
91 91 static int mm_kmem_io_access;
92 92
93 93 static int mm_kstat_update(kstat_t *ksp, int rw);
94 94 static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
95 95
96 96 static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
97 97
98 98 /*ARGSUSED1*/
99 99 static int
100 100 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
101 101 {
102 102 int i;
103 103 struct mem_minor {
104 104 char *name;
105 105 minor_t minor;
106 106 int privonly;
107 107 const char *rdpriv;
108 108 const char *wrpriv;
109 109 mode_t priv_mode;
110 110 } mm[] = {
111 111 { "mem", M_MEM, 0, NULL, "all", 0640 },
112 112 { "kmem", M_KMEM, 0, NULL, "all", 0640 },
113 113 { "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
114 114 { "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
115 115 { "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
116 116 };
117 117 kstat_t *ksp;
118 118
119 119 mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
120 120 mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
121 121
122 122 for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
123 123 if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
124 124 mm[i].minor, DDI_PSEUDO, mm[i].privonly,
125 125 mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
126 126 DDI_FAILURE) {
127 127 ddi_remove_minor_node(devi, NULL);
128 128 return (DDI_FAILURE);
129 129 }
130 130 }
131 131
132 132 mm_dip = devi;
133 133
134 134 ksp = kstat_create("mm", 0, "phys_installed", "misc",
135 135 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
136 136 if (ksp != NULL) {
137 137 ksp->ks_update = mm_kstat_update;
138 138 ksp->ks_snapshot = mm_kstat_snapshot;
139 139 ksp->ks_lock = &mm_lock; /* XXX - not really needed */
140 140 kstat_install(ksp);
141 141 }
142 142
143 143 mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
144 144 "kmem_io_access", 0);
145 145
146 146 return (DDI_SUCCESS);
147 147 }
148 148
149 149 /*ARGSUSED*/
150 150 static int
151 151 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
152 152 {
153 153 register int error;
154 154
155 155 switch (infocmd) {
156 156 case DDI_INFO_DEVT2DEVINFO:
157 157 *result = (void *)mm_dip;
158 158 error = DDI_SUCCESS;
159 159 break;
160 160 case DDI_INFO_DEVT2INSTANCE:
161 161 *result = (void *)0;
162 162 error = DDI_SUCCESS;
163 163 break;
164 164 default:
165 165 error = DDI_FAILURE;
166 166 }
167 167 return (error);
168 168 }
169 169
170 170 /*ARGSUSED1*/
171 171 static int
172 172 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
173 173 {
174 174 switch (getminor(*devp)) {
175 175 case M_NULL:
176 176 case M_ZERO:
177 177 case M_MEM:
178 178 case M_KMEM:
179 179 case M_ALLKMEM:
180 180 /* standard devices */
181 181 break;
182 182
183 183 default:
184 184 /* Unsupported or unknown type */
185 185 return (EINVAL);
186 186 }
187 187 /* must be character device */
188 188 if (typ != OTYP_CHR)
189 189 return (EINVAL);
190 190 return (0);
191 191 }
192 192
193 193 struct pollhead mm_pollhd;
194 194
195 195 /*ARGSUSED*/
196 196 static int
197 197 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
198 198 struct pollhead **phpp)
199 199 {
200 200 switch (getminor(dev)) {
201 201 case M_NULL:
202 202 case M_ZERO:
203 203 case M_MEM:
204 204 case M_KMEM:
205 205 case M_ALLKMEM:
206 206 *reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
207 207 POLLWRNORM | POLLRDBAND | POLLWRBAND);
208 208 /*
209 209 * A non NULL pollhead pointer should be returned in case
210 210 * user polls for 0 events.
211 211 */
212 212 *phpp = !anyyet && !*reventsp ?
213 213 &mm_pollhd : (struct pollhead *)NULL;
214 214 return (0);
215 215 default:
216 216 /* no other devices currently support polling */
217 217 return (ENXIO);
218 218 }
219 219 }
220 220
221 221 static int
222 222 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
223 223 char *name, caddr_t valuep, int *lengthp)
224 224 {
225 225 /*
226 226 * implement zero size to reduce overhead (avoid two failing
227 227 * property lookups per stat).
228 228 */
229 229 return (ddi_prop_op_size(dev, dip, prop_op,
230 230 flags, name, valuep, lengthp, 0));
231 231 }
232 232
233 233 static int
234 234 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
235 235 page_t *pp)
236 236 {
237 237 int error = 0;
238 238 int devload = 0;
239 239 int is_memory = pf_is_memory(pfn);
240 240 size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
241 241 (size_t)uio->uio_iov->iov_len);
242 242 caddr_t va = NULL;
243 243
244 244 mutex_enter(&mm_lock);
245 245
246 246 if (is_memory && kpm_enable) {
247 247 if (pp)
248 248 va = hat_kpm_mapin(pp, NULL);
249 249 else
250 250 va = hat_kpm_mapin_pfn(pfn);
251 251 }
252 252
253 253 if (va == NULL) {
254 254 hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
255 255 (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
256 256 HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
257 257 va = mm_map;
258 258 devload = 1;
259 259 }
260 260
261 261 if (!is_memory) {
262 262 if (allowio) {
263 263 size_t c = uio->uio_iov->iov_len;
264 264
265 265 if (ddi_peekpokeio(NULL, uio, rw,
266 266 (caddr_t)(uintptr_t)uio->uio_loffset, c,
267 267 sizeof (int32_t)) != DDI_SUCCESS)
268 268 error = EFAULT;
269 269 } else
270 270 error = EIO;
271 271 } else
272 272 error = uiomove(va + pageoff, nbytes, rw, uio);
273 273
274 274 if (devload)
275 275 hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
276 276 else if (pp)
277 277 hat_kpm_mapout(pp, NULL, va);
278 278 else
279 279 hat_kpm_mapout_pfn(pfn);
280 280
281 281 mutex_exit(&mm_lock);
282 282 return (error);
283 283 }
284 284
285 285 static int
286 286 mmpagelock(struct as *as, caddr_t va)
287 287 {
288 288 struct seg *seg;
289 289 int i;
290 290
291 291 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
292 292 seg = as_segat(as, va);
293 - i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
293 + i = (seg != NULL)? segop_capable(seg, S_CAPABILITY_NOMINFLT) : 0;
294 294 AS_LOCK_EXIT(as, &as->a_lock);
295 295
296 296 return (i);
297 297 }
298 298
299 299 #ifdef __sparc
300 300
301 301 #define NEED_LOCK_KVADDR(kva) mmpagelock(&kas, kva)
302 302
303 303 #else /* __i386, __amd64 */
304 304
305 305 #define NEED_LOCK_KVADDR(va) 0
306 306
307 307 #endif /* __sparc */
308 308
309 309 /*ARGSUSED3*/
310 310 static int
311 311 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
312 312 {
313 313 pfn_t v;
314 314 struct iovec *iov;
315 315 int error = 0;
316 316 size_t c;
317 317 ssize_t oresid = uio->uio_resid;
318 318 minor_t minor = getminor(dev);
319 319
320 320 while (uio->uio_resid > 0 && error == 0) {
321 321 iov = uio->uio_iov;
322 322 if (iov->iov_len == 0) {
323 323 uio->uio_iov++;
324 324 uio->uio_iovcnt--;
325 325 if (uio->uio_iovcnt < 0)
326 326 panic("mmrw");
327 327 continue;
328 328 }
329 329 switch (minor) {
330 330
331 331 case M_MEM:
332 332 memlist_read_lock();
333 333 if (!address_in_memlist(phys_install,
334 334 (uint64_t)uio->uio_loffset, 1)) {
335 335 memlist_read_unlock();
336 336 error = EFAULT;
337 337 break;
338 338 }
339 339 memlist_read_unlock();
340 340
341 341 v = BTOP((u_offset_t)uio->uio_loffset);
342 342 error = mmio(uio, rw, v,
343 343 uio->uio_loffset & PAGEOFFSET, 0, NULL);
344 344 break;
345 345
346 346 case M_KMEM:
347 347 case M_ALLKMEM:
348 348 {
349 349 page_t **ppp = NULL;
350 350 caddr_t vaddr = (caddr_t)uio->uio_offset;
351 351 int try_lock = NEED_LOCK_KVADDR(vaddr);
352 352 int locked = 0;
353 353
354 354 if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
355 355 break;
356 356
357 357 /*
358 358 * If vaddr does not map a valid page, as_pagelock()
359 359 * will return failure. Hence we can't check the
360 360 * return value and return EFAULT here as we'd like.
361 361 * seg_kp and seg_kpm do not properly support
362 362 * as_pagelock() for this context so we avoid it
363 363 * using the try_lock set check above. Some day when
364 364 * the kernel page locking gets redesigned all this
365 365 * muck can be cleaned up.
366 366 */
367 367 if (try_lock)
368 368 locked = (as_pagelock(&kas, &ppp, vaddr,
369 369 PAGESIZE, S_WRITE) == 0);
370 370
371 371 v = hat_getpfnum(kas.a_hat,
372 372 (caddr_t)(uintptr_t)uio->uio_loffset);
373 373 if (v == PFN_INVALID) {
374 374 if (locked)
375 375 as_pageunlock(&kas, ppp, vaddr,
376 376 PAGESIZE, S_WRITE);
377 377 error = EFAULT;
378 378 break;
379 379 }
380 380
381 381 error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
382 382 minor == M_ALLKMEM || mm_kmem_io_access,
383 383 (locked && ppp) ? *ppp : NULL);
384 384 if (locked)
385 385 as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
386 386 S_WRITE);
387 387 }
388 388
389 389 break;
390 390
391 391 case M_ZERO:
392 392 if (rw == UIO_READ) {
393 393 label_t ljb;
394 394
395 395 if (on_fault(&ljb)) {
396 396 no_fault();
397 397 error = EFAULT;
398 398 break;
399 399 }
400 400 uzero(iov->iov_base, iov->iov_len);
401 401 no_fault();
402 402 uio->uio_resid -= iov->iov_len;
403 403 uio->uio_loffset += iov->iov_len;
404 404 break;
405 405 }
406 406 /* else it's a write, fall through to NULL case */
407 407 /*FALLTHROUGH*/
408 408
409 409 case M_NULL:
410 410 if (rw == UIO_READ)
411 411 return (0);
412 412 c = iov->iov_len;
413 413 iov->iov_base += c;
414 414 iov->iov_len -= c;
415 415 uio->uio_loffset += c;
416 416 uio->uio_resid -= c;
417 417 break;
418 418
419 419 }
420 420 }
421 421 return (uio->uio_resid == oresid ? error : 0);
422 422 }
423 423
424 424 static int
425 425 mmread(dev_t dev, struct uio *uio, cred_t *cred)
426 426 {
427 427 return (mmrw(dev, uio, UIO_READ, cred));
428 428 }
429 429
430 430 static int
431 431 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
432 432 {
433 433 return (mmrw(dev, uio, UIO_WRITE, cred));
434 434 }
435 435
436 436 /*
437 437 * Private ioctl for libkvm to support kvm_physaddr().
438 438 * Given an address space and a VA, compute the PA.
439 439 */
440 440 static int
441 441 mmioctl_vtop(intptr_t data)
442 442 {
443 443 #ifdef _SYSCALL32
444 444 mem_vtop32_t vtop32;
445 445 #endif
446 446 mem_vtop_t mem_vtop;
447 447 proc_t *p;
448 448 pfn_t pfn = (pfn_t)PFN_INVALID;
449 449 pid_t pid = 0;
450 450 struct as *as;
451 451 struct seg *seg;
452 452
453 453 if (get_udatamodel() == DATAMODEL_NATIVE) {
454 454 if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
455 455 return (EFAULT);
456 456 }
457 457 #ifdef _SYSCALL32
458 458 else {
459 459 if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
460 460 return (EFAULT);
461 461 mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
462 462 mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;
463 463
464 464 if (mem_vtop.m_as != NULL)
465 465 return (EINVAL);
466 466 }
467 467 #endif
468 468
469 469 if (mem_vtop.m_as == &kas) {
470 470 pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
471 471 } else {
472 472 if (mem_vtop.m_as == NULL) {
473 473 /*
474 474 * Assume the calling process's address space if the
475 475 * caller didn't specify one.
476 476 */
477 477 p = curthread->t_procp;
478 478 if (p == NULL)
479 479 return (EIO);
480 480 mem_vtop.m_as = p->p_as;
481 481 }
482 482
483 483 mutex_enter(&pidlock);
484 484 for (p = practive; p != NULL; p = p->p_next) {
485 485 if (p->p_as == mem_vtop.m_as) {
486 486 pid = p->p_pid;
487 487 break;
488 488 }
489 489 }
490 490 mutex_exit(&pidlock);
491 491 if (p == NULL)
492 492 return (EIO);
493 493 p = sprlock(pid);
494 494 if (p == NULL)
495 495 return (EIO);
496 496 as = p->p_as;
497 497 if (as == mem_vtop.m_as) {
498 498 mutex_exit(&p->p_lock);
499 499 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
500 500 for (seg = AS_SEGFIRST(as); seg != NULL;
501 501 seg = AS_SEGNEXT(as, seg))
502 502 if ((uintptr_t)mem_vtop.m_va -
503 503 (uintptr_t)seg->s_base < seg->s_size)
504 504 break;
505 505 if (seg != NULL)
506 506 pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
507 507 AS_LOCK_EXIT(as, &as->a_lock);
508 508 mutex_enter(&p->p_lock);
509 509 }
510 510 sprunlock(p);
511 511 }
512 512 mem_vtop.m_pfn = pfn;
513 513 if (pfn == PFN_INVALID)
514 514 return (EIO);
515 515
516 516 if (get_udatamodel() == DATAMODEL_NATIVE) {
517 517 if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
518 518 return (EFAULT);
519 519 }
520 520 #ifdef _SYSCALL32
521 521 else {
522 522 vtop32.m_pfn = mem_vtop.m_pfn;
523 523 if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
524 524 return (EFAULT);
525 525 }
526 526 #endif
527 527
528 528 return (0);
529 529 }
530 530
531 531 /*
532 532 * Given a PA, execute the given page retire command on it.
533 533 */
534 534 static int
535 535 mmioctl_page_retire(int cmd, intptr_t data)
536 536 {
537 537 extern int page_retire_test(void);
538 538 uint64_t pa;
539 539
540 540 if (copyin((void *)data, &pa, sizeof (uint64_t))) {
541 541 return (EFAULT);
542 542 }
543 543
544 544 switch (cmd) {
545 545 case MEM_PAGE_ISRETIRED:
546 546 return (page_retire_check(pa, NULL));
547 547
548 548 case MEM_PAGE_UNRETIRE:
549 549 return (page_unretire(pa));
550 550
551 551 case MEM_PAGE_RETIRE:
552 552 return (page_retire(pa, PR_FMA));
553 553
554 554 case MEM_PAGE_RETIRE_MCE:
555 555 return (page_retire(pa, PR_MCE));
556 556
557 557 case MEM_PAGE_RETIRE_UE:
558 558 return (page_retire(pa, PR_UE));
559 559
560 560 case MEM_PAGE_GETERRORS:
561 561 {
562 562 uint64_t page_errors;
563 563 int rc = page_retire_check(pa, &page_errors);
564 564 if (copyout(&page_errors, (void *)data,
565 565 sizeof (uint64_t))) {
566 566 return (EFAULT);
567 567 }
568 568 return (rc);
569 569 }
570 570
571 571 case MEM_PAGE_RETIRE_TEST:
572 572 return (page_retire_test());
573 573
574 574 }
575 575
576 576 return (EINVAL);
577 577 }
578 578
579 579 #ifdef __sparc
580 580 /*
581 581 * Given a syndrome, syndrome type, and address return the
582 582 * associated memory name in the provided data buffer.
583 583 */
584 584 static int
585 585 mmioctl_get_mem_name(intptr_t data)
586 586 {
587 587 mem_name_t mem_name;
588 588 void *buf;
589 589 size_t bufsize;
590 590 int len, err;
591 591
592 592 if ((bufsize = cpu_get_name_bufsize()) == 0)
593 593 return (ENOTSUP);
594 594
595 595 if ((err = mm_read_mem_name(data, &mem_name)) < 0)
596 596 return (err);
597 597
598 598 buf = kmem_alloc(bufsize, KM_SLEEP);
599 599
600 600 /*
601 601 * Call into cpu specific code to do the lookup.
602 602 */
603 603 if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
604 604 mem_name.m_addr, buf, bufsize, &len)) != 0) {
605 605 kmem_free(buf, bufsize);
606 606 return (err);
607 607 }
608 608
609 609 if (len >= mem_name.m_namelen) {
610 610 kmem_free(buf, bufsize);
611 611 return (ENOSPC);
612 612 }
613 613
614 614 if (copyoutstr(buf, (char *)mem_name.m_name,
615 615 mem_name.m_namelen, NULL) != 0) {
616 616 kmem_free(buf, bufsize);
617 617 return (EFAULT);
618 618 }
619 619
620 620 kmem_free(buf, bufsize);
621 621 return (0);
622 622 }
623 623
624 624 /*
625 625 * Given a syndrome and address return information about the associated memory.
626 626 */
627 627 static int
628 628 mmioctl_get_mem_info(intptr_t data)
629 629 {
630 630 mem_info_t mem_info;
631 631 int err;
632 632
633 633 if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
634 634 return (EFAULT);
635 635
636 636 if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
637 637 &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
638 638 &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
639 639 return (err);
640 640
641 641 if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
642 642 return (EFAULT);
643 643
644 644 return (0);
645 645 }
646 646
647 647 /*
648 648 * Given a memory name, return its associated serial id
649 649 */
650 650 static int
651 651 mmioctl_get_mem_sid(intptr_t data)
652 652 {
653 653 mem_name_t mem_name;
654 654 void *buf;
655 655 void *name;
656 656 size_t name_len;
657 657 size_t bufsize;
658 658 int len, err;
659 659
660 660 if ((bufsize = cpu_get_name_bufsize()) == 0)
661 661 return (ENOTSUP);
662 662
663 663 if ((err = mm_read_mem_name(data, &mem_name)) < 0)
664 664 return (err);
665 665
666 666 buf = kmem_alloc(bufsize, KM_SLEEP);
667 667
668 668 if (mem_name.m_namelen > 1024)
669 669 mem_name.m_namelen = 1024; /* cap at 1024 bytes */
670 670
671 671 name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);
672 672
673 673 if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
674 674 mem_name.m_namelen, &name_len)) != 0) {
675 675 kmem_free(buf, bufsize);
676 676 kmem_free(name, mem_name.m_namelen);
677 677 return (err);
678 678 }
679 679
680 680 /*
681 681 * Call into cpu specific code to do the lookup.
682 682 */
683 683 if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
684 684 kmem_free(buf, bufsize);
685 685 kmem_free(name, mem_name.m_namelen);
686 686 return (err);
687 687 }
688 688
689 689 if (len > mem_name.m_sidlen) {
690 690 kmem_free(buf, bufsize);
691 691 kmem_free(name, mem_name.m_namelen);
692 692 return (ENAMETOOLONG);
693 693 }
694 694
695 695 if (copyoutstr(buf, (char *)mem_name.m_sid,
696 696 mem_name.m_sidlen, NULL) != 0) {
697 697 kmem_free(buf, bufsize);
698 698 kmem_free(name, mem_name.m_namelen);
699 699 return (EFAULT);
700 700 }
701 701
702 702 kmem_free(buf, bufsize);
703 703 kmem_free(name, mem_name.m_namelen);
704 704 return (0);
705 705 }
706 706 #endif /* __sparc */
707 707
708 708 /*
709 709 * Private ioctls for
710 710 * libkvm to support kvm_physaddr().
711 711 * FMA support for page_retire() and memory attribute information.
712 712 */
713 713 /*ARGSUSED*/
714 714 static int
715 715 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
716 716 {
717 717 if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
718 718 (cmd != MEM_VTOP && getminor(dev) != M_MEM))
719 719 return (ENXIO);
720 720
721 721 switch (cmd) {
722 722 case MEM_VTOP:
723 723 return (mmioctl_vtop(data));
724 724
725 725 case MEM_PAGE_RETIRE:
726 726 case MEM_PAGE_ISRETIRED:
727 727 case MEM_PAGE_UNRETIRE:
728 728 case MEM_PAGE_RETIRE_MCE:
729 729 case MEM_PAGE_RETIRE_UE:
730 730 case MEM_PAGE_GETERRORS:
731 731 case MEM_PAGE_RETIRE_TEST:
732 732 return (mmioctl_page_retire(cmd, data));
733 733
734 734 #ifdef __sparc
735 735 case MEM_NAME:
736 736 return (mmioctl_get_mem_name(data));
737 737
738 738 case MEM_INFO:
739 739 return (mmioctl_get_mem_info(data));
740 740
741 741 case MEM_SID:
742 742 return (mmioctl_get_mem_sid(data));
743 743 #else
744 744 case MEM_NAME:
745 745 case MEM_INFO:
746 746 case MEM_SID:
747 747 return (ENOTSUP);
748 748 #endif /* __sparc */
749 749 }
750 750 return (ENXIO);
751 751 }
752 752
753 753 /*ARGSUSED2*/
754 754 static int
755 755 mmmmap(dev_t dev, off_t off, int prot)
756 756 {
757 757 pfn_t pf;
758 758 struct memlist *pmem;
759 759 minor_t minor = getminor(dev);
760 760
761 761 switch (minor) {
762 762 case M_MEM:
763 763 pf = btop(off);
764 764 memlist_read_lock();
765 765 for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
766 766 if (pf >= BTOP(pmem->ml_address) &&
767 767 pf < BTOP(pmem->ml_address + pmem->ml_size)) {
768 768 memlist_read_unlock();
769 769 return (impl_obmem_pfnum(pf));
770 770 }
771 771 }
772 772 memlist_read_unlock();
773 773 break;
774 774
775 775 case M_KMEM:
776 776 case M_ALLKMEM:
777 777 /* no longer supported with KPR */
778 778 return (-1);
779 779
780 780 case M_ZERO:
781 781 /*
782 782 * We shouldn't be mmap'ing to /dev/zero here as
783 783 * mmsegmap() should have already converted
784 784 * a mapping request for this device to a mapping
785 785 * using seg_vn for anonymous memory.
786 786 */
787 787 break;
788 788
789 789 }
790 790 return (-1);
791 791 }
792 792
793 793 /*
794 794 * This function is called when a memory device is mmap'ed.
795 795 * Set up the mapping to the correct device driver.
796 796 */
797 797 static int
798 798 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
799 799 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
800 800 {
801 801 struct segvn_crargs vn_a;
802 802 struct segdev_crargs dev_a;
803 803 int error;
804 804 minor_t minor;
805 805 off_t i;
806 806
807 807 minor = getminor(dev);
808 808
809 809 as_rangelock(as);
810 810 /*
811 811 * No need to worry about vac alignment on /dev/zero
812 812 * since this is a "clone" object that doesn't yet exist.
813 813 */
814 814 error = choose_addr(as, addrp, len, off,
815 815 (minor == M_MEM) || (minor == M_KMEM), flags);
816 816 if (error != 0) {
817 817 as_rangeunlock(as);
818 818 return (error);
819 819 }
820 820
821 821 switch (minor) {
822 822 case M_MEM:
823 823 /* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
824 824 if ((flags & MAP_TYPE) != MAP_SHARED) {
825 825 as_rangeunlock(as);
826 826 return (EINVAL);
827 827 }
828 828
829 829 /*
830 830 * Check to ensure that the entire range is
831 831 * legal and we are not trying to map in
832 832 * more than the device will let us.
833 833 */
834 834 for (i = 0; i < len; i += PAGESIZE) {
835 835 if (mmmmap(dev, off + i, maxprot) == -1) {
836 836 as_rangeunlock(as);
837 837 return (ENXIO);
838 838 }
839 839 }
840 840
841 841 /*
842 842 * Use seg_dev segment driver for /dev/mem mapping.
843 843 */
844 844 dev_a.mapfunc = mmmmap;
845 845 dev_a.dev = dev;
846 846 dev_a.offset = off;
847 847 dev_a.type = (flags & MAP_TYPE);
848 848 dev_a.prot = (uchar_t)prot;
849 849 dev_a.maxprot = (uchar_t)maxprot;
850 850 dev_a.hat_attr = 0;
851 851
852 852 /*
853 853 * Make /dev/mem mappings non-consistent since we can't
854 854 * alias pages that don't have page structs behind them,
855 855 * such as kernel stack pages. If someone mmap()s a kernel
856 856 * stack page and if we give him a tte with cv, a line from
857 857 * that page can get into both pages of the spitfire d$.
858 858 * But snoop from another processor will only invalidate
859 859 * the first page. This later caused kernel (xc_attention)
860 860 * to go into an infinite loop at pil 13 and no interrupts
861 861 * could come in. See 1203630.
862 862 *
863 863 */
864 864 dev_a.hat_flags = HAT_LOAD_NOCONSIST;
865 865 dev_a.devmap_data = NULL;
866 866
867 867 error = as_map(as, *addrp, len, segdev_create, &dev_a);
868 868 break;
869 869
870 870 case M_ZERO:
871 871 /*
872 872 * Use seg_vn segment driver for /dev/zero mapping.
873 873 * Passing in a NULL amp gives us the "cloning" effect.
874 874 */
875 875 vn_a.vp = NULL;
876 876 vn_a.offset = 0;
877 877 vn_a.type = (flags & MAP_TYPE);
878 878 vn_a.prot = prot;
879 879 vn_a.maxprot = maxprot;
880 880 vn_a.flags = flags & ~MAP_TYPE;
881 881 vn_a.cred = cred;
882 882 vn_a.amp = NULL;
883 883 vn_a.szc = 0;
884 884 vn_a.lgrp_mem_policy_flags = 0;
885 885 error = as_map(as, *addrp, len, segvn_create, &vn_a);
886 886 break;
887 887
888 888 case M_KMEM:
889 889 case M_ALLKMEM:
890 890 /* No longer supported with KPR. */
891 891 error = ENXIO;
892 892 break;
893 893
894 894 case M_NULL:
895 895 /*
896 896 * Use seg_dev segment driver for /dev/null mapping.
897 897 */
898 898 dev_a.mapfunc = mmmmap;
899 899 dev_a.dev = dev;
900 900 dev_a.offset = off;
901 901 dev_a.type = 0; /* neither PRIVATE nor SHARED */
902 902 dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
903 903 dev_a.hat_attr = 0;
904 904 dev_a.hat_flags = 0;
905 905 error = as_map(as, *addrp, len, segdev_create, &dev_a);
906 906 break;
907 907
908 908 default:
909 909 error = ENXIO;
910 910 }
911 911
912 912 as_rangeunlock(as);
913 913 return (error);
914 914 }
915 915
916 916 static struct cb_ops mm_cb_ops = {
917 917 mmopen, /* open */
918 918 nulldev, /* close */
919 919 nodev, /* strategy */
920 920 nodev, /* print */
921 921 nodev, /* dump */
922 922 mmread, /* read */
923 923 mmwrite, /* write */
924 924 mmioctl, /* ioctl */
925 925 nodev, /* devmap */
926 926 mmmmap, /* mmap */
927 927 mmsegmap, /* segmap */
928 928 mmchpoll, /* poll */
929 929 mmpropop, /* prop_op */
930 930 0, /* streamtab */
931 931 D_NEW | D_MP | D_64BIT | D_U64BIT
932 932 };
933 933
934 934 static struct dev_ops mm_ops = {
935 935 DEVO_REV, /* devo_rev, */
936 936 0, /* refcnt */
937 937 mm_info, /* get_dev_info */
938 938 nulldev, /* identify */
939 939 nulldev, /* probe */
940 940 mm_attach, /* attach */
941 941 nodev, /* detach */
942 942 nodev, /* reset */
943 943 &mm_cb_ops, /* driver operations */
944 944 (struct bus_ops *)0, /* bus operations */
945 945 NULL, /* power */
946 946 ddi_quiesce_not_needed, /* quiesce */
947 947 };
948 948
949 949 static struct modldrv modldrv = {
950 950 &mod_driverops, "memory driver", &mm_ops,
951 951 };
952 952
953 953 static struct modlinkage modlinkage = {
954 954 MODREV_1, &modldrv, NULL
955 955 };
956 956
957 957 int
958 958 _init(void)
959 959 {
960 960 return (mod_install(&modlinkage));
961 961 }
962 962
963 963 int
964 964 _info(struct modinfo *modinfop)
965 965 {
966 966 return (mod_info(&modlinkage, modinfop));
967 967 }
968 968
969 969 int
970 970 _fini(void)
971 971 {
972 972 return (mod_remove(&modlinkage));
973 973 }
974 974
975 975 static int
976 976 mm_kstat_update(kstat_t *ksp, int rw)
977 977 {
978 978 struct memlist *pmem;
979 979 uint_t count;
980 980
981 981 if (rw == KSTAT_WRITE)
982 982 return (EACCES);
983 983
984 984 count = 0;
985 985 memlist_read_lock();
986 986 for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
987 987 count++;
988 988 }
989 989 memlist_read_unlock();
990 990
991 991 ksp->ks_ndata = count;
992 992 ksp->ks_data_size = count * 2 * sizeof (uint64_t);
993 993
994 994 return (0);
995 995 }
996 996
997 997 static int
998 998 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
999 999 {
1000 1000 struct memlist *pmem;
1001 1001 struct memunit {
1002 1002 uint64_t address;
1003 1003 uint64_t size;
1004 1004 } *kspmem;
1005 1005
1006 1006 if (rw == KSTAT_WRITE)
1007 1007 return (EACCES);
1008 1008
1009 1009 ksp->ks_snaptime = gethrtime();
1010 1010
1011 1011 kspmem = (struct memunit *)buf;
1012 1012 memlist_read_lock();
1013 1013 for (pmem = phys_install; pmem != NULL;
1014 1014 pmem = pmem->ml_next, kspmem++) {
1015 1015 if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
1016 1016 break;
1017 1017 kspmem->address = pmem->ml_address;
1018 1018 kspmem->size = pmem->ml_size;
1019 1019 }
1020 1020 memlist_read_unlock();
1021 1021
1022 1022 return (0);
1023 1023 }
1024 1024
1025 1025 /*
1026 1026 * Read a mem_name_t from user-space and store it in the mem_name_t
1027 1027 * pointed to by the mem_name argument.
1028 1028 */
1029 1029 static int
1030 1030 mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
1031 1031 {
1032 1032 if (get_udatamodel() == DATAMODEL_NATIVE) {
1033 1033 if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
1034 1034 return (EFAULT);
1035 1035 }
1036 1036 #ifdef _SYSCALL32
1037 1037 else {
1038 1038 mem_name32_t mem_name32;
1039 1039
1040 1040 if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
1041 1041 return (EFAULT);
1042 1042 mem_name->m_addr = mem_name32.m_addr;
1043 1043 mem_name->m_synd = mem_name32.m_synd;
1044 1044 mem_name->m_type[0] = mem_name32.m_type[0];
1045 1045 mem_name->m_type[1] = mem_name32.m_type[1];
1046 1046 mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
1047 1047 mem_name->m_namelen = (size_t)mem_name32.m_namelen;
1048 1048 mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
1049 1049 mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
1050 1050 }
1051 1051 #endif /* _SYSCALL32 */
1052 1052
1053 1053 return (0);
1054 1054 }
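The substantive change in this patch is the single hunk in mmpagelock(): the SEGOP_CAPABLE() dispatch macro is replaced by a call to a lower-case segop_capable() function. As a rough illustration of what such a wrapper looks like, the following is a minimal, hypothetical sketch; it assumes the capable() entry point in the seg_ops vtable keeps the (struct seg *, segcapability_t) signature from <vm/seg.h>, and the real function introduced by the patch series may differ in detail:

	/*
	 * Hypothetical sketch of a lower-case replacement for the
	 * SEGOP_CAPABLE() macro: dispatch through the segment
	 * driver's ops vector, treating a missing capable() entry
	 * point as "capability not supported".
	 */
	int
	segop_capable(struct seg *seg, segcapability_t capability)
	{
		if (seg->s_ops->capable == NULL)
			return (0);

		return (seg->s_ops->capable(seg, capability));
	}

Wrapping the ops-vector dispatch in a real function rather than a macro lets the compiler type-check callers such as mmpagelock() and gives debuggers and DTrace a stable symbol to instrument, while leaving the behavior of the call site unchanged.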