Print this page
patch lower-case-segops
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/dumpsubr.c
+++ new/usr/src/uts/common/os/dumpsubr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2013, Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/param.h>
29 29 #include <sys/systm.h>
30 30 #include <sys/vm.h>
31 31 #include <sys/proc.h>
32 32 #include <sys/file.h>
33 33 #include <sys/conf.h>
34 34 #include <sys/kmem.h>
35 35 #include <sys/mem.h>
36 36 #include <sys/mman.h>
37 37 #include <sys/vnode.h>
38 38 #include <sys/errno.h>
39 39 #include <sys/memlist.h>
40 40 #include <sys/dumphdr.h>
41 41 #include <sys/dumpadm.h>
42 42 #include <sys/ksyms.h>
43 43 #include <sys/compress.h>
44 44 #include <sys/stream.h>
45 45 #include <sys/strsun.h>
46 46 #include <sys/cmn_err.h>
47 47 #include <sys/bitmap.h>
48 48 #include <sys/modctl.h>
49 49 #include <sys/utsname.h>
50 50 #include <sys/systeminfo.h>
51 51 #include <sys/vmem.h>
52 52 #include <sys/log.h>
53 53 #include <sys/var.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/sunddi.h>
56 56 #include <fs/fs_subr.h>
57 57 #include <sys/fs/snode.h>
58 58 #include <sys/ontrap.h>
59 59 #include <sys/panic.h>
60 60 #include <sys/dkio.h>
61 61 #include <sys/vtoc.h>
62 62 #include <sys/errorq.h>
63 63 #include <sys/fm/util.h>
64 64 #include <sys/fs/zfs.h>
65 65
66 66 #include <vm/hat.h>
67 67 #include <vm/as.h>
68 68 #include <vm/page.h>
69 69 #include <vm/pvn.h>
70 70 #include <vm/seg.h>
71 71 #include <vm/seg_kmem.h>
72 72 #include <sys/clock_impl.h>
73 73 #include <sys/hold_page.h>
74 74
75 75 /*
76 76 * exported vars
77 77 */
78 78 kmutex_t dump_lock; /* lock for dump configuration */
79 79 dumphdr_t *dumphdr; /* dump header */
80 80 int dump_conflags = DUMP_KERNEL; /* dump configuration flags */
81 81 vnode_t *dumpvp; /* dump device vnode pointer */
82 82 u_offset_t dumpvp_size; /* size of dump device, in bytes */
83 83 char *dumppath; /* pathname of dump device */
84 84 int dump_timeout = 120; /* timeout for dumping pages */
85 85 int dump_timeleft; /* portion of dump_timeout remaining */
86 86 int dump_ioerr; /* dump i/o error */
87 87 char *dump_stack_scratch; /* scratch area for saving stack summary */
88 88
89 89 /*
90 90 * Tunables for dump. These can be set via /etc/system.
91 91 *
92 92 * dump_metrics_on if set, metrics are collected in the kernel, passed
93 93 * to savecore via the dump file, and recorded by savecore in
94 94 * METRICS.txt.
95 95 */
96 96
97 97 /* tunables for pre-reserved heap */
98 98 uint_t dump_kmem_permap = 1024;
99 99 uint_t dump_kmem_pages = 8;
100 100
101 101 /*
102 102 * Compression metrics are accumulated nano-second subtotals. The
103 103 * results are normalized by the number of pages dumped. A report is
104 104 * generated when dumpsys() completes and is saved in the dump image
105 105 * after the trailing dump header.
106 106 *
107 107 * Metrics are always collected. Set the variable dump_metrics_on to
108 108 * cause metrics to be saved in the crash file, where savecore will
109 109 * save it in the file METRICS.txt.
110 110 */
111 111 #define PERPAGES \
112 112 PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \
113 113 PERPAGE(compress) \
114 114 PERPAGE(write)
115 115
116 116 typedef struct perpage {
117 117 #define PERPAGE(x) hrtime_t x;
118 118 PERPAGES
119 119 #undef PERPAGE
120 120 } perpage_t;
121 121
122 122 /*
123 123 * If dump_metrics_on is set to 1, the timing information is passed to
124 124 * savecore via the crash file, where it is appended to the file
125 125 * dump-dir/METRICS.txt.
126 126 */
127 127 uint_t dump_metrics_on = 0; /* set to 1 to enable recording metrics */
128 128
129 129 #define HRSTART(v, m) v##ts.m = gethrtime()
130 130 #define HRSTOP(v, m) v.m += gethrtime() - v##ts.m
131 131
132 132 static char dump_osimage_uuid[36 + 1];
133 133
134 134 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9')
135 135 #define isxdigit(ch) (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
136 136 ((ch) >= 'A' && (ch) <= 'F'))
137 137
138 138 /*
139 139 * configuration vars for dumpsys
140 140 */
141 141 typedef struct dumpcfg {
142 142 char *page; /* buffer for page copy */
143 143 char *lzbuf; /* lzjb output */
144 144
145 145 char *cmap; /* array of input (map) buffers */
146 146 ulong_t *bitmap; /* bitmap for marking pages to dump */
147 147 pgcnt_t bitmapsize; /* size of bitmap */
148 148 pid_t *pids; /* list of process IDs at dump time */
149 149
150 150 /*
151 151 * statistics
152 152 */
153 153 perpage_t perpage; /* per page metrics */
154 154 perpage_t perpagets; /* per page metrics (timestamps) */
155 155 pgcnt_t npages; /* subtotal of pages dumped */
156 156 pgcnt_t pages_mapped; /* subtotal of pages mapped */
157 157 pgcnt_t pages_used; /* subtotal of pages used per map */
158 158 size_t nwrite; /* subtotal of bytes written */
159 159 hrtime_t elapsed; /* elapsed time when completed */
160 160 hrtime_t iotime; /* time spent writing nwrite bytes */
161 161 hrtime_t iowait; /* time spent waiting for output */
162 162 hrtime_t iowaitts; /* iowait timestamp */
163 163
164 164 /*
165 165 * I/O buffer
166 166 *
167 167 * There is one I/O buffer used by dumpvp_write and dumvp_flush. It
168 168 * is sized according to the optimum device transfer speed.
169 169 */
170 170 struct {
171 171 vnode_t *cdev_vp; /* VCHR open of the dump device */
172 172 len_t vp_limit; /* maximum write offset */
173 173 offset_t vp_off; /* current dump device offset */
174 174 char *cur; /* dump write pointer */
175 175 char *start; /* dump buffer address */
176 176 char *end; /* dump buffer end */
177 177 size_t size; /* size of dump buf in bytes */
178 178 size_t iosize; /* best transfer size for device */
179 179 } buf;
180 180 } dumpcfg_t;
181 181
182 182 static dumpcfg_t dumpcfg; /* config vars */
183 183
184 184 /*
185 185 * The dump I/O buffer must be at least one page, at most xfer_size bytes,
186 186 * and should scale with physmem in between. The transfer size passed in
187 187 * will either represent a global default (maxphys) or the best size for the
188 188 * device. The size of the dump I/O buffer is limited by dumpbuf_limit (8MB
189 189 * by default) because the dump performance saturates beyond a certain size.
190 190 * The default is to select 1/4096 of the memory.
191 191 */
192 192 static int dumpbuf_fraction = 12; /* memory size scale factor */
193 193 static size_t dumpbuf_limit = 8 << 20; /* max I/O buf size */
194 194
195 195 static size_t
196 196 dumpbuf_iosize(size_t xfer_size)
197 197 {
198 198 size_t iosize = ptob(physmem >> dumpbuf_fraction);
199 199
200 200 if (iosize < PAGESIZE)
201 201 iosize = PAGESIZE;
202 202 else if (iosize > xfer_size)
203 203 iosize = xfer_size;
204 204 if (iosize > dumpbuf_limit)
205 205 iosize = dumpbuf_limit;
206 206 return (iosize & PAGEMASK);
207 207 }
208 208
209 209 /*
210 210 * resize the I/O buffer
211 211 */
212 212 static void
213 213 dumpbuf_resize(void)
214 214 {
215 215 char *old_buf = dumpcfg.buf.start;
216 216 size_t old_size = dumpcfg.buf.size;
217 217 char *new_buf;
218 218 size_t new_size;
219 219
220 220 ASSERT(MUTEX_HELD(&dump_lock));
221 221
222 222 new_size = dumpbuf_iosize(MAX(dumpcfg.buf.iosize, maxphys));
223 223 if (new_size <= old_size)
224 224 return; /* no need to reallocate buffer */
225 225
226 226 new_buf = kmem_alloc(new_size, KM_SLEEP);
227 227 dumpcfg.buf.size = new_size;
228 228 dumpcfg.buf.start = new_buf;
229 229 dumpcfg.buf.end = new_buf + new_size;
230 230 kmem_free(old_buf, old_size);
231 231 }
232 232
233 233 /*
234 234 * dump_update_clevel is called when dumpadm configures the dump device.
235 235 * Allocate the minimum configuration for now.
236 236 *
237 237 * When the dump file is configured we reserve a minimum amount of
238 238 * memory for use at crash time. But we reserve VA for all the memory
239 239 * we really want in order to do the fastest dump possible. The VA is
240 240 * backed by pages not being dumped, according to the bitmap. If
241 241 * there is insufficient spare memory, however, we fall back to the
242 242 * minimum.
243 243 *
244 244 * Live dump (savecore -L) always uses the minimum config.
245 245 */
static void
dump_update_clevel()
{
	dumpcfg_t *old = &dumpcfg;
	dumpcfg_t newcfg = *old;	/* build the new config in a copy */
	dumpcfg_t *new = &newcfg;

	ASSERT(MUTEX_HELD(&dump_lock));

	/*
	 * Free the previously allocated bufs and VM.
	 */
	if (old->lzbuf)
		kmem_free(old->lzbuf, PAGESIZE);
	if (old->page)
		kmem_free(old->page, PAGESIZE);

	if (old->cmap)
		/* VM space for mapping pages */
		vmem_xfree(heap_arena, old->cmap, PAGESIZE);

	/*
	 * Allocate new data structures and buffers, and also figure the max
	 * desired size.
	 */
	new->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);	/* lzjb output page */
	new->page = kmem_alloc(PAGESIZE, KM_SLEEP);	/* page copy buffer */

	/* one page of VA, page-aligned, for mapping pages during the dump */
	new->cmap = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, NULL, NULL, VM_SLEEP);

	/*
	 * Reserve memory for kmem allocation calls made during crash
	 * dump. The hat layer allocates memory for each mapping
	 * created, and the I/O path allocates buffers and data structs.
	 * Add a few pages for safety.
	 */
	kmem_dump_init(dump_kmem_permap + (dump_kmem_pages * PAGESIZE));

	/* set new config pointers */
	*old = *new;
}
288 288
289 289 /*
290 290 * Define a struct memlist walker to optimize bitnum to pfn
291 291 * lookup. The walker maintains the state of the list traversal.
292 292 */
293 293 typedef struct dumpmlw {
294 294 struct memlist *mp; /* current memlist */
295 295 pgcnt_t basenum; /* bitnum base offset */
296 296 pgcnt_t mppages; /* current memlist size */
297 297 pgcnt_t mpleft; /* size to end of current memlist */
298 298 pfn_t mpaddr; /* first pfn in memlist */
299 299 } dumpmlw_t;
300 300
301 301 /* initialize the walker */
302 302 static inline void
303 303 dump_init_memlist_walker(dumpmlw_t *pw)
304 304 {
305 305 pw->mp = phys_install;
306 306 pw->basenum = 0;
307 307 pw->mppages = pw->mp->ml_size >> PAGESHIFT;
308 308 pw->mpleft = pw->mppages;
309 309 pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
310 310 }
311 311
/*
 * Lookup pfn given bitnum. The memlist can be quite long on some
 * systems (e.g.: one per board). To optimize sequential lookups, the
 * caller initializes and presents a memlist walker.
 */
static pfn_t
dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw)
{
	/* make bitnum relative to the walker's current memlist segment */
	bitnum -= pw->basenum;
	while (pw->mp != NULL) {
		if (bitnum < pw->mppages) {
			/* found: remember pages left for sequential callers */
			pw->mpleft = pw->mppages - bitnum;
			return (pw->mpaddr + bitnum);
		}
		/* advance the walker to the next memlist segment */
		bitnum -= pw->mppages;
		pw->basenum += pw->mppages;
		pw->mp = pw->mp->ml_next;
		if (pw->mp != NULL) {
			pw->mppages = pw->mp->ml_size >> PAGESHIFT;
			pw->mpleft = pw->mppages;
			pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
		}
	}
	/* bitnum is beyond the end of installed memory */
	return (PFN_INVALID);
}
337 337
338 338 static pgcnt_t
339 339 dump_pfn_to_bitnum(pfn_t pfn)
340 340 {
341 341 struct memlist *mp;
342 342 pgcnt_t bitnum = 0;
343 343
344 344 for (mp = phys_install; mp != NULL; mp = mp->ml_next) {
345 345 if (pfn >= (mp->ml_address >> PAGESHIFT) &&
346 346 pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT))
347 347 return (bitnum + pfn - (mp->ml_address >> PAGESHIFT));
348 348 bitnum += mp->ml_size >> PAGESHIFT;
349 349 }
350 350 return ((pgcnt_t)-1);
351 351 }
352 352
/*
 * Allocate (on first call) the global dump header, the dump I/O buffer,
 * the pid list and the stack-summary scratch area; on every call,
 * re-size the page bitmap if the amount of physical memory changed.
 * Called with dump_lock held when the dump device is configured.
 */
static void
dumphdr_init(void)
{
	pgcnt_t npages;

	ASSERT(MUTEX_HELD(&dump_lock));

	if (dumphdr == NULL) {
		dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
		dumphdr->dump_magic = DUMP_MAGIC;
		dumphdr->dump_version = DUMP_VERSION;
		dumphdr->dump_wordsize = DUMP_WORDSIZE;
		dumphdr->dump_pageshift = PAGESHIFT;
		dumphdr->dump_pagesize = PAGESIZE;
		dumphdr->dump_utsname = utsname;
		(void) strcpy(dumphdr->dump_platform, platform);
		dumpcfg.buf.size = dumpbuf_iosize(maxphys);
		dumpcfg.buf.start = kmem_alloc(dumpcfg.buf.size, KM_SLEEP);
		dumpcfg.buf.end = dumpcfg.buf.start + dumpcfg.buf.size;
		/* one slot per process-table entry (see dumpcfg.pids) */
		dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
		dump_stack_scratch = kmem_alloc(STACK_BUF_SIZE, KM_SLEEP);
		/*
		 * NOTE(review): strncpy does not NUL-terminate when the
		 * source fills the buffer; dumphdr was zalloc'd, so this
		 * is only a concern if dump_get_uuid() can be exactly
		 * sizeof (dump_uuid) bytes — confirm.
		 */
		(void) strncpy(dumphdr->dump_uuid, dump_get_uuid(),
		    sizeof (dumphdr->dump_uuid));
	}

	npages = num_phys_pages();

	/* (re)allocate the bitmap to cover the current page count */
	if (dumpcfg.bitmapsize != npages) {
		void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);

		if (dumpcfg.bitmap != NULL)
			kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.
			    bitmapsize));
		dumpcfg.bitmap = map;
		dumpcfg.bitmapsize = npages;
	}
}
390 390
391 391 /*
392 392 * Establish a new dump device.
393 393 */
394 394 int
395 395 dumpinit(vnode_t *vp, char *name, int justchecking)
396 396 {
397 397 vnode_t *cvp;
398 398 vattr_t vattr;
399 399 vnode_t *cdev_vp;
400 400 int error = 0;
401 401
402 402 ASSERT(MUTEX_HELD(&dump_lock));
403 403
404 404 dumphdr_init();
405 405
406 406 cvp = common_specvp(vp);
407 407 if (cvp == dumpvp)
408 408 return (0);
409 409
410 410 /*
411 411 * Determine whether this is a plausible dump device. We want either:
412 412 * (1) a real device that's not mounted and has a cb_dump routine, or
413 413 * (2) a swapfile on some filesystem that has a vop_dump routine.
414 414 */
415 415 if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0)
416 416 return (error);
417 417
418 418 vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV;
419 419 if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) {
420 420 if (vattr.va_type == VBLK || vattr.va_type == VCHR) {
421 421 if (devopsp[getmajor(vattr.va_rdev)]->
422 422 devo_cb_ops->cb_dump == nodev)
423 423 error = ENOTSUP;
424 424 else if (vfs_devismounted(vattr.va_rdev))
425 425 error = EBUSY;
426 426 if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip),
427 427 ZFS_DRIVER) == 0 &&
428 428 IS_SWAPVP(common_specvp(cvp)))
429 429 error = EBUSY;
430 430 } else {
431 431 if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) ||
432 432 !IS_SWAPVP(cvp))
433 433 error = ENOTSUP;
434 434 }
435 435 }
436 436
437 437 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE)
438 438 error = ENOSPC;
439 439
440 440 if (error || justchecking) {
441 441 (void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0,
442 442 kcred, NULL);
443 443 return (error);
444 444 }
445 445
446 446 VN_HOLD(cvp);
447 447
448 448 if (dumpvp != NULL)
449 449 dumpfini(); /* unconfigure the old dump device */
450 450
451 451 dumpvp = cvp;
452 452 dumpvp_size = vattr.va_size & -DUMP_OFFSET;
453 453 dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP);
454 454 (void) strcpy(dumppath, name);
455 455 dumpcfg.buf.iosize = 0;
456 456
457 457 /*
458 458 * If the dump device is a block device, attempt to open up the
459 459 * corresponding character device and determine its maximum transfer
460 460 * size. We use this information to potentially resize dump buffer
461 461 * to a larger and more optimal size for performing i/o to the dump
462 462 * device.
463 463 */
464 464 if (cvp->v_type == VBLK &&
465 465 (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) {
466 466 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
467 467 size_t blk_size;
468 468 struct dk_cinfo dki;
469 469 struct dk_minfo minf;
470 470
471 471 if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO,
472 472 (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL)
473 473 == 0 && minf.dki_lbsize != 0)
474 474 blk_size = minf.dki_lbsize;
475 475 else
476 476 blk_size = DEV_BSIZE;
477 477
478 478 if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki,
479 479 FKIOCTL, kcred, NULL, NULL) == 0) {
480 480 dumpcfg.buf.iosize = dki.dki_maxtransfer * blk_size;
481 481 dumpbuf_resize();
482 482 }
483 483 /*
484 484 * If we are working with a zvol then dumpify it
485 485 * if it's not being used as swap.
486 486 */
487 487 if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) {
488 488 if (IS_SWAPVP(common_specvp(cvp)))
489 489 error = EBUSY;
490 490 else if ((error = VOP_IOCTL(cdev_vp,
491 491 DKIOCDUMPINIT, NULL, FKIOCTL, kcred,
492 492 NULL, NULL)) != 0)
493 493 dumpfini();
494 494 }
495 495
496 496 (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
497 497 kcred, NULL);
498 498 }
499 499
500 500 VN_RELE(cdev_vp);
501 501 }
502 502
503 503 cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20);
504 504
505 505 dump_update_clevel();
506 506
507 507 return (error);
508 508 }
509 509
/*
 * Tear down the current dump device configuration.  Called with
 * dump_lock held (see ASSERT); dumpinit() calls this when replacing
 * an existing dump device.
 */
void
dumpfini(void)
{
	vattr_t vattr;
	boolean_t is_zfs = B_FALSE;
	vnode_t *cdev_vp;
	ASSERT(MUTEX_HELD(&dump_lock));

	kmem_free(dumppath, strlen(dumppath) + 1);

	/*
	 * Determine if we are using zvols for our dump device
	 */
	vattr.va_mask = AT_RDEV;
	if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) {
		is_zfs = (getmajor(vattr.va_rdev) ==
		    ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE;
	}

	/*
	 * If we have a zvol dump device then we call into zfs so
	 * that it may have a chance to cleanup.
	 */
	if (is_zfs &&
	    (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) {
		if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
			(void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL,
			    kcred, NULL, NULL);
			(void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
		}
		VN_RELE(cdev_vp);
	}

	(void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL);

	VN_RELE(dumpvp);

	/* clear the globals; a later dumpinit() may install a new device */
	dumpvp = NULL;
	dumpvp_size = 0;
	dumppath = NULL;
}
552 552
/*
 * Write out the current contents of the dump I/O buffer, padded up to
 * a page multiple.  At panic time the device's dump entry point is
 * used (VOP_DUMP); for a live dump we go through vn_rdwr, preferring
 * the character device if one was opened.  Updates the I/O metrics,
 * resets the buffer to empty, and returns the new device offset.
 */
static offset_t
dumpvp_flush(void)
{
	size_t size = P2ROUNDUP(dumpcfg.buf.cur - dumpcfg.buf.start, PAGESIZE);
	hrtime_t iotime;
	int err;

	if (dumpcfg.buf.vp_off + size > dumpcfg.buf.vp_limit) {
		/* would run past the device limit: record ENOSPC and stop */
		dump_ioerr = ENOSPC;
		dumpcfg.buf.vp_off = dumpcfg.buf.vp_limit;
	} else if (size != 0) {
		iotime = gethrtime();
		/* time since the previous flush finished counts as I/O wait */
		dumpcfg.iowait += iotime - dumpcfg.iowaitts;
		if (panicstr)
			err = VOP_DUMP(dumpvp, dumpcfg.buf.start,
			    lbtodb(dumpcfg.buf.vp_off), btod(size), NULL);
		else
			err = vn_rdwr(UIO_WRITE, dumpcfg.buf.cdev_vp != NULL ?
			    dumpcfg.buf.cdev_vp : dumpvp, dumpcfg.buf.start, size,
			    dumpcfg.buf.vp_off, UIO_SYSSPACE, 0, dumpcfg.buf.vp_limit,
			    kcred, 0);
		if (err && dump_ioerr == 0)
			dump_ioerr = err;	/* keep only the first error */
		dumpcfg.iowaitts = gethrtime();
		dumpcfg.iotime += dumpcfg.iowaitts - iotime;
		dumpcfg.nwrite += size;
		dumpcfg.buf.vp_off += size;
	}
	dumpcfg.buf.cur = dumpcfg.buf.start;	/* buffer is empty again */
	dump_timeleft = dump_timeout;		/* reset the watchdog */
	return (dumpcfg.buf.vp_off);
}
585 585
/* maximize write speed by keeping seek offset aligned with size */
void
dumpvp_write(const void *va, size_t size)
{
	size_t len, off, sz;

	while (size != 0) {
		/* copy as much as fits in the remaining buffer space */
		len = MIN(size, dumpcfg.buf.end - dumpcfg.buf.cur);
		if (len == 0) {
			/* buffer is full: flush, keeping vp_off aligned */
			off = P2PHASE(dumpcfg.buf.vp_off, dumpcfg.buf.size);
			if (off == 0 || !ISP2(dumpcfg.buf.size)) {
				(void) dumpvp_flush();
			} else {
				/*
				 * vp_off is mis-aligned: write only up to
				 * the next buffer-size boundary, then slide
				 * the unwritten tail back to the start of
				 * the buffer so the next flush is aligned.
				 */
				sz = dumpcfg.buf.size - off;
				dumpcfg.buf.cur = dumpcfg.buf.start + sz;
				(void) dumpvp_flush();
				ovbcopy(dumpcfg.buf.start + sz, dumpcfg.buf.start, off);
				dumpcfg.buf.cur += off;
			}
		} else {
			bcopy(va, dumpcfg.buf.cur, len);
			va = (char *)va + len;
			dumpcfg.buf.cur += len;
			size -= len;
		}
	}
}
613 613
/*
 * Copy-style callback that streams 'src' straight to the dump device;
 * the 'dst' argument is intentionally ignored (hence ARGSUSED).
 */
/*ARGSUSED*/
static void
dumpvp_ksyms_write(const void *src, void *dst, size_t size)
{
	dumpvp_write(src, size);
}
620 620
621 621 /*
622 622 * Mark 'pfn' in the bitmap and dump its translation table entry.
623 623 */
624 624 void
625 625 dump_addpage(struct as *as, void *va, pfn_t pfn)
626 626 {
627 627 mem_vtop_t mem_vtop;
628 628 pgcnt_t bitnum;
629 629
630 630 if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
631 631 if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
632 632 dumphdr->dump_npages++;
633 633 BT_SET(dumpcfg.bitmap, bitnum);
634 634 }
635 635 dumphdr->dump_nvtop++;
636 636 mem_vtop.m_as = as;
637 637 mem_vtop.m_va = va;
638 638 mem_vtop.m_pfn = pfn;
639 639 dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
640 640 }
641 641 dump_timeleft = dump_timeout;
642 642 }
643 643
644 644 /*
645 645 * Mark 'pfn' in the bitmap
646 646 */
647 647 void
648 648 dump_page(pfn_t pfn)
649 649 {
650 650 pgcnt_t bitnum;
651 651
652 652 if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
↓ open down ↓ |
652 lines elided |
↑ open up ↑ |
653 653 if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
654 654 dumphdr->dump_npages++;
655 655 BT_SET(dumpcfg.bitmap, bitnum);
656 656 }
657 657 }
658 658 dump_timeleft = dump_timeout;
659 659 }
660 660
/*
 * Dump the <as, va, pfn> information for a given address space.
 * segop_dump() will call dump_addpage() for each page in the segment.
 */
665 665 static void
666 666 dump_as(struct as *as)
667 667 {
668 668 struct seg *seg;
669 669
670 670 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
671 671 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
672 672 if (seg->s_as != as)
673 673 break;
674 674 if (seg->s_ops == NULL)
675 675 continue;
676 - SEGOP_DUMP(seg);
676 + segop_dump(seg);
677 677 }
678 678 AS_LOCK_EXIT(as, &as->a_lock);
679 679
680 680 if (seg != NULL)
681 681 cmn_err(CE_WARN, "invalid segment %p in address space %p",
682 682 (void *)seg, (void *)as);
683 683 }
684 684
685 685 static int
686 686 dump_process(pid_t pid)
687 687 {
688 688 proc_t *p = sprlock(pid);
689 689
690 690 if (p == NULL)
691 691 return (-1);
692 692 if (p->p_as != &kas) {
693 693 mutex_exit(&p->p_lock);
694 694 dump_as(p->p_as);
695 695 mutex_enter(&p->p_lock);
696 696 }
697 697
698 698 sprunlock(p);
699 699
700 700 return (0);
701 701 }
702 702
703 703 /*
704 704 * The following functions (dump_summary(), dump_ereports(), and
705 705 * dump_messages()), write data to an uncompressed area within the
706 706 * crashdump. The layout of these is
707 707 *
708 708 * +------------------------------------------------------------+
709 709 * | compressed pages | summary | ereports | messages |
710 710 * +------------------------------------------------------------+
711 711 *
712 712 * With the advent of saving a compressed crash dump by default, we
713 713 * need to save a little more data to describe the failure mode in
714 714 * an uncompressed buffer available before savecore uncompresses
715 715 * the dump. Initially this is a copy of the stack trace. Additional
716 716 * summary information should be added here.
717 717 */
718 718
/*
 * Write the uncompressed summary area (a checksummed copy of the
 * saved stack summary) just before the ereport and message areas.
 * A zero sd_magic record terminates the summary.
 */
void
dump_summary(void)
{
	u_offset_t dumpvp_start;
	summary_dump_t sd;

	if (dumpvp == NULL || dumphdr == NULL)
		return;

	dumpcfg.buf.cur = dumpcfg.buf.start;

	/* the summary sits directly below the ereport and log areas */
	dumpcfg.buf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE +
	    DUMP_ERPTSIZE);
	dumpvp_start = dumpcfg.buf.vp_limit - DUMP_SUMMARYSIZE;
	dumpcfg.buf.vp_off = dumpvp_start;

	sd.sd_magic = SUMMARY_MAGIC;
	sd.sd_ssum = checksum32(dump_stack_scratch, STACK_BUF_SIZE);
	dumpvp_write(&sd, sizeof (sd));
	dumpvp_write(dump_stack_scratch, STACK_BUF_SIZE);

	sd.sd_magic = 0;	/* indicate end of summary */
	dumpvp_write(&sd, sizeof (sd));
	(void) dumpvp_flush();
}
744 744
/*
 * Write pending FMA ereports (and, at panic time, the error queues)
 * into the uncompressed ereport area of the dump.  A zeroed record
 * terminates the list.
 */
void
dump_ereports(void)
{
	u_offset_t dumpvp_start;
	erpt_dump_t ed;

	if (dumpvp == NULL || dumphdr == NULL)
		return;

	dumpcfg.buf.cur = dumpcfg.buf.start;
	/* the ereport area sits directly below the message area */
	dumpcfg.buf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE);
	dumpvp_start = dumpcfg.buf.vp_limit - DUMP_ERPTSIZE;
	dumpcfg.buf.vp_off = dumpvp_start;

	fm_ereport_dump();
	if (panicstr)
		errorq_dump();

	bzero(&ed, sizeof (ed)); /* indicate end of ereports */
	dumpvp_write(&ed, sizeof (ed));
	(void) dumpvp_flush();

	if (!panicstr) {
		/* live dump: force the cached pages out to the device */
		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
		    (size_t)(dumpcfg.buf.vp_off - dumpvp_start),
		    B_INVAL | B_FORCE, kcred, NULL);
	}
}
773 773
/*
 * Write the console log stream's queued messages into the
 * uncompressed message area of the dump, each prefixed with a
 * checksummed log_dump_t header so savecore can validate them.
 */
void
dump_messages(void)
{
	log_dump_t ld;
	mblk_t *mctl, *mdata;
	queue_t *q, *qlast;
	u_offset_t dumpvp_start;

	if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL)
		return;

	dumpcfg.buf.cur = dumpcfg.buf.start;
	/* the message area is the last DUMP_LOGSIZE bytes below DUMP_OFFSET */
	dumpcfg.buf.vp_limit = dumpvp_size - DUMP_OFFSET;
	dumpvp_start = dumpcfg.buf.vp_limit - DUMP_LOGSIZE;
	dumpcfg.buf.vp_off = dumpvp_start;

	/*
	 * Walk the stream from the queue farthest from log_consq back
	 * toward log_consq itself: each pass scans forward to the queue
	 * whose q_next is the one handled on the previous pass.
	 */
	qlast = NULL;
	do {
		for (q = log_consq; q->q_next != qlast; q = q->q_next)
			continue;
		for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) {
			dump_timeleft = dump_timeout;	/* reset watchdog */
			mdata = mctl->b_cont;
			ld.ld_magic = LOG_MAGIC;
			ld.ld_msgsize = MBLKL(mctl->b_cont);
			ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl));
			ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata));
			dumpvp_write(&ld, sizeof (ld));
			dumpvp_write(mctl->b_rptr, MBLKL(mctl));
			dumpvp_write(mdata->b_rptr, MBLKL(mdata));
		}
	} while ((qlast = q) != log_consq);

	ld.ld_magic = 0; /* indicate end of messages */
	dumpvp_write(&ld, sizeof (ld));
	(void) dumpvp_flush();
	if (!panicstr) {
		/* live dump: force the cached pages out to the device */
		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
		    (size_t)(dumpcfg.buf.vp_off - dumpvp_start),
		    B_INVAL | B_FORCE, kcred, NULL);
	}
}
816 816
/*
 * Copy pages, trapping ECC errors. Also, for robustness, trap data
 * access in case something goes wrong in the hat layer and the
 * mapping is broken.
 *
 * Returns the byte offset of the first faulting word, or -1 when the
 * whole page copied cleanly.
 */
static int
dump_pagecopy(void *src, void *dst)
{
	long *wsrc = (long *)src;
	long *wdst = (long *)dst;
	const ulong_t ncopies = PAGESIZE / sizeof (long);
	/* volatile: these must survive a return through on_trap() */
	volatile int w = 0;
	volatile int ueoff = -1;
	on_trap_data_t otd;

	/*
	 * on_trap() returns non-zero when a protected trap fires during
	 * the copy loop below; execution then resumes here with 'w' at
	 * the faulting word.
	 */
	if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) {
		if (ueoff == -1)
			ueoff = w * sizeof (long);	/* first error only */
		/* report "bad ECC" or "bad address" */
#ifdef _LP64
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc00badecc;
		else
			wdst[w++] = 0x00badadd00badadd;
#else
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc;
		else
			wdst[w++] = 0x00badadd;
#endif
	}
	/* copy (or resume copying) the page word by word */
	while (w < ncopies) {
		wdst[w] = wsrc[w];
		w++;
	}
	no_trap();
	return (ueoff);
}
855 855
856 856 size_t
857 857 dumpsys_metrics(char *buf, size_t size)
858 858 {
859 859 dumpcfg_t *cfg = &dumpcfg;
860 860 int myid = CPU->cpu_seqid;
861 861 int i, compress_ratio;
862 862 int sec, iorate;
863 863 char *e = buf + size;
864 864 char *p = buf;
865 865
866 866 sec = cfg->elapsed / (1000 * 1000 * 1000ULL);
867 867 if (sec < 1)
868 868 sec = 1;
869 869
870 870 if (cfg->iotime < 1)
871 871 cfg->iotime = 1;
872 872 iorate = (cfg->nwrite * 100000ULL) / cfg->iotime;
873 873
874 874 compress_ratio = 100LL * cfg->npages / btopr(cfg->nwrite + 1);
875 875
876 876 #define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0)
877 877
878 878 P("Master cpu_seqid,%d\n", CPU->cpu_seqid);
879 879 P("Master cpu_id,%d\n", CPU->cpu_id);
880 880 P("dump_flags,0x%x\n", dumphdr->dump_flags);
881 881 P("dump_ioerr,%d\n", dump_ioerr);
882 882
883 883 P("Compression type,serial lzjb\n");
884 884 P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
885 885 100);
886 886
887 887 P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
888 888 P("..total bytes,%lld\n", (u_longlong_t)cfg->nwrite);
889 889 P("..total nsec,%lld\n", (u_longlong_t)cfg->iotime);
890 890 P("dumpbuf.iosize,%ld\n", dumpcfg.buf.iosize);
891 891 P("dumpbuf.size,%ld\n", dumpcfg.buf.size);
892 892
893 893 P("Dump pages/sec,%llu\n", (u_longlong_t)cfg->npages / sec);
894 894 P("Dump pages,%llu\n", (u_longlong_t)cfg->npages);
895 895 P("Dump time,%d\n", sec);
896 896
897 897 if (cfg->pages_mapped > 0)
898 898 P("per-cent map utilization,%d\n", (int)((100 * cfg->pages_used)
899 899 / cfg->pages_mapped));
900 900
901 901 P("\nPer-page metrics:\n");
902 902 if (cfg->npages > 0) {
903 903 #define PERPAGE(x) \
904 904 P("%s nsec/page,%d\n", #x, (int)(cfg->perpage.x / cfg->npages));
905 905 PERPAGES;
906 906 #undef PERPAGE
907 907
908 908 P("I/O wait nsec/page,%llu\n", (u_longlong_t)(cfg->iowait /
909 909 cfg->npages));
910 910 }
911 911 #undef P
912 912 if (p < e)
913 913 bzero(p, e - p);
914 914 return (p - buf);
915 915 }
916 916
/*
 * Dump the system.
 *
 * Writes a crash dump (or a live dump, when not panicking) to the
 * configured dump device in this order: ksyms snapshot, translation
 * map, pfn table, compressed page data, end-of-stream marker, and
 * finally the initial and terminal dump headers.  Progress is
 * reported on the console as pages are written.
 */
void
dumpsys(void)
{
	dumpcfg_t *cfg = &dumpcfg;
	uint_t percent_done;	/* dump progress reported */
	hrtime_t start;		/* start time */
	pfn_t pfn;
	pgcnt_t bitnum;
	proc_t *p;
	pid_t npids, pidx;
	char *content;		/* console description of dump content */
	char *buf;		/* scratch buffer for metrics output */
	size_t size;		/* bytes available in buf for metrics */
	dumpmlw_t mlw;		/* memlist walker state */
	dumpcsize_t datatag;	/* end-of-stream tag written after pages */
	dumpdatahdr_t datahdr;	/* compression/metrics header */

	if (dumpvp == NULL || dumphdr == NULL) {
		uprintf("skipping system dump - no dump device configured\n");
		return;
	}
	dumpcfg.buf.cur = dumpcfg.buf.start;

	/* clear the sync variables */
	cfg->npages = 0;
	cfg->pages_mapped = 0;
	cfg->pages_used = 0;
	cfg->nwrite = 0;
	cfg->elapsed = 0;
	cfg->iotime = 0;
	cfg->iowait = 0;
	cfg->iowaitts = 0;

	/*
	 * Calculate the starting block for dump. If we're dumping on a
	 * swap device, start 1/5 of the way in; otherwise, start at the
	 * beginning. And never use the first page -- it may be a disk label.
	 */
	if (dumpvp->v_flag & VISSWAP)
		dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
	else
		dumphdr->dump_start = DUMP_OFFSET;

	dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
	dumphdr->dump_crashtime = gethrestime_sec();
	dumphdr->dump_npages = 0;
	dumphdr->dump_nvtop = 0;
	bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
	dump_timeleft = dump_timeout;

	/*
	 * For a panic dump: clear the live-dump flag, reset the dump
	 * device allocation via VOP_DUMPCTL, and record the formatted
	 * panic string in the header.
	 */
	if (panicstr) {
		dumphdr->dump_flags &= ~DF_LIVE;
		(void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
		(void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
		(void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
		    panicstr, panicargs);
	}

	if (dump_conflags & DUMP_ALL)
		content = "all";
	else if (dump_conflags & DUMP_CURPROC)
		content = "kernel + curproc";
	else
		content = "kernel";
	uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
	    dumphdr->dump_start, content);

	/* Make sure nodename is current */
	bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);

	/*
	 * If this is a live dump, try to open a VCHR vnode for better
	 * performance. We must take care to flush the buffer cache
	 * first.
	 */
	if (!panicstr) {
		vnode_t *cdev_vp, *cmn_cdev_vp;

		ASSERT(dumpcfg.buf.cdev_vp == NULL);
		cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR);
		if (cdev_vp != NULL) {
			cmn_cdev_vp = common_specvp(cdev_vp);
			if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
			    == 0) {
				if (vn_has_cached_data(dumpvp))
					(void) pvn_vplist_dirty(dumpvp, 0, NULL,
					    B_INVAL | B_TRUNC, kcred);
				dumpcfg.buf.cdev_vp = cmn_cdev_vp;
			} else {
				VN_RELE(cdev_vp);
			}
		}
	}

	/*
	 * Store a hires timestamp so we can look it up during debugging.
	 */
	lbolt_debug_entry();

	/*
	 * Leave room for the message and ereport save areas and terminal dump
	 * header.
	 */
	dumpcfg.buf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
	    DUMP_ERPTSIZE;

	/*
	 * Write out the symbol table. It's no longer compressed,
	 * so its 'size' and 'csize' are equal.
	 */
	dumpcfg.buf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
	dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
	    ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);

	/*
	 * Write out the translation map.
	 */
	dumphdr->dump_map = dumpvp_flush();
	dump_as(&kas);
	dumphdr->dump_nvtop += dump_plat_addr();

	/*
	 * call into hat, which may have unmapped pages that also need to
	 * be in the dump
	 */
	hat_dump();

	if (dump_conflags & DUMP_ALL) {
		/*
		 * Snapshot all active pids under pidlock, then dump each
		 * process after dropping the lock.
		 */
		mutex_enter(&pidlock);

		for (npids = 0, p = practive; p != NULL; p = p->p_next)
			dumpcfg.pids[npids++] = p->p_pid;

		mutex_exit(&pidlock);

		for (pidx = 0; pidx < npids; pidx++)
			(void) dump_process(dumpcfg.pids[pidx]);

		/* mark every accessible physical page for inclusion */
		dump_init_memlist_walker(&mlw);
		for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
			dump_timeleft = dump_timeout;
			pfn = dump_bitnum_to_pfn(bitnum, &mlw);
			/*
			 * Some hypervisors do not have all pages available to
			 * be accessed by the guest OS. Check for page
			 * accessibility.
			 */
			if (plat_hold_page(pfn, PLAT_HOLD_NO_LOCK, NULL) !=
			    PLAT_HOLD_OK)
				continue;
			BT_SET(dumpcfg.bitmap, bitnum);
		}
		dumphdr->dump_npages = dumpcfg.bitmapsize;
		dumphdr->dump_flags |= DF_ALL;

	} else if (dump_conflags & DUMP_CURPROC) {
		/*
		 * Determine which pid is to be dumped. If we're panicking, we
		 * dump the process associated with panic_thread (if any). If
		 * this is a live dump, we dump the process associated with
		 * curthread.
		 */
		npids = 0;
		if (panicstr) {
			if (panic_thread != NULL &&
			    panic_thread->t_procp != NULL &&
			    panic_thread->t_procp != &p0) {
				dumpcfg.pids[npids++] =
				    panic_thread->t_procp->p_pid;
			}
		} else {
			dumpcfg.pids[npids++] = curthread->t_procp->p_pid;
		}

		/* fall back to a kernel-only dump if no process was dumped */
		if (npids && dump_process(dumpcfg.pids[0]) == 0)
			dumphdr->dump_flags |= DF_CURPROC;
		else
			dumphdr->dump_flags |= DF_KERNEL;

	} else {
		dumphdr->dump_flags |= DF_KERNEL;
	}

	dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1;

	/*
	 * Write out the pfn table.
	 */
	dumphdr->dump_pfn = dumpvp_flush();
	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
		dump_timeleft = dump_timeout;
		if (!BT_TEST(dumpcfg.bitmap, bitnum))
			continue;
		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);
		dumpvp_write(&pfn, sizeof (pfn_t));
	}
	dump_plat_pfn();

	/*
	 * Write out all the pages.
	 * Map pages, copy them handling UEs, compress, and write them out.
	 */
	dumphdr->dump_data = dumpvp_flush();

	ASSERT(dumpcfg.page);
	bzero(&dumpcfg.perpage, sizeof (dumpcfg.perpage));

	start = gethrtime();
	cfg->iowaitts = start;

	if (panicstr)
		kmem_dump_begin();

	percent_done = 0;

	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
		size_t csize;	/* compressed size of this page */

		dump_timeleft = dump_timeout;
		HRSTART(cfg->perpage, bitmap);
		if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
			HRSTOP(cfg->perpage, bitmap);
			continue;
		}
		HRSTOP(cfg->perpage, bitmap);

		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);

		/* map the page read-only at the preallocated VA window */
		HRSTART(cfg->perpage, map);
		hat_devload(kas.a_hat, dumpcfg.cmap, PAGESIZE, pfn, PROT_READ,
		    HAT_LOAD_NOCONSIST);
		HRSTOP(cfg->perpage, map);

		dump_pagecopy(dumpcfg.cmap, dumpcfg.page);

		HRSTART(cfg->perpage, unmap);
		hat_unload(kas.a_hat, dumpcfg.cmap, PAGESIZE, HAT_UNLOAD);
		HRSTOP(cfg->perpage, unmap);

		HRSTART(dumpcfg.perpage, compress);
		csize = compress(dumpcfg.page, dumpcfg.lzbuf, PAGESIZE);
		HRSTOP(dumpcfg.perpage, compress);

		/* write the size tag followed by the compressed data */
		HRSTART(dumpcfg.perpage, write);
		dumpvp_write(&csize, sizeof (csize));
		dumpvp_write(dumpcfg.lzbuf, csize);
		HRSTOP(dumpcfg.perpage, write);

		if (dump_ioerr) {
			dumphdr->dump_flags &= ~DF_COMPLETE;
			dumphdr->dump_npages = cfg->npages;
			break;
		}
		/* report progress each time another percent completes */
		if (++cfg->npages * 100LL / dumphdr->dump_npages > percent_done) {
			int sec;

			sec = (gethrtime() - start) / 1000 / 1000 / 1000;
			uprintf("^\r%2d:%02d %3d%% done", sec / 60, sec % 60,
			    ++percent_done);
			if (!panicstr)
				delay(1);	/* let the output be sent */
		}
	}

	/* clamp to at least 1 ns so later per-time metrics never divide by 0 */
	cfg->elapsed = gethrtime() - start;
	if (cfg->elapsed < 1)
		cfg->elapsed = 1;

	/* record actual pages dumped */
	dumphdr->dump_npages = cfg->npages;

	/* platform-specific data */
	dumphdr->dump_npages += dump_plat_data(dumpcfg.page);

	/* note any errors by clearing DF_COMPLETE */
	if (dump_ioerr || cfg->npages < dumphdr->dump_npages)
		dumphdr->dump_flags &= ~DF_COMPLETE;

	/* end of stream blocks */
	datatag = 0;
	dumpvp_write(&datatag, sizeof (datatag));

	bzero(&datahdr, sizeof (datahdr));

	/* buffer for metrics */
	buf = dumpcfg.page;
	size = MIN(PAGESIZE, DUMP_OFFSET - sizeof (dumphdr_t) -
	    sizeof (dumpdatahdr_t));

	/* finish the kmem intercepts, collect kmem verbose info */
	if (panicstr) {
		datahdr.dump_metrics = kmem_dump_finish(buf, size);
		buf += datahdr.dump_metrics;
		size -= datahdr.dump_metrics;
	}

	/* record in the header whether this is a fault-management panic */
	if (panicstr)
		dumphdr->dump_fm_panic = is_fm_panic();

	/* compression info in data header */
	datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC;
	datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION;
	datahdr.dump_maxcsize = PAGESIZE;
	datahdr.dump_maxrange = 1;
	datahdr.dump_nstreams = 1;
	datahdr.dump_clevel = 0;

	if (dump_metrics_on)
		datahdr.dump_metrics += dumpsys_metrics(buf, size);

	datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data;

	/*
	 * Write out the initial and terminal dump headers.
	 */
	dumpcfg.buf.vp_off = dumphdr->dump_start;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	(void) dumpvp_flush();

	dumpcfg.buf.vp_limit = dumpvp_size;
	dumpcfg.buf.vp_off = dumpcfg.buf.vp_limit - DUMP_OFFSET;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	dumpvp_write(&datahdr, sizeof (dumpdatahdr_t));
	dumpvp_write(dumpcfg.page, datahdr.dump_metrics);

	(void) dumpvp_flush();

	uprintf("\r%3d%% done: %llu pages dumped, ",
	    percent_done, (u_longlong_t)cfg->npages);

	if (dump_ioerr == 0) {
		uprintf("dump succeeded\n");
	} else {
		uprintf("dump failed: error %d\n", dump_ioerr);
#ifdef DEBUG
		if (panicstr)
			debug_enter("dump failed");
#endif
	}

	/*
	 * Write out all undelivered messages. This has to be the *last*
	 * thing we do because the dump process itself emits messages.
	 */
	if (panicstr) {
		dump_summary();
		dump_ereports();
		dump_messages();
	}

	delay(2 * hz);	/* let people see the 'done' message */
	dump_timeleft = 0;
	dump_ioerr = 0;

	/* restore settings after live dump completes */
	if (!panicstr) {
		/* release any VCHR open of the dump device */
		if (dumpcfg.buf.cdev_vp != NULL) {
			(void) VOP_CLOSE(dumpcfg.buf.cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
			VN_RELE(dumpcfg.buf.cdev_vp);
			dumpcfg.buf.cdev_vp = NULL;
		}
	}
}
1290 1290
/*
 * This function is called whenever the memory size, as represented
 * by the phys_install list, changes.
 *
 * Re-derives the dump configuration that depends on physical memory
 * size.  All three steps run under dump_lock so other dump
 * reconfiguration paths see a consistent state.
 */
void
dump_resize()
{
	mutex_enter(&dump_lock);
	dumphdr_init();
	dumpbuf_resize();
	dump_update_clevel();
	mutex_exit(&dump_lock);
}
1304 1304
1305 1305 /*
1306 1306 * This function allows for dynamic resizing of a dump area. It assumes that
1307 1307 * the underlying device has update its appropriate size(9P).
1308 1308 */
1309 1309 int
1310 1310 dumpvp_resize()
1311 1311 {
1312 1312 int error;
1313 1313 vattr_t vattr;
1314 1314
1315 1315 mutex_enter(&dump_lock);
1316 1316 vattr.va_mask = AT_SIZE;
1317 1317 if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) {
1318 1318 mutex_exit(&dump_lock);
1319 1319 return (error);
1320 1320 }
1321 1321
1322 1322 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) {
1323 1323 mutex_exit(&dump_lock);
1324 1324 return (ENOSPC);
1325 1325 }
1326 1326
1327 1327 dumpvp_size = vattr.va_size & -DUMP_OFFSET;
1328 1328 mutex_exit(&dump_lock);
1329 1329 return (0);
1330 1330 }
1331 1331
1332 1332 int
1333 1333 dump_set_uuid(const char *uuidstr)
1334 1334 {
1335 1335 const char *ptr;
1336 1336 int i;
1337 1337
1338 1338 if (uuidstr == NULL || strnlen(uuidstr, 36 + 1) != 36)
1339 1339 return (EINVAL);
1340 1340
1341 1341 /* uuid_parse is not common code so check manually */
1342 1342 for (i = 0, ptr = uuidstr; i < 36; i++, ptr++) {
1343 1343 switch (i) {
1344 1344 case 8:
1345 1345 case 13:
1346 1346 case 18:
1347 1347 case 23:
1348 1348 if (*ptr != '-')
1349 1349 return (EINVAL);
1350 1350 break;
1351 1351
1352 1352 default:
1353 1353 if (!isxdigit(*ptr))
1354 1354 return (EINVAL);
1355 1355 break;
1356 1356 }
1357 1357 }
1358 1358
1359 1359 if (dump_osimage_uuid[0] != '\0')
1360 1360 return (EALREADY);
1361 1361
1362 1362 (void) strncpy(dump_osimage_uuid, uuidstr, 36 + 1);
1363 1363
1364 1364 cmn_err(CE_CONT, "?This Solaris instance has UUID %s\n",
1365 1365 dump_osimage_uuid);
1366 1366
1367 1367 return (0);
1368 1368 }
1369 1369
1370 1370 const char *
1371 1371 dump_get_uuid(void)
1372 1372 {
1373 1373 return (dump_osimage_uuid[0] != '\0' ? dump_osimage_uuid : "");
1374 1374 }
↓ open down ↓ |
688 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX