1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/vm.h>
  31 #include <sys/proc.h>
  32 #include <sys/file.h>
  33 #include <sys/conf.h>
  34 #include <sys/kmem.h>
  35 #include <sys/mem.h>
  36 #include <sys/mman.h>
  37 #include <sys/vnode.h>
  38 #include <sys/errno.h>
  39 #include <sys/memlist.h>
  40 #include <sys/dumphdr.h>
  41 #include <sys/dumpadm.h>
  42 #include <sys/ksyms.h>
  43 #include <sys/compress.h>
  44 #include <sys/stream.h>
  45 #include <sys/strsun.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/bitmap.h>
  48 #include <sys/modctl.h>
  49 #include <sys/utsname.h>
  50 #include <sys/systeminfo.h>
  51 #include <sys/vmem.h>
  52 #include <sys/log.h>
  53 #include <sys/var.h>
  54 #include <sys/debug.h>
  55 #include <sys/sunddi.h>
  56 #include <fs/fs_subr.h>
  57 #include <sys/fs/snode.h>
  58 #include <sys/ontrap.h>
  59 #include <sys/panic.h>
  60 #include <sys/dkio.h>
  61 #include <sys/vtoc.h>
  62 #include <sys/errorq.h>
  63 #include <sys/fm/util.h>
  64 #include <sys/fs/zfs.h>
  65 
  66 #include <vm/hat.h>
  67 #include <vm/as.h>
  68 #include <vm/page.h>
  69 #include <vm/pvn.h>
  70 #include <vm/seg.h>
  71 #include <vm/seg_kmem.h>
  72 #include <sys/clock_impl.h>
  73 #include <sys/hold_page.h>
  74 
  75 /*
  76  * exported vars
  77  */
  78 kmutex_t        dump_lock;              /* lock for dump configuration */
  79 dumphdr_t       *dumphdr;               /* dump header */
  80 int             dump_conflags = DUMP_KERNEL; /* dump configuration flags */
  81 vnode_t         *dumpvp;                /* dump device vnode pointer */
  82 u_offset_t      dumpvp_size;            /* size of dump device, in bytes */
  83 char            *dumppath;              /* pathname of dump device */
  84 int             dump_timeout = 120;     /* timeout for dumping pages */
  85 int             dump_timeleft;          /* portion of dump_timeout remaining */
  86 int             dump_ioerr;             /* dump i/o error */
  87 char            *dump_stack_scratch;    /* scratch area for saving stack summary */
  88 
  89 /*
  90  * Tunables for dump.  These can be set via /etc/system.
  91  *
  92  * dump_metrics_on      if set, the metrics collected during the dump are
  93  *      passed to savecore via the dump file and recorded by savecore in
  94  *      METRICS.txt.
  95  */
  96 
  97 /* tunables for pre-reserved heap */
  98 uint_t dump_kmem_permap = 1024;
  99 uint_t dump_kmem_pages = 8;
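     /*
      * The pre-reserved heap handed to kmem_dump_init() in
      * dump_update_clevel() is dump_kmem_permap bytes plus dump_kmem_pages
      * whole pages; like the other dump tunables, both may be overridden
      * from /etc/system.
      */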
 100 
 101 /*
 102  * Compression metrics are accumulated as nanosecond subtotals. The
 103  * results are normalized by the number of pages dumped. A report is
 104  * generated when dumpsys() completes and is saved in the dump image
 105  * after the trailing dump header.
 106  *
 107  * Metrics are always collected. Set the variable dump_metrics_on to
 108  * cause metrics to be saved in the crash file, where savecore will
 109  * record them in the file METRICS.txt.
 110  */
 111 #define PERPAGES \
 112         PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \
 113         PERPAGE(compress) \
 114         PERPAGE(write)
 115 
 116 typedef struct perpage {
 117 #define PERPAGE(x) hrtime_t x;
 118         PERPAGES
 119 #undef PERPAGE
 120 } perpage_t;
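     /*
      * With PERPAGE(x) defined as "hrtime_t x;", the PERPAGES X-macro list
      * above expands perpage_t to:
      *
      *	typedef struct perpage {
      *		hrtime_t bitmap;
      *		hrtime_t map;
      *		hrtime_t unmap;
      *		hrtime_t compress;
      *		hrtime_t write;
      *	} perpage_t;
      *
      * The same PERPAGES list is re-expanded in dumpsys_metrics() to print
      * one line per counter.
      */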
 121 
 122 /*
 123  * If dump_metrics_on is set to 1, the timing information is passed to
 124  * savecore via the crash file, where it is appended to the file
 125  * dump-dir/METRICS.txt.
 126  */
 127 uint_t dump_metrics_on = 0;     /* set to 1 to enable recording metrics */
 128 
 129 #define HRSTART(v, m)           v##ts.m = gethrtime()
 130 #define HRSTOP(v, m)            v.m += gethrtime() - v##ts.m
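     /*
      * The ## operator pastes "ts" onto the perpage member name, so
      * HRSTART(cfg->perpage, m) records a start timestamp in the shadow
      * field cfg->perpagets.m, and HRSTOP(cfg->perpage, m) accumulates the
      * elapsed nanoseconds into cfg->perpage.m.  See the per-page loop in
      * dumpsys().
      */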
 131 
 132 static char dump_osimage_uuid[36 + 1];
 133 
 134 #define isdigit(ch)     ((ch) >= '0' && (ch) <= '9')
 135 #define isxdigit(ch)    (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
 136                         ((ch) >= 'A' && (ch) <= 'F'))
 137 
 138 /*
 139  * configuration vars for dumpsys
 140  */
 141 typedef struct dumpcfg {
 142         char *page;                     /* buffer for page copy */
 143         char *lzbuf;                    /* lzjb output */
 144 
 145         char *cmap;                     /* VA page for mapping the page to dump */
 146         ulong_t *bitmap;                /* bitmap for marking pages to dump */
 147         pgcnt_t bitmapsize;             /* size of bitmap */
 148         pid_t *pids;                    /* list of process IDs at dump time */
 149 
 150         /*
 151          * statistics
 152          */
 153         perpage_t perpage;              /* per page metrics */
 154         perpage_t perpagets;            /* per page metrics (timestamps) */
 155         pgcnt_t npages;                 /* subtotal of pages dumped */
 156         pgcnt_t pages_mapped;           /* subtotal of pages mapped */
 157         pgcnt_t pages_used;             /* subtotal of pages used per map */
 158         size_t nwrite;                  /* subtotal of bytes written */
 159         hrtime_t elapsed;               /* elapsed time when completed */
 160         hrtime_t iotime;                /* time spent writing nwrite bytes */
 161         hrtime_t iowait;                /* time spent waiting for output */
 162         hrtime_t iowaitts;              /* iowait timestamp */
 163 
 164         /*
 165          * I/O buffer
 166          *
 167  * There is one I/O buffer used by dumpvp_write and dumpvp_flush. It
 168          * is sized according to the optimum device transfer speed.
 169          */
 170         struct {
 171                 vnode_t *cdev_vp;       /* VCHR open of the dump device */
 172                 len_t   vp_limit;       /* maximum write offset */
 173                 offset_t vp_off;        /* current dump device offset */
 174                 char    *cur;           /* dump write pointer */
 175                 char    *start;         /* dump buffer address */
 176                 char    *end;           /* dump buffer end */
 177                 size_t  size;           /* size of dump buf in bytes */
 178                 size_t  iosize;         /* best transfer size for device */
 179         } buf;
 180 } dumpcfg_t;
 181 
 182 static dumpcfg_t dumpcfg;       /* config vars */
 183 
 184 /*
 185  * The dump I/O buffer must be at least one page, at most xfer_size bytes,
 186  * and should scale with physmem in between.  The transfer size passed in
 187  * will either represent a global default (maxphys) or the best size for the
 188  * device.  The size of the dump I/O buffer is limited by dumpbuf_limit (8MB
 189  * by default) because the dump performance saturates beyond a certain size.
 190  * The default is to select 1/4096 of the memory.
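      * For example, with 4GB of physical memory and 4K pages this selects
      * a 1MB buffer, while a 64GB machine is limited by dumpbuf_limit or
      * the device transfer size, whichever is smaller.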
 191  */
 192 static int      dumpbuf_fraction = 12;  /* memory size scale factor */
 193 static size_t   dumpbuf_limit = 8 << 20;  /* max I/O buf size */
 194 
 195 static size_t
 196 dumpbuf_iosize(size_t xfer_size)
 197 {
 198         size_t iosize = ptob(physmem >> dumpbuf_fraction);
 199 
 200         if (iosize < PAGESIZE)
 201                 iosize = PAGESIZE;
 202         else if (iosize > xfer_size)
 203                 iosize = xfer_size;
 204         if (iosize > dumpbuf_limit)
 205                 iosize = dumpbuf_limit;
 206         return (iosize & PAGEMASK);
 207 }
 208 
 209 /*
 210  * resize the I/O buffer
 211  */
 212 static void
 213 dumpbuf_resize(void)
 214 {
 215         char *old_buf = dumpcfg.buf.start;
 216         size_t old_size = dumpcfg.buf.size;
 217         char *new_buf;
 218         size_t new_size;
 219 
 220         ASSERT(MUTEX_HELD(&dump_lock));
 221 
 222         new_size = dumpbuf_iosize(MAX(dumpcfg.buf.iosize, maxphys));
 223         if (new_size <= old_size)
 224                 return; /* no need to reallocate buffer */
 225 
 226         new_buf = kmem_alloc(new_size, KM_SLEEP);
 227         dumpcfg.buf.size = new_size;
 228         dumpcfg.buf.start = new_buf;
 229         dumpcfg.buf.end = new_buf + new_size;
 230         kmem_free(old_buf, old_size);
 231 }
 232 
 233 /*
 234  * dump_update_clevel is called when dumpadm configures the dump device.
 235  *
 236  * This implementation always allocates the minimum configuration: one
 237  * page for the copy buffer, one page for the lzjb output buffer, and
 238  * one page of VA for mapping the page being dumped.  It also asks
 239  * kmem to pre-reserve a small amount of heap for allocations made
 240  * while crash dumping.
 241  *
 242  * Live dump (savecore -L) uses the same minimum configuration.
 243  */
 246 static void
 247 dump_update_clevel()
 248 {
 249         dumpcfg_t *old = &dumpcfg;
 250         dumpcfg_t newcfg = *old;
 251         dumpcfg_t *new = &newcfg;
 252 
 253         ASSERT(MUTEX_HELD(&dump_lock));
 254 
 255         /*
 256          * Free the previously allocated bufs and VM.
 257          */
 258         if (old->lzbuf)
 259                 kmem_free(old->lzbuf, PAGESIZE);
 260         if (old->page)
 261                 kmem_free(old->page, PAGESIZE);
 262 
 263         if (old->cmap)
 264                 /* VM space for mapping pages */
 265                 vmem_xfree(heap_arena, old->cmap, PAGESIZE);
 266 
 267         /*
 268          * Allocate new data structures and buffers, and also figure the max
 269          * desired size.
 270          */
 271         new->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
 272         new->page = kmem_alloc(PAGESIZE, KM_SLEEP);
 273 
 274         new->cmap = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
 275                                 0, 0, NULL, NULL, VM_SLEEP);
 276 
 277         /*
 278          * Reserve memory for kmem allocation calls made during crash
 279          * dump.  The hat layer allocates memory for each mapping
 280          * created, and the I/O path allocates buffers and data structs.
 281          * Add a few pages for safety.
 282          */
 283         kmem_dump_init(dump_kmem_permap + (dump_kmem_pages * PAGESIZE));
 284 
 285         /* set new config pointers */
 286         *old = *new;
 287 }
 288 
 289 /*
 290  * Define a struct memlist walker to optimize bitnum to pfn
 291  * lookup. The walker maintains the state of the list traversal.
 292  */
 293 typedef struct dumpmlw {
 294         struct memlist  *mp;            /* current memlist */
 295         pgcnt_t         basenum;        /* bitnum base offset */
 296         pgcnt_t         mppages;        /* current memlist size */
 297         pgcnt_t         mpleft;         /* size to end of current memlist */
 298         pfn_t           mpaddr;         /* first pfn in memlist */
 299 } dumpmlw_t;
 300 
 301 /* initialize the walker */
 302 static inline void
 303 dump_init_memlist_walker(dumpmlw_t *pw)
 304 {
 305         pw->mp = phys_install;
 306         pw->basenum = 0;
 307         pw->mppages = pw->mp->ml_size >> PAGESHIFT;
 308         pw->mpleft = pw->mppages;
 309         pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
 310 }
 311 
 312 /*
 313  * Lookup pfn given bitnum. The memlist can be quite long on some
 314  * systems (e.g.: one per board). To optimize sequential lookups, the
 315  * caller initializes and presents a memlist walker.
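      * Bitnums should be presented in walker order: a bitnum that lies
      * before the walker's current memlist returns PFN_INVALID, which is
      * why dumpsys() re-initializes the walker before each sequential
      * scan.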
 316  */
 317 static pfn_t
 318 dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw)
 319 {
 320         bitnum -= pw->basenum;
 321         while (pw->mp != NULL) {
 322                 if (bitnum < pw->mppages) {
 323                         pw->mpleft = pw->mppages - bitnum;
 324                         return (pw->mpaddr + bitnum);
 325                 }
 326                 bitnum -= pw->mppages;
 327                 pw->basenum += pw->mppages;
 328                 pw->mp = pw->mp->ml_next;
 329                 if (pw->mp != NULL) {
 330                         pw->mppages = pw->mp->ml_size >> PAGESHIFT;
 331                         pw->mpleft = pw->mppages;
 332                         pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
 333                 }
 334         }
 335         return (PFN_INVALID);
 336 }
 337 
 338 static pgcnt_t
 339 dump_pfn_to_bitnum(pfn_t pfn)
 340 {
 341         struct memlist *mp;
 342         pgcnt_t bitnum = 0;
 343 
 344         for (mp = phys_install; mp != NULL; mp = mp->ml_next) {
 345                 if (pfn >= (mp->ml_address >> PAGESHIFT) &&
 346                     pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT))
 347                         return (bitnum + pfn - (mp->ml_address >> PAGESHIFT));
 348                 bitnum += mp->ml_size >> PAGESHIFT;
 349         }
 350         return ((pgcnt_t)-1);
 351 }
 352 
 353 static void
 354 dumphdr_init(void)
 355 {
 356         pgcnt_t npages;
 357 
 358         ASSERT(MUTEX_HELD(&dump_lock));
 359 
 360         if (dumphdr == NULL) {
 361                 dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
 362                 dumphdr->dump_magic = DUMP_MAGIC;
 363                 dumphdr->dump_version = DUMP_VERSION;
 364                 dumphdr->dump_wordsize = DUMP_WORDSIZE;
 365                 dumphdr->dump_pageshift = PAGESHIFT;
 366                 dumphdr->dump_pagesize = PAGESIZE;
 367                 dumphdr->dump_utsname = utsname;
 368                 (void) strcpy(dumphdr->dump_platform, platform);
 369                 dumpcfg.buf.size = dumpbuf_iosize(maxphys);
 370                 dumpcfg.buf.start = kmem_alloc(dumpcfg.buf.size, KM_SLEEP);
 371                 dumpcfg.buf.end = dumpcfg.buf.start + dumpcfg.buf.size;
 372                 dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
 373                 dump_stack_scratch = kmem_alloc(STACK_BUF_SIZE, KM_SLEEP);
 374                 (void) strncpy(dumphdr->dump_uuid, dump_get_uuid(),
 375                     sizeof (dumphdr->dump_uuid));
 376         }
 377 
 378         npages = num_phys_pages();
 379 
 380         if (dumpcfg.bitmapsize != npages) {
 381                 void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);
 382 
 383                 if (dumpcfg.bitmap != NULL)
 384                         kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.
 385                             bitmapsize));
 386                 dumpcfg.bitmap = map;
 387                 dumpcfg.bitmapsize = npages;
 388         }
 389 }
 390 
 391 /*
 392  * Establish a new dump device.
 393  */
 394 int
 395 dumpinit(vnode_t *vp, char *name, int justchecking)
 396 {
 397         vnode_t *cvp;
 398         vattr_t vattr;
 399         vnode_t *cdev_vp;
 400         int error = 0;
 401 
 402         ASSERT(MUTEX_HELD(&dump_lock));
 403 
 404         dumphdr_init();
 405 
 406         cvp = common_specvp(vp);
 407         if (cvp == dumpvp)
 408                 return (0);
 409 
 410         /*
 411          * Determine whether this is a plausible dump device.  We want either:
 412          * (1) a real device that's not mounted and has a cb_dump routine, or
 413          * (2) a swapfile on some filesystem that has a vop_dump routine.
 414          */
 415         if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0)
 416                 return (error);
 417 
 418         vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV;
 419         if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) {
 420                 if (vattr.va_type == VBLK || vattr.va_type == VCHR) {
 421                         if (devopsp[getmajor(vattr.va_rdev)]->
 422                             devo_cb_ops->cb_dump == nodev)
 423                                 error = ENOTSUP;
 424                         else if (vfs_devismounted(vattr.va_rdev))
 425                                 error = EBUSY;
 426                         if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip),
 427                             ZFS_DRIVER) == 0 &&
 428                             IS_SWAPVP(common_specvp(cvp)))
 429                                 error = EBUSY;
 430                 } else {
 431                         if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) ||
 432                             !IS_SWAPVP(cvp))
 433                                 error = ENOTSUP;
 434                 }
 435         }
 436 
 437         if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE)
 438                 error = ENOSPC;
 439 
 440         if (error || justchecking) {
 441                 (void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0,
 442                     kcred, NULL);
 443                 return (error);
 444         }
 445 
 446         VN_HOLD(cvp);
 447 
 448         if (dumpvp != NULL)
 449                 dumpfini();     /* unconfigure the old dump device */
 450 
 451         dumpvp = cvp;
 452         dumpvp_size = vattr.va_size & -DUMP_OFFSET;
 453         dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP);
 454         (void) strcpy(dumppath, name);
 455         dumpcfg.buf.iosize = 0;
 456 
 457         /*
 458          * If the dump device is a block device, attempt to open up the
 459          * corresponding character device and determine its maximum transfer
 460  * size.  We use this information to potentially resize the dump
 461  * buffer to a larger size that is better suited for I/O to the dump
 462  * device.
 463          */
 464         if (cvp->v_type == VBLK &&
 465             (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) {
 466                 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
 467                         size_t blk_size;
 468                         struct dk_cinfo dki;
 469                         struct dk_minfo minf;
 470 
 471                         if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO,
 472                             (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL)
 473                             == 0 && minf.dki_lbsize != 0)
 474                                 blk_size = minf.dki_lbsize;
 475                         else
 476                                 blk_size = DEV_BSIZE;
 477 
 478                         if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki,
 479                             FKIOCTL, kcred, NULL, NULL) == 0) {
 480                                 dumpcfg.buf.iosize = dki.dki_maxtransfer * blk_size;
 481                                 dumpbuf_resize();
 482                         }
 483                         /*
 484                          * If we are working with a zvol then dumpify it
 485                          * if it's not being used as swap.
 486                          */
 487                         if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) {
 488                                 if (IS_SWAPVP(common_specvp(cvp)))
 489                                         error = EBUSY;
 490                                 else if ((error = VOP_IOCTL(cdev_vp,
 491                                     DKIOCDUMPINIT, NULL, FKIOCTL, kcred,
 492                                     NULL, NULL)) != 0)
 493                                         dumpfini();
 494                         }
 495 
 496                         (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
 497                             kcred, NULL);
 498                 }
 499 
 500                 VN_RELE(cdev_vp);
 501         }
 502 
 503         cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20);
 504 
 505         dump_update_clevel();
 506 
 507         return (error);
 508 }
 509 
 510 void
 511 dumpfini(void)
 512 {
 513         vattr_t vattr;
 514         boolean_t is_zfs = B_FALSE;
 515         vnode_t *cdev_vp;
 516         ASSERT(MUTEX_HELD(&dump_lock));
 517 
 518         kmem_free(dumppath, strlen(dumppath) + 1);
 519 
 520         /*
 521          * Determine if we are using zvols for our dump device
 522          */
 523         vattr.va_mask = AT_RDEV;
 524         if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) {
 525                 is_zfs = (getmajor(vattr.va_rdev) ==
 526                     ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE;
 527         }
 528 
 529         /*
 530          * If we have a zvol dump device then we call into zfs so
 531          * that it may have a chance to cleanup.
 532          */
 533         if (is_zfs &&
 534             (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) {
 535                 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
 536                         (void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL,
 537                             kcred, NULL, NULL);
 538                         (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
 539                             kcred, NULL);
 540                 }
 541                 VN_RELE(cdev_vp);
 542         }
 543 
 544         (void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL);
 545 
 546         VN_RELE(dumpvp);
 547 
 548         dumpvp = NULL;
 549         dumpvp_size = 0;
 550         dumppath = NULL;
 551 }
 552 
 553 static offset_t
 554 dumpvp_flush(void)
 555 {
 556         size_t size = P2ROUNDUP(dumpcfg.buf.cur - dumpcfg.buf.start, PAGESIZE);
 557         hrtime_t iotime;
 558         int err;
 559 
 560         if (dumpcfg.buf.vp_off + size > dumpcfg.buf.vp_limit) {
 561                 dump_ioerr = ENOSPC;
 562                 dumpcfg.buf.vp_off = dumpcfg.buf.vp_limit;
 563         } else if (size != 0) {
 564                 iotime = gethrtime();
 565                 dumpcfg.iowait += iotime - dumpcfg.iowaitts;
 566                 if (panicstr)
 567                         err = VOP_DUMP(dumpvp, dumpcfg.buf.start,
 568                             lbtodb(dumpcfg.buf.vp_off), btod(size), NULL);
 569                 else
 570                         err = vn_rdwr(UIO_WRITE, dumpcfg.buf.cdev_vp != NULL ?
 571                             dumpcfg.buf.cdev_vp : dumpvp, dumpcfg.buf.start, size,
 572                             dumpcfg.buf.vp_off, UIO_SYSSPACE, 0, dumpcfg.buf.vp_limit,
 573                             kcred, 0);
 574                 if (err && dump_ioerr == 0)
 575                         dump_ioerr = err;
 576                 dumpcfg.iowaitts = gethrtime();
 577                 dumpcfg.iotime += dumpcfg.iowaitts - iotime;
 578                 dumpcfg.nwrite += size;
 579                 dumpcfg.buf.vp_off += size;
 580         }
 581         dumpcfg.buf.cur = dumpcfg.buf.start;
 582         dump_timeleft = dump_timeout;
 583         return (dumpcfg.buf.vp_off);
 584 }
 585 
 586 /* maximize write speed by keeping seek offset aligned with size */
 587 void
 588 dumpvp_write(const void *va, size_t size)
 589 {
 590         size_t len, off, sz;
 591 
 592         while (size != 0) {
 593                 len = MIN(size, dumpcfg.buf.end - dumpcfg.buf.cur);
 594                 if (len == 0) {
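                             /*
                              * The buffer is full.  If the device offset is
                              * already aligned to the buffer size (or the
                              * size is not a power of two), flush the whole
                              * buffer; otherwise flush just enough to bring
                              * the device offset to an aligned boundary,
                              * then slide the unwritten tail back to the
                              * start of the buffer so that later flushes
                              * are full, size-aligned writes.
                              */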
 595                         off = P2PHASE(dumpcfg.buf.vp_off, dumpcfg.buf.size);
 596                         if (off == 0 || !ISP2(dumpcfg.buf.size)) {
 597                                 (void) dumpvp_flush();
 598                         } else {
 599                                 sz = dumpcfg.buf.size - off;
 600                                 dumpcfg.buf.cur = dumpcfg.buf.start + sz;
 601                                 (void) dumpvp_flush();
 602                                 ovbcopy(dumpcfg.buf.start + sz, dumpcfg.buf.start, off);
 603                                 dumpcfg.buf.cur += off;
 604                         }
 605                 } else {
 606                         bcopy(va, dumpcfg.buf.cur, len);
 607                         va = (char *)va + len;
 608                         dumpcfg.buf.cur += len;
 609                         size -= len;
 610                 }
 611         }
 612 }
 613 
 614 /*ARGSUSED*/
 615 static void
 616 dumpvp_ksyms_write(const void *src, void *dst, size_t size)
 617 {
 618         dumpvp_write(src, size);
 619 }
 620 
 621 /*
 622  * Mark 'pfn' in the bitmap and dump its translation table entry.
 623  */
 624 void
 625 dump_addpage(struct as *as, void *va, pfn_t pfn)
 626 {
 627         mem_vtop_t mem_vtop;
 628         pgcnt_t bitnum;
 629 
 630         if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
 631                 if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
 632                         dumphdr->dump_npages++;
 633                         BT_SET(dumpcfg.bitmap, bitnum);
 634                 }
 635                 dumphdr->dump_nvtop++;
 636                 mem_vtop.m_as = as;
 637                 mem_vtop.m_va = va;
 638                 mem_vtop.m_pfn = pfn;
 639                 dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
 640         }
 641         dump_timeleft = dump_timeout;
 642 }
 643 
 644 /*
 645  * Mark 'pfn' in the bitmap
 646  */
 647 void
 648 dump_page(pfn_t pfn)
 649 {
 650         pgcnt_t bitnum;
 651 
 652         if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
 653                 if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
 654                         dumphdr->dump_npages++;
 655                         BT_SET(dumpcfg.bitmap, bitnum);
 656                 }
 657         }
 658         dump_timeleft = dump_timeout;
 659 }
 660 
 661 /*
 662  * Dump the <as, va, pfn> information for a given address space.
 663  * SEGOP_DUMP() will call dump_addpage() for each page in the segment.
 664  */
 665 static void
 666 dump_as(struct as *as)
 667 {
 668         struct seg *seg;
 669 
 670         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 671         for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
 672                 if (seg->s_as != as)
 673                         break;
 674                 if (seg->s_ops == NULL)
 675                         continue;
 676                 SEGOP_DUMP(seg);
 677         }
 678         AS_LOCK_EXIT(as, &as->a_lock);
 679 
 680         if (seg != NULL)
 681                 cmn_err(CE_WARN, "invalid segment %p in address space %p",
 682                     (void *)seg, (void *)as);
 683 }
 684 
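     /*
      * Dump the <as, va, pfn> translations for a single process.  The
      * process is held via sprlock(); p_lock is dropped across the
      * segment walk, and processes running in the kernel's address space
      * (p_as == &kas) are skipped because kas is dumped separately.
      */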
 685 static int
 686 dump_process(pid_t pid)
 687 {
 688         proc_t *p = sprlock(pid);
 689 
 690         if (p == NULL)
 691                 return (-1);
 692         if (p->p_as != &kas) {
 693                 mutex_exit(&p->p_lock);
 694                 dump_as(p->p_as);
 695                 mutex_enter(&p->p_lock);
 696         }
 697 
 698         sprunlock(p);
 699 
 700         return (0);
 701 }
 702 
 703 /*
 704  * The following functions (dump_summary(), dump_ereports(), and
 705  * dump_messages()) write data to an uncompressed area within the
 706  * crashdump. The layout of these areas is:
 707  *
 708  * +------------------------------------------------------------+
 709  * |     compressed pages       | summary | ereports | messages |
 710  * +------------------------------------------------------------+
 711  *
 712  * With the advent of saving a compressed crash dump by default, we
 713  * need to save a little more data to describe the failure mode in
 714  * an uncompressed buffer available before savecore uncompresses
 715  * the dump. Initially this is a copy of the stack trace. Additional
 716  * summary information should be added here.
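      *
      * Working back from the end of the dump device, DUMP_OFFSET bytes
      * are reserved for the terminal dump header, preceded by
      * DUMP_LOGSIZE bytes of messages, DUMP_ERPTSIZE bytes of ereports
      * and DUMP_SUMMARYSIZE bytes of summary data, matching the vp_limit
      * and vp_off arithmetic in the three functions below.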
 717  */
 718 
 719 void
 720 dump_summary(void)
 721 {
 722         u_offset_t dumpvp_start;
 723         summary_dump_t sd;
 724 
 725         if (dumpvp == NULL || dumphdr == NULL)
 726                 return;
 727 
 728         dumpcfg.buf.cur = dumpcfg.buf.start;
 729 
 730         dumpcfg.buf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE +
 731             DUMP_ERPTSIZE);
 732         dumpvp_start = dumpcfg.buf.vp_limit - DUMP_SUMMARYSIZE;
 733         dumpcfg.buf.vp_off = dumpvp_start;
 734 
 735         sd.sd_magic = SUMMARY_MAGIC;
 736         sd.sd_ssum = checksum32(dump_stack_scratch, STACK_BUF_SIZE);
 737         dumpvp_write(&sd, sizeof (sd));
 738         dumpvp_write(dump_stack_scratch, STACK_BUF_SIZE);
 739 
 740         sd.sd_magic = 0; /* indicate end of summary */
 741         dumpvp_write(&sd, sizeof (sd));
 742         (void) dumpvp_flush();
 743 }
 744 
 745 void
 746 dump_ereports(void)
 747 {
 748         u_offset_t dumpvp_start;
 749         erpt_dump_t ed;
 750 
 751         if (dumpvp == NULL || dumphdr == NULL)
 752                 return;
 753 
 754         dumpcfg.buf.cur = dumpcfg.buf.start;
 755         dumpcfg.buf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE);
 756         dumpvp_start = dumpcfg.buf.vp_limit - DUMP_ERPTSIZE;
 757         dumpcfg.buf.vp_off = dumpvp_start;
 758 
 759         fm_ereport_dump();
 760         if (panicstr)
 761                 errorq_dump();
 762 
 763         bzero(&ed, sizeof (ed)); /* indicate end of ereports */
 764         dumpvp_write(&ed, sizeof (ed));
 765         (void) dumpvp_flush();
 766 
 767         if (!panicstr) {
 768                 (void) VOP_PUTPAGE(dumpvp, dumpvp_start,
 769                     (size_t)(dumpcfg.buf.vp_off - dumpvp_start),
 770                     B_INVAL | B_FORCE, kcred, NULL);
 771         }
 772 }
 773 
 774 void
 775 dump_messages(void)
 776 {
 777         log_dump_t ld;
 778         mblk_t *mctl, *mdata;
 779         queue_t *q, *qlast;
 780         u_offset_t dumpvp_start;
 781 
 782         if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL)
 783                 return;
 784 
 785         dumpcfg.buf.cur = dumpcfg.buf.start;
 786         dumpcfg.buf.vp_limit = dumpvp_size - DUMP_OFFSET;
 787         dumpvp_start = dumpcfg.buf.vp_limit - DUMP_LOGSIZE;
 788         dumpcfg.buf.vp_off = dumpvp_start;
 789 
 790         qlast = NULL;
 791         do {
 792                 for (q = log_consq; q->q_next != qlast; q = q->q_next)
 793                         continue;
 794                 for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) {
 795                         dump_timeleft = dump_timeout;
 796                         mdata = mctl->b_cont;
 797                         ld.ld_magic = LOG_MAGIC;
 798                         ld.ld_msgsize = MBLKL(mctl->b_cont);
 799                         ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl));
 800                         ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata));
 801                         dumpvp_write(&ld, sizeof (ld));
 802                         dumpvp_write(mctl->b_rptr, MBLKL(mctl));
 803                         dumpvp_write(mdata->b_rptr, MBLKL(mdata));
 804                 }
 805         } while ((qlast = q) != log_consq);
 806 
 807         ld.ld_magic = 0;                /* indicate end of messages */
 808         dumpvp_write(&ld, sizeof (ld));
 809         (void) dumpvp_flush();
 810         if (!panicstr) {
 811                 (void) VOP_PUTPAGE(dumpvp, dumpvp_start,
 812                     (size_t)(dumpcfg.buf.vp_off - dumpvp_start),
 813                     B_INVAL | B_FORCE, kcred, NULL);
 814         }
 815 }
 816 
 817 /*
 818  * Copy pages, trapping ECC errors. Also, for robustness, trap data
 819  * access in case something goes wrong in the hat layer and the
 820  * mapping is broken.  Returns the byte offset within the page of the
 821  * first bad word, or -1 if the page was copied without error.
      */
 822 static int
 823 dump_pagecopy(void *src, void *dst)
 824 {
 825         long *wsrc = (long *)src;
 826         long *wdst = (long *)dst;
 827         const ulong_t ncopies = PAGESIZE / sizeof (long);
 828         volatile int w = 0;
 829         volatile int ueoff = -1;
 830         on_trap_data_t otd;
 831 
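             /*
              * on_trap() returns zero when first called and nonzero each
              * time a protected access traps, with execution resuming
              * here; the faulting word is replaced with a recognizable
              * pattern and the copy continues with the next word.
              */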
 832         if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) {
 833                 if (ueoff == -1)
 834                         ueoff = w * sizeof (long);
 835                 /* report "bad ECC" or "bad address" */
 836 #ifdef _LP64
 837                 if (otd.ot_trap & OT_DATA_EC)
 838                         wdst[w++] = 0x00badecc00badecc;
 839                 else
 840                         wdst[w++] = 0x00badadd00badadd;
 841 #else
 842                 if (otd.ot_trap & OT_DATA_EC)
 843                         wdst[w++] = 0x00badecc;
 844                 else
 845                         wdst[w++] = 0x00badadd;
 846 #endif
 847         }
 848         while (w < ncopies) {
 849                 wdst[w] = wsrc[w];
 850                 w++;
 851         }
 852         no_trap();
 853         return (ueoff);
 854 }
 855 
 856 size_t
 857 dumpsys_metrics(char *buf, size_t size)
 858 {
 859         dumpcfg_t *cfg = &dumpcfg;
 861         int compress_ratio;
 862         int sec, iorate;
 863         char *e = buf + size;
 864         char *p = buf;
 865 
 866         sec = cfg->elapsed / (1000 * 1000 * 1000ULL);
 867         if (sec < 1)
 868                 sec = 1;
 869 
 870         if (cfg->iotime < 1)
 871                 cfg->iotime = 1;
 872         iorate = (cfg->nwrite * 100000ULL) / cfg->iotime;
 873 
 874         compress_ratio = 100LL * cfg->npages / btopr(cfg->nwrite + 1);
 875 
 876 #define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0)
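     /*
      * Once p has advanced to e, the ternary stops calling snprintf(), so
      * further P() invocations become no-ops.  Note that snprintf()
      * returns the untruncated length, so p may step past e on the call
      * that fills the buffer.
      */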
 877 
 878         P("Master cpu_seqid,%d\n", CPU->cpu_seqid);
 879         P("Master cpu_id,%d\n", CPU->cpu_id);
 880         P("dump_flags,0x%x\n", dumphdr->dump_flags);
 881         P("dump_ioerr,%d\n", dump_ioerr);
 882 
 883         P("Compression type,serial lzjb\n");
 884         P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
 885             100);
 886 
 887         P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
 888         P("..total bytes,%lld\n", (u_longlong_t)cfg->nwrite);
 889         P("..total nsec,%lld\n", (u_longlong_t)cfg->iotime);
 890         P("dumpbuf.iosize,%ld\n", dumpcfg.buf.iosize);
 891         P("dumpbuf.size,%ld\n", dumpcfg.buf.size);
 892 
 893         P("Dump pages/sec,%llu\n", (u_longlong_t)cfg->npages / sec);
 894         P("Dump pages,%llu\n", (u_longlong_t)cfg->npages);
 895         P("Dump time,%d\n", sec);
 896 
 897         if (cfg->pages_mapped > 0)
 898                 P("per-cent map utilization,%d\n", (int)((100 * cfg->pages_used)
 899                     / cfg->pages_mapped));
 900 
 901         P("\nPer-page metrics:\n");
 902         if (cfg->npages > 0) {
 903 #define PERPAGE(x) \
 904                 P("%s nsec/page,%d\n", #x, (int)(cfg->perpage.x / cfg->npages));
 905                 PERPAGES;
 906 #undef PERPAGE
 907 
 908                 P("I/O wait nsec/page,%llu\n", (u_longlong_t)(cfg->iowait /
 909                     cfg->npages));
 910         }
 911 #undef P
 912         if (p < e)
 913                 bzero(p, e - p);
 914         return (p - buf);
 915 }
 916 
 917 /*
 918  * Dump the system.
 919  */
 920 void
 921 dumpsys(void)
 922 {
 923         dumpcfg_t *cfg = &dumpcfg;
 924         uint_t percent_done;            /* dump progress reported */
 925         hrtime_t start;                 /* start time */
 926         pfn_t pfn;
 927         pgcnt_t bitnum;
 928         proc_t *p;
 929         pid_t npids, pidx;
 930         char *content;
 931         char *buf;
 932         size_t size;
 933         dumpmlw_t mlw;
 934         dumpcsize_t datatag;
 935         dumpdatahdr_t datahdr;
 936 
 937         if (dumpvp == NULL || dumphdr == NULL) {
 938                 uprintf("skipping system dump - no dump device configured\n");
 939                 return;
 940         }
 941         dumpcfg.buf.cur = dumpcfg.buf.start;
 942 
 943         /* clear the sync variables */
 944         cfg->npages = 0;
 945         cfg->pages_mapped = 0;
 946         cfg->pages_used = 0;
 947         cfg->nwrite = 0;
 948         cfg->elapsed = 0;
 949         cfg->iotime = 0;
 950         cfg->iowait = 0;
 951         cfg->iowaitts = 0;
 952 
 953         /*
 954          * Calculate the starting block for dump.  If we're dumping on a
 955          * swap device, start 1/5 of the way in; otherwise, start at the
 956          * beginning.  And never use the first page -- it may be a disk label.
 957          */
 958         if (dumpvp->v_flag & VISSWAP)
 959                 dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
 960         else
 961                 dumphdr->dump_start = DUMP_OFFSET;
 962 
 963         dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
 964         dumphdr->dump_crashtime = gethrestime_sec();
 965         dumphdr->dump_npages = 0;
 966         dumphdr->dump_nvtop = 0;
 967         bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
 968         dump_timeleft = dump_timeout;
 969 
 970         if (panicstr) {
 971                 dumphdr->dump_flags &= ~DF_LIVE;
 972                 (void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
 973                 (void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
 974                 (void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
 975                     panicstr, panicargs);
 976         }
 977 
 978         if (dump_conflags & DUMP_ALL)
 979                 content = "all";
 980         else if (dump_conflags & DUMP_CURPROC)
 981                 content = "kernel + curproc";
 982         else
 983                 content = "kernel";
 984         uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
 985             dumphdr->dump_start, content);
 986 
 987         /* Make sure nodename is current */
 988         bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);
 989 
 990         /*
 991          * If this is a live dump, try to open a VCHR vnode for better
 992          * performance. We must take care to flush the buffer cache
 993          * first.
 994          */
 995         if (!panicstr) {
 996                 vnode_t *cdev_vp, *cmn_cdev_vp;
 997 
 998                 ASSERT(dumpcfg.buf.cdev_vp == NULL);
 999                 cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR);
1000                 if (cdev_vp != NULL) {
1001                         cmn_cdev_vp = common_specvp(cdev_vp);
1002                         if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
1003                             == 0) {
1004                                 if (vn_has_cached_data(dumpvp))
1005                                         (void) pvn_vplist_dirty(dumpvp, 0, NULL,
1006                                             B_INVAL | B_TRUNC, kcred);
1007                                 dumpcfg.buf.cdev_vp = cmn_cdev_vp;
1008                         } else {
1009                                 VN_RELE(cdev_vp);
1010                         }
1011                 }
1012         }
1013 
1014         /*
1015          * Store a hires timestamp so we can look it up during debugging.
1016          */
1017         lbolt_debug_entry();
1018 
1019         /*
1020          * Leave room for the message and ereport save areas and terminal dump
1021          * header.
1022          */
1023         dumpcfg.buf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
1024             DUMP_ERPTSIZE;
1025 
1026         /*
1027          * Write out the symbol table.  It's no longer compressed,
1028          * so its 'size' and 'csize' are equal.
1029          */
1030         dumpcfg.buf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
1031         dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
1032             ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);
1033 
1034         /*
1035          * Write out the translation map.
1036          */
1037         dumphdr->dump_map = dumpvp_flush();
1038         dump_as(&kas);
1039         dumphdr->dump_nvtop += dump_plat_addr();
1040 
1041         /*
1042          * call into hat, which may have unmapped pages that also need to
1043          * be in the dump
1044          */
1045         hat_dump();
1046 
1047         if (dump_conflags & DUMP_ALL) {
1048                 mutex_enter(&pidlock);
1049 
1050                 for (npids = 0, p = practive; p != NULL; p = p->p_next)
1051                         dumpcfg.pids[npids++] = p->p_pid;
1052 
1053                 mutex_exit(&pidlock);
1054 
1055                 for (pidx = 0; pidx < npids; pidx++)
1056                         (void) dump_process(dumpcfg.pids[pidx]);
1057 
1058                 dump_init_memlist_walker(&mlw);
1059                 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
1060                         dump_timeleft = dump_timeout;
1061                         pfn = dump_bitnum_to_pfn(bitnum, &mlw);
1062                         /*
1063                          * Some hypervisors do not have all pages available to
1064                          * be accessed by the guest OS.  Check for page
1065                          * accessibility.
1066                          */
1067                         if (plat_hold_page(pfn, PLAT_HOLD_NO_LOCK, NULL) !=
1068                             PLAT_HOLD_OK)
1069                                 continue;
1070                         BT_SET(dumpcfg.bitmap, bitnum);
1071                 }
1072                 dumphdr->dump_npages = dumpcfg.bitmapsize;
1073                 dumphdr->dump_flags |= DF_ALL;
1074 
1075         } else if (dump_conflags & DUMP_CURPROC) {
1076                 /*
1077                  * Determine which pid is to be dumped.  If we're panicking, we
1078                  * dump the process associated with panic_thread (if any).  If
1079                  * this is a live dump, we dump the process associated with
1080                  * curthread.
1081                  */
1082                 npids = 0;
1083                 if (panicstr) {
1084                         if (panic_thread != NULL &&
1085                             panic_thread->t_procp != NULL &&
1086                             panic_thread->t_procp != &p0) {
1087                                 dumpcfg.pids[npids++] =
1088                                     panic_thread->t_procp->p_pid;
1089                         }
1090                 } else {
1091                         dumpcfg.pids[npids++] = curthread->t_procp->p_pid;
1092                 }
1093 
1094                 if (npids && dump_process(dumpcfg.pids[0]) == 0)
1095                         dumphdr->dump_flags |= DF_CURPROC;
1096                 else
1097                         dumphdr->dump_flags |= DF_KERNEL;
1098 
1099         } else {
1100                 dumphdr->dump_flags |= DF_KERNEL;
1101         }
1102 
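             /*
              * dump_hashmask is one less than the smallest power of two
              * that is at least dump_nvtop, i.e. a mask sized for hashing
              * the <as, va> translation entries recorded above.
              */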
1103         dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1;
1104 
1105         /*
1106          * Write out the pfn table.
1107          */
1108         dumphdr->dump_pfn = dumpvp_flush();
1109         dump_init_memlist_walker(&mlw);
1110         for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
1111                 dump_timeleft = dump_timeout;
1112                 if (!BT_TEST(dumpcfg.bitmap, bitnum))
1113                         continue;
1114                 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
1115                 ASSERT(pfn != PFN_INVALID);
1116                 dumpvp_write(&pfn, sizeof (pfn_t));
1117         }
1118         dump_plat_pfn();
1119 
1120         /*
1121          * Write out all the pages.
1122          * Map pages, copy them handling UEs, compress, and write them out.
1123          */
1124         dumphdr->dump_data = dumpvp_flush();
1125 
1126         ASSERT(dumpcfg.page);
1127         bzero(&dumpcfg.perpage, sizeof (dumpcfg.perpage));
1128 
1129         start = gethrtime();
1130         cfg->iowaitts = start;
1131 
1132         if (panicstr)
1133                 kmem_dump_begin();
1134 
1135         percent_done = 0;
1136 
1137         dump_init_memlist_walker(&mlw);
1138         for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
1139                 size_t csize;
1140 
1141                 dump_timeleft = dump_timeout;
1142                 HRSTART(cfg->perpage, bitmap);
1143                 if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
1144                         HRSTOP(cfg->perpage, bitmap);
1145                         continue;
1146                 }
1147                 HRSTOP(cfg->perpage, bitmap);
1148 
1149                 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
1150                 ASSERT(pfn != PFN_INVALID);
1151 
1152                 HRSTART(cfg->perpage, map);
1153                 hat_devload(kas.a_hat, dumpcfg.cmap, PAGESIZE, pfn, PROT_READ,
1154                             HAT_LOAD_NOCONSIST);
1155                 HRSTOP(cfg->perpage, map);
1156 
1157                 dump_pagecopy(dumpcfg.cmap, dumpcfg.page);
1158 
1159                 HRSTART(cfg->perpage, unmap);
1160                 hat_unload(kas.a_hat, dumpcfg.cmap, PAGESIZE, HAT_UNLOAD);
1161                 HRSTOP(cfg->perpage, unmap);
1162 
1163                 HRSTART(dumpcfg.perpage, compress);
1164                 csize = compress(dumpcfg.page, dumpcfg.lzbuf, PAGESIZE);
1165                 HRSTOP(dumpcfg.perpage, compress);
1166 
1167                 HRSTART(dumpcfg.perpage, write);
1168                 dumpvp_write(&csize, sizeof (csize));
1169                 dumpvp_write(dumpcfg.lzbuf, csize);
1170                 HRSTOP(dumpcfg.perpage, write);
1171 
1172                 if (dump_ioerr) {
1173                         dumphdr->dump_flags &= ~DF_COMPLETE;
1174                         dumphdr->dump_npages = cfg->npages;
1175                         break;
1176                 }
1177                 if (++cfg->npages * 100LL / dumphdr->dump_npages > percent_done) {
1178                         int sec;
1179 
1180                         sec = (gethrtime() - start) / 1000 / 1000 / 1000;
1181                         uprintf("^\r%2d:%02d %3d%% done", sec / 60, sec % 60,
1182                                 ++percent_done);
1183                         if (!panicstr)
1184                                 delay(1);       /* let the output be sent */
1185                 }
1186         }
1187 
1188         cfg->elapsed = gethrtime() - start;
1189         if (cfg->elapsed < 1)
1190                 cfg->elapsed = 1;
1191 
1192         /* record actual pages dumped */
1193         dumphdr->dump_npages = cfg->npages;
1194 
1195         /* platform-specific data */
1196         dumphdr->dump_npages += dump_plat_data(dumpcfg.page);
1197 
1198         /* note any errors by clearing DF_COMPLETE */
1199         if (dump_ioerr || cfg->npages < dumphdr->dump_npages)
1200                 dumphdr->dump_flags &= ~DF_COMPLETE;
1201 
1202         /* end of stream blocks */
1203         datatag = 0;
1204         dumpvp_write(&datatag, sizeof (datatag));
1205 
1206         bzero(&datahdr, sizeof (datahdr));
1207 
1208         /* buffer for metrics */
1209         buf = dumpcfg.page;
1210         size = MIN(PAGESIZE, DUMP_OFFSET - sizeof (dumphdr_t) -
1211             sizeof (dumpdatahdr_t));
1212 
1213         /* finish the kmem intercepts, collect kmem verbose info */
1214         if (panicstr) {
1215                 datahdr.dump_metrics = kmem_dump_finish(buf, size);
1216                 buf += datahdr.dump_metrics;
1217                 size -= datahdr.dump_metrics;
1218         }
1219 
1220         /* record in the header whether this is a fault-management panic */
1221         if (panicstr)
1222                 dumphdr->dump_fm_panic = is_fm_panic();
1223 
1224         /* compression info in data header */
1225         datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC;
1226         datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION;
1227         datahdr.dump_maxcsize = PAGESIZE;
1228         datahdr.dump_maxrange = 1;
1229         datahdr.dump_nstreams = 1;
1230         datahdr.dump_clevel = 0;
1231 
1232         if (dump_metrics_on)
1233                 datahdr.dump_metrics += dumpsys_metrics(buf, size);
1234 
1235         datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data;
1236 
1237         /*
1238          * Write out the initial and terminal dump headers.
1239          */
1240         dumpcfg.buf.vp_off = dumphdr->dump_start;
1241         dumpvp_write(dumphdr, sizeof (dumphdr_t));
1242         (void) dumpvp_flush();
1243 
1244         dumpcfg.buf.vp_limit = dumpvp_size;
1245         dumpcfg.buf.vp_off = dumpcfg.buf.vp_limit - DUMP_OFFSET;
1246         dumpvp_write(dumphdr, sizeof (dumphdr_t));
1247         dumpvp_write(&datahdr, sizeof (dumpdatahdr_t));
1248         dumpvp_write(dumpcfg.page, datahdr.dump_metrics);
1249 
1250         (void) dumpvp_flush();
1251 
1252         uprintf("\r%3d%% done: %llu pages dumped, ",
1253             percent_done, (u_longlong_t)cfg->npages);
1254 
1255         if (dump_ioerr == 0) {
1256                 uprintf("dump succeeded\n");
1257         } else {
1258                 uprintf("dump failed: error %d\n", dump_ioerr);
1259 #ifdef DEBUG
1260                 if (panicstr)
1261                         debug_enter("dump failed");
1262 #endif
1263         }
1264 
1265         /*
1266          * Write out all undelivered messages.  This has to be the *last*
1267          * thing we do because the dump process itself emits messages.
1268          */
1269         if (panicstr) {
1270                 dump_summary();
1271                 dump_ereports();
1272                 dump_messages();
1273         }
1274 
1275         delay(2 * hz);  /* let people see the 'done' message */
1276         dump_timeleft = 0;
1277         dump_ioerr = 0;
1278 
1279         /* restore settings after live dump completes */
1280         if (!panicstr) {
1281                 /* release any VCHR open of the dump device */
1282                 if (dumpcfg.buf.cdev_vp != NULL) {
1283                         (void) VOP_CLOSE(dumpcfg.buf.cdev_vp, FREAD | FWRITE, 1, 0,
1284                             kcred, NULL);
1285                         VN_RELE(dumpcfg.buf.cdev_vp);
1286                         dumpcfg.buf.cdev_vp = NULL;
1287                 }
1288         }
1289 }
1290 
1291 /*
1292  * This function is called whenever the memory size, as represented
1293  * by the phys_install list, changes.
1294  */
1295 void
1296 dump_resize()
1297 {
1298         mutex_enter(&dump_lock);
1299         dumphdr_init();
1300         dumpbuf_resize();
1301         dump_update_clevel();
1302         mutex_exit(&dump_lock);
1303 }
1304 
1305 /*
1306  * This function allows for dynamic resizing of a dump area. It assumes that
1307  * the underlying device has updated its appropriate size(9P).
1308  */
1309 int
1310 dumpvp_resize()
1311 {
1312         int error;
1313         vattr_t vattr;
1314 
1315         mutex_enter(&dump_lock);
1316         vattr.va_mask = AT_SIZE;
1317         if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) {
1318                 mutex_exit(&dump_lock);
1319                 return (error);
1320         }
1321 
1322         if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) {
1323                 mutex_exit(&dump_lock);
1324                 return (ENOSPC);
1325         }
1326 
1327         dumpvp_size = vattr.va_size & -DUMP_OFFSET;
1328         mutex_exit(&dump_lock);
1329         return (0);
1330 }
1331 
1332 int
1333 dump_set_uuid(const char *uuidstr)
1334 {
1335         const char *ptr;
1336         int i;
1337 
1338         if (uuidstr == NULL || strnlen(uuidstr, 36 + 1) != 36)
1339                 return (EINVAL);
1340 
1341         /* uuid_parse is not common code so check manually */
1342         for (i = 0, ptr = uuidstr; i < 36; i++, ptr++) {
1343                 switch (i) {
1344                 case 8:
1345                 case 13:
1346                 case 18:
1347                 case 23:
1348                         if (*ptr != '-')
1349                                 return (EINVAL);
1350                         break;
1351 
1352                 default:
1353                         if (!isxdigit(*ptr))
1354                                 return (EINVAL);
1355                         break;
1356                 }
1357         }
1358 
1359         if (dump_osimage_uuid[0] != '\0')
1360                 return (EALREADY);
1361 
1362         (void) strncpy(dump_osimage_uuid, uuidstr, 36 + 1);
1363 
1364         cmn_err(CE_CONT, "?This Solaris instance has UUID %s\n",
1365             dump_osimage_uuid);
1366 
1367         return (0);
1368 }
1369 
1370 const char *
1371 dump_get_uuid(void)
1372 {
1373         return (dump_osimage_uuid[0] != '\0' ? dump_osimage_uuid : "");
1374 }