1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * Ereport-handling routines for memory errors
  27  */
  28 
  29 #include <cmd_mem.h>
  30 #include <cmd_dimm.h>
  31 #include <cmd_bank.h>
  32 #include <cmd_page.h>
  33 #include <cmd_cpu.h>
  34 #include <cmd_branch.h>
  35 #include <cmd_state.h>
  36 #include <cmd.h>
  37 #include <cmd_hc_sun4v.h>
  38 
  39 #include <assert.h>
  40 #include <strings.h>
  41 #include <string.h>
  42 #include <errno.h>
  43 #include <unistd.h>
  44 #include <fm/fmd_api.h>
  45 #include <sys/fm/ldom.h>
  46 #include <sys/fm/protocol.h>
  47 
  48 #include <sys/fm/cpu/UltraSPARC-T1.h>
  49 #include <sys/mdesc.h>
  50 #include <sys/async.h>
  51 #include <sys/errclassify.h>
  52 #include <sys/niagararegs.h>
  53 #include <sys/fm/ldom.h>
  54 #include <ctype.h>
  55 
  56 #define VF_TS3_FCR      0x000000000000FFFFULL
  57 #define VF_L2ESYR_C2C   0x8000000000000000ULL
  58 #define OFFBIT          0xFFFFFFFFFFFC07FFULL
  59 #define BIT28_32        0x00000001F0000000ULL
  60 #define BIT13_17        0x000000000003E000ULL
  61 #define BIT18_19        0x00000000000C0000ULL
  62 #define BIT11_12        0x0000000000001800ULL
  63 #define UTS2_CPUS_PER_CHIP      64
  64 #define FBR_ERROR       ".fbr"
  65 #define DSU_ERROR       ".dsu"
  66 #define FERG_INVALID    ".invalid"
  67 #define DBU_ERROR       ".dbu"
  68 
  69 extern ldom_hdl_t *cpumem_diagnosis_lhp;
  70 
  71 static fmd_hdl_t *cpumem_hdl = NULL;
  72 
  73 #define ERR_CLASS(x, y) (strcmp(strrchr(x, '.'), y))
  74 
  75 static void *
  76 cpumem_alloc(size_t size)
  77 {
  78         assert(cpumem_hdl != NULL);
  79 
  80         return (fmd_hdl_alloc(cpumem_hdl, size, FMD_SLEEP));
  81 }
  82 
  83 static void
  84 cpumem_free(void *addr, size_t size)
  85 {
  86         assert(cpumem_hdl != NULL);
  87 
  88         fmd_hdl_free(cpumem_hdl, addr, size);
  89 }
  90 
  91 /*ARGSUSED*/
  92 cmd_evdisp_t
  93 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status,
  94     uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu)
  95 {
  96         /*
  97          * Niagara writebacks from L2 containing UEs are placed in memory
  98          * with the poison syndrome NI_DRAM_POISON_SYND_FROM_LDWU.
  99          * Memory UE ereports showing this syndrome are dropped because they
 100          * indicate an L2 problem, which should be diagnosed from the
 101          * corresponding L2 cache ereport.
 102          */
 103         switch (cpu->cpu_type) {
 104                 case CPU_ULTRASPARC_T1:
 105                         if (synd == NI_DRAM_POISON_SYND_FROM_LDWU) {
 106                                 fmd_hdl_debug(hdl,
 107                                     "discarding UE due to magic syndrome %x\n",
 108                                     synd);
 109                                 return (CMD_EVD_UNUSED);
 110                         }
 111                         break;
 112                 case CPU_ULTRASPARC_T2:
 113                 case CPU_ULTRASPARC_T2plus:
 114                         if (synd == N2_DRAM_POISON_SYND_FROM_LDWU) {
 115                                 fmd_hdl_debug(hdl,
 116                                     "discarding UE due to magic syndrome %x\n",
 117                                     synd);
 118                                 return (CMD_EVD_UNUSED);
 119                         }
 120                         break;
 121                 default:
 122                         break;
 123         }
 124         return (CMD_EVD_OK);
 125 }
 126 
 127 static int
 128 cpu_present(fmd_hdl_t *hdl, nvlist_t *asru, uint32_t *cpuid)
 129 {
 130         nvlist_t *cp_asru;
 131         uint32_t i;
 132 
 133         if (nvlist_dup(asru, &cp_asru, 0) != 0) {
 134                 fmd_hdl_debug(hdl, "unable to alloc asru for thread\n");
 135                 return (-1);
 136         }
 137 
 138         for (i = *cpuid; i < *cpuid + UTS2_CPUS_PER_CHIP; i++) {
 139 
 140                 (void) nvlist_remove_all(cp_asru, FM_FMRI_CPU_ID);
 141 
 142                 if (nvlist_add_uint32(cp_asru, FM_FMRI_CPU_ID, i) == 0) {
 143                         if (fmd_nvl_fmri_present(hdl, cp_asru) &&
 144                             !fmd_nvl_fmri_unusable(hdl, cp_asru)) {
 145                                 nvlist_free(cp_asru);
 146                                 *cpuid = i;
 147                                 return (0);
 148                         }
 149                 }
 150         }
 151         nvlist_free(cp_asru);
 152         return (-1);
 153 }
 154 
 155 /*ARGSUSED*/
 156 cmd_evdisp_t
 157 cmd_c2c(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 158     cmd_errcl_t clcode)
 159 {
 160         uint32_t cpuid;
 161         nvlist_t *det;
 162         int rc;
 163 
 164         (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
 165         if (nvlist_lookup_uint32(det, FM_FMRI_CPU_ID, &cpuid) == 0) {
 166 
 167                 /*
 168                  * If the c2c bit is set, the sending cache of the
 169                  * cpu must be faulted instead of the memory.
 170                  * If the detector is chip0, the cache of the chip1
 171                  * is faulted and vice versa.
 172                  */
 173                 if (cpuid < UTS2_CPUS_PER_CHIP)
 174                         cpuid = UTS2_CPUS_PER_CHIP;
 175                 else
 176                         cpuid = 0;
 177 
 178                 rc = cpu_present(hdl, det, &cpuid);
 179 
 180                 if (rc != -1) {
 181                         (void) nvlist_remove(det, FM_FMRI_CPU_ID,
 182                             DATA_TYPE_UINT32);
 183                         if (nvlist_add_uint32(det,
 184                             FM_FMRI_CPU_ID, cpuid) == 0) {
 185                                 clcode |= CMD_CPU_LEVEL_CHIP;
 186                                 return (cmd_l2u(hdl, ep, nvl, class, clcode));
 187                         }
 188 
 189                 }
 190         }
 191         fmd_hdl_debug(hdl, "cmd_c2c: no cpuid discarding C2C error");
 192         return (CMD_EVD_BAD);
 193 }
 194 
 195 /*
 196  * sun4v's xe_common routine has an extra argument, clcode, compared
 197  * to routine of same name in sun4u.
 198  */
 199 
 200 static cmd_evdisp_t
 201 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 202     const char *class, cmd_errcl_t clcode, cmd_xe_handler_f *hdlr)
 203 {
 204         uint64_t afar, l2_afar, dram_afar;
 205         uint64_t l2_afsr, dram_afsr, l2_esyr;
 206         uint16_t synd;
 207         uint8_t afar_status, synd_status;
 208         nvlist_t *rsrc;
 209         char *typenm;
 210         uint64_t disp = 0;
 211         int minorvers = 1;
 212 
 213         if (nvlist_lookup_uint64(nvl,
 214             FM_EREPORT_PAYLOAD_NAME_L2_AFSR, &l2_afsr) != 0 &&
 215             nvlist_lookup_uint64(nvl,
 216             FM_EREPORT_PAYLOAD_NAME_L2_ESR, &l2_afsr) != 0)
 217                 return (CMD_EVD_BAD);
 218 
 219         if (nvlist_lookup_uint64(nvl,
 220             FM_EREPORT_PAYLOAD_NAME_DRAM_AFSR, &dram_afsr) != 0 &&
 221             nvlist_lookup_uint64(nvl,
 222             FM_EREPORT_PAYLOAD_NAME_DRAM_ESR, &dram_afsr) != 0)
 223                 return (CMD_EVD_BAD);
 224 
 225         if (nvlist_lookup_uint64(nvl,
 226             FM_EREPORT_PAYLOAD_NAME_L2_AFAR, &l2_afar) != 0 &&
 227             nvlist_lookup_uint64(nvl,
 228             FM_EREPORT_PAYLOAD_NAME_L2_EAR, &l2_afar) != 0)
 229                 return (CMD_EVD_BAD);
 230 
 231         if (nvlist_lookup_uint64(nvl,
 232             FM_EREPORT_PAYLOAD_NAME_DRAM_AFAR, &dram_afar) != 0 &&
 233             nvlist_lookup_uint64(nvl,
 234             FM_EREPORT_PAYLOAD_NAME_DRAM_EAR, &dram_afar) != 0)
 235                 return (CMD_EVD_BAD);
 236 
 237         if (nvlist_lookup_pairs(nvl, 0,
 238             FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
 239             FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
 240             NULL) != 0)
 241                 return (CMD_EVD_BAD);
 242 
 243         synd = dram_afsr;
 244 
 245         /*
 246          * Niagara afar and synd validity.
 247          * For a given set of error registers, the payload value is valid if
 248          * no higher priority error status bit is set.  See UltraSPARC-T1.h for
 249          * error status bit values and priority settings.  Note that for DAC
 250          * and DAU, afar value is taken from l2 error registers, syndrome
 251          * from dram error * registers; for DSC and DSU, both afar and
 252          * syndrome are taken from dram * error registers.  DSU afar and
 253          * syndrome are always valid because no
 254          * higher priority error will override.
 255          */
 256         switch (clcode) {
 257         case CMD_ERRCL_DAC:
 258                 afar = l2_afar;
 259                 afar_status = ((l2_afsr & NI_L2AFSR_P10) == 0) ?
 260                     AFLT_STAT_VALID : AFLT_STAT_INVALID;
 261                 synd_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ?
 262                     AFLT_STAT_VALID : AFLT_STAT_INVALID;
 263                 break;
 264         case CMD_ERRCL_DSC:
 265                 afar = dram_afar;
 266                 afar_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ?
 267                     AFLT_STAT_VALID : AFLT_STAT_INVALID;
 268                 synd_status = afar_status;
 269                 break;
 270         case CMD_ERRCL_DAU:
 271                 afar = l2_afar;
 272                 afar_status = ((l2_afsr & NI_L2AFSR_P05) == 0) ?
 273                     AFLT_STAT_VALID : AFLT_STAT_INVALID;
 274                 synd_status = AFLT_STAT_VALID;
 275 
 276                 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESYR,
 277                     &l2_esyr) == 0) {
 278                         if (l2_esyr & VF_L2ESYR_C2C) {
 279                                 return (cmd_c2c(hdl, ep, nvl, class, clcode));
 280                         }
 281                 }
 282                 break;
 283         case CMD_ERRCL_DSU:
 284                 afar = dram_afar;
 285                 afar_status = synd_status = AFLT_STAT_VALID;
 286                 break;
 287         default:
 288                 fmd_hdl_debug(hdl, "Niagara unrecognized mem error %llx\n",
 289                     clcode);
 290                 return (CMD_EVD_UNUSED);
 291         }
 292 
 293         return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd,
 294             synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc));
 295 }
 296 
 297 
 298 /*ARGSUSED*/
 299 cmd_evdisp_t
 300 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 301     cmd_errcl_t clcode)
 302 {
 303         if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsc") == 0)
 304                 return (CMD_EVD_UNUSED); /* drop VF dsc's */
 305         else
 306                 return (xe_common(hdl, ep, nvl, class, clcode, cmd_ce_common));
 307 }
 308 
 309 /*ARGSUSED*/
 310 cmd_evdisp_t
 311 cmd_ue_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 312     cmd_errcl_t clcode)
 313 {
 314         cmd_evdisp_t rc, rc1;
 315 
 316         /*
 317          * The DAU is cause of the DAU->DCDP/ICDP train:
 318          * - process the cause of the event.
 319          * - register the error to the nop event train, so the effected errors
 320          * (DCDP/ICDP) will be dropped.
 321          */
 322         rc = xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common);
 323 
 324         rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
 325         if (rc1 != 0)
 326                 fmd_hdl_debug(hdl,
 327                     "Fail to add error (%llx) to the train, rc = %d",
 328                     clcode, rc1);
 329 
 330         return (rc);
 331 }
 332 
 333 /*ARGSUSED*/
 334 cmd_evdisp_t
 335 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 336     cmd_errcl_t clcode)
 337 {
 338         if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsu") == 0)
 339                 /*
 340                  * VF dsu's need to be treated like branch errors,
 341                  * because we can't localize to a single DIMM or pair of
 342                  * DIMMs given missing/invalid parts of the dram-ear.
 343                  */
 344                 return (cmd_fb(hdl, ep, nvl, class, clcode));
 345         else
 346                 return (xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common));
 347 }
 348 
 349 /*ARGSUSED*/
 350 cmd_evdisp_t
 351 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 352     cmd_errcl_t clcode)
 353 {
 354         return (CMD_EVD_UNUSED);
 355 }
 356 
 357 
 358 /*ARGSUSED*/
 359 cmd_evdisp_t
 360 cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 361     cmd_errcl_t clcode)
 362 {
 363         cmd_branch_t *branch;
 364         const char *uuid;
 365         nvlist_t *asru, *det;
 366         uint64_t ts3_fcr;
 367 
 368         if (nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &asru) < 0) {
 369                 CMD_STAT_BUMP(bad_mem_asru);
 370                 return (NULL);
 371         }
 372 
 373         if (nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det) < 0) {
 374                 CMD_STAT_BUMP(bad_mem_asru);
 375                 return (NULL);
 376         }
 377 
 378         if (fmd_nvl_fmri_expand(hdl, det) < 0) {
 379                 fmd_hdl_debug(hdl, "Failed to expand detector");
 380                 return (NULL);
 381         }
 382 
 383         branch = cmd_branch_lookup(hdl, asru);
 384         if (branch == NULL) {
 385                 if ((branch = cmd_branch_create(hdl, asru)) == NULL)
 386                         return (CMD_EVD_UNUSED);
 387         }
 388 
 389         if (branch->branch_case.cc_cp != NULL &&
 390             fmd_case_solved(hdl, branch->branch_case.cc_cp)) {
 391                 fmd_hdl_debug(hdl, "Case solved\n");
 392                 return (CMD_EVD_REDUND);
 393         }
 394 
 395         if (branch->branch_case.cc_cp == NULL) {
 396                 branch->branch_case.cc_cp = cmd_case_create(hdl,
 397                     &branch->branch_header, CMD_PTR_BRANCH_CASE, &uuid);
 398         }
 399 
 400         if (ERR_CLASS(class, FBR_ERROR) == 0) {
 401                 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_TS3_FCR,
 402                     &ts3_fcr) == 0 && (ts3_fcr != VF_TS3_FCR)) {
 403                         fmd_hdl_debug(hdl,
 404                             "Processing fbr with lane failover\n");
 405                         cmd_branch_create_fault(hdl, branch,
 406                             "fault.memory.link-f", det);
 407 
 408                 } else {
 409                         fmd_hdl_debug(hdl, "Adding fbr event to serd engine\n");
 410                         if (branch->branch_case.cc_serdnm == NULL) {
 411                                 branch->branch_case.cc_serdnm =
 412                                     cmd_mem_serdnm_create(hdl,
 413                                     "branch", branch->branch_unum);
 414 
 415                                 fmd_serd_create(hdl,
 416                                     branch->branch_case.cc_serdnm,
 417                                     fmd_prop_get_int32(hdl, "fbr_n"),
 418                                     fmd_prop_get_int64(hdl, "fbr_t"));
 419                         }
 420 
 421                         if (fmd_serd_record(hdl,
 422                             branch->branch_case.cc_serdnm, ep) == FMD_B_FALSE)
 423                                 return (CMD_EVD_OK); /* engine hasn't fired */
 424 
 425                         fmd_hdl_debug(hdl, "fbr serd fired\n");
 426 
 427                         fmd_case_add_serd(hdl, branch->branch_case.cc_cp,
 428                             branch->branch_case.cc_serdnm);
 429 
 430                         cmd_branch_create_fault(hdl, branch,
 431                             "fault.memory.link-c", det);
 432                 }
 433         } else if (ERR_CLASS(class, DSU_ERROR) == 0) {
 434                 fmd_hdl_debug(hdl, "Processing dsu event");
 435                 cmd_branch_create_fault(hdl, branch, "fault.memory.bank", det);
 436         } else {
 437                 fmd_hdl_debug(hdl, "Processing fbu event");
 438                 cmd_branch_create_fault(hdl, branch, "fault.memory.link-u",
 439                     det);
 440         }
 441 
 442         branch->branch_flags |= CMD_MEM_F_FAULTING;
 443 
 444         if (branch->branch_case.cc_serdnm != NULL) {
 445                 fmd_serd_destroy(hdl, branch->branch_case.cc_serdnm);
 446                 fmd_hdl_strfree(hdl, branch->branch_case.cc_serdnm);
 447                 branch->branch_case.cc_serdnm = NULL;
 448         }
 449 
 450         fmd_case_add_ereport(hdl, branch->branch_case.cc_cp, ep);
 451         fmd_case_solve(hdl, branch->branch_case.cc_cp);
 452         cmd_branch_dirty(hdl, branch);
 453 
 454         return (CMD_EVD_OK);
 455 }
 456 
 457 /*ARGSUSED*/
 458 cmd_evdisp_t
 459 cmd_fb_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 460     cmd_errcl_t clcode)
 461 {
 462         cmd_evdisp_t rc, rc1;
 463 
 464         /*
 465          * The FBU is cause of the FBU->DCDP/ICDP train:
 466          * - process the cause of the event.
 467          * - register the error to the nop event train, so the effected errors
 468          * (DCDP/ICDP) will be dropped.
 469          */
 470         rc = cmd_fb(hdl, ep, nvl, class, clcode);
 471 
 472         rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
 473         if (rc1 != 0)
 474                 fmd_hdl_debug(hdl,
 475                     "Fail to add error (%llx) to the train, rc = %d",
 476                     clcode, rc1);
 477 
 478         return (rc);
 479 }
 480 
 481 
 482 /*ARGSUSED*/
 483 cmd_evdisp_t
 484 cmd_fw_defect(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
 485     cmd_errcl_t clcode)
 486 {
 487         const char *fltclass = NULL;
 488         nvlist_t *rsc = NULL;
 489         int solve = 0;
 490 
 491         if ((rsc = init_mb(hdl)) == NULL)
 492                 return (CMD_EVD_UNUSED);
 493 
 494         if (ERR_CLASS(class, FERG_INVALID) == 0) {
 495                 fltclass = "defect.fw.generic-sparc.erpt-gen";
 496         } else if (ERR_CLASS(class, DBU_ERROR) == 0) {
 497                 cmd_evdisp_t rc;
 498                 fltclass = "defect.fw.generic-sparc.addr-oob";
 499                 /*
 500                  * add dbu to nop error train
 501                  */
 502                 rc = cmd_xxcu_initial(hdl, ep, nvl, class, clcode,
 503                     CMD_XR_HDLR_NOP);
 504                 if (rc != 0)
 505                         fmd_hdl_debug(hdl,
 506                             "Failed to add error (%llx) to the train, rc = %d",
 507                             clcode, rc);
 508         } else {
 509                 fmd_hdl_debug(hdl, "Unexpected fw defect event %s", class);
 510         }
 511 
 512         if (fltclass) {
 513                 fmd_case_t *cp = NULL;
 514                 nvlist_t *fault = NULL;
 515 
 516                 fault = fmd_nvl_create_fault(hdl, fltclass, 100, NULL,
 517                     NULL, rsc);
 518                 if (fault != NULL) {
 519                         cp = fmd_case_open(hdl, NULL);
 520                         fmd_case_add_ereport(hdl, cp, ep);
 521                         fmd_case_add_suspect(hdl, cp, fault);
 522                         fmd_case_solve(hdl, cp);
 523                         solve = 1;
 524                 }
 525         }
 526 
 527         nvlist_free(rsc);
 528 
 529         return (solve ? CMD_EVD_OK : CMD_EVD_UNUSED);
 530 }
 531 
 532 void
 533 cmd_branch_close(fmd_hdl_t *hdl, void *arg)
 534 {
 535         cmd_branch_destroy(hdl, arg);
 536 }
 537 
 538 
 539 /*ARGSUSED*/
 540 ulong_t
 541 cmd_mem_get_phys_pages(fmd_hdl_t *hdl)
 542 {
 543         /*
 544          * Compute and return the total physical memory in pages from the
 545          * MD/PRI.
 546          * Cache its value.
 547          */
 548         static ulong_t npage = 0;
 549         md_t *mdp;
 550         mde_cookie_t *listp;
 551         uint64_t bmem, physmem = 0;
 552         ssize_t bufsiz = 0;
 553         uint64_t *bufp;
 554         int num_nodes, nmblocks, i;
 555 
 556         if (npage > 0) {
 557                 return (npage);
 558         }
 559 
 560         if (cpumem_hdl == NULL) {
 561                 cpumem_hdl = hdl;
 562         }
 563 
 564         if ((bufsiz = ldom_get_core_md(cpumem_diagnosis_lhp, &bufp)) <= 0) {
 565                 return (0);
 566         }
 567         if ((mdp = md_init_intern(bufp, cpumem_alloc, cpumem_free)) == NULL ||
 568             (num_nodes = md_node_count(mdp)) <= 0) {
 569                 cpumem_free(bufp, (size_t)bufsiz);
 570                 return (0);
 571         }
 572 
 573         listp = (mde_cookie_t *)cpumem_alloc(sizeof (mde_cookie_t) *
 574             num_nodes);
 575         nmblocks = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
 576             md_find_name(mdp, "mblock"),
 577             md_find_name(mdp, "fwd"), listp);
 578         for (i = 0; i < nmblocks; i++) {
 579                 if (md_get_prop_val(mdp, listp[i], "size", &bmem) < 0) {
 580                         physmem = 0;
 581                         break;
 582                 }
 583                 physmem += bmem;
 584         }
 585         npage = (ulong_t)(physmem / cmd.cmd_pagesize);
 586 
 587         cpumem_free(listp, sizeof (mde_cookie_t) * num_nodes);
 588         cpumem_free(bufp, (size_t)bufsiz);
 589         (void) md_fini(mdp);
 590 
 591         return (npage);
 592 }
 593 
 594 static int galois_mul[16][16] = {
 595 /* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
 596 {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, /* 0 */
 597 {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15}, /* 1 */
 598 {  0,  2,  4,  6,  8, 10, 12, 14,  3,  1,  7,  5, 11,  9, 15, 13}, /* 2 */
 599 {  0,  3,  6,  5, 12, 15, 10,  9, 11,  8, 13, 14,  7,  4,  1,  2}, /* 3 */
 600 {  0,  4,  8, 12,  3,  7, 11, 15,  6,  2, 14, 10,  5,  1, 13,  9}, /* 4 */
 601 {  0,  5, 10, 15,  7,  2, 13,  8, 14, 11,  4,  1,  9, 12,  3,  6}, /* 5 */
 602 {  0,  6, 12, 10, 11, 13,  7,  1,  5,  3,  9, 15, 14,  8,  2,  4}, /* 6 */
 603 {  0,  7, 14,  9, 15,  8,  1,  6, 13, 10,  3,  4,  2,  5, 12, 11}, /* 7 */
 604 {  0,  8,  3, 11,  6, 14,  5, 13, 12,  4, 15,  7, 10,  2,  9,  1}, /* 8 */
 605 {  0,  9,  1,  8,  2, 11,  3, 10,  4, 13,  5, 12,  6, 15,  7, 14}, /* 9 */
 606 {  0, 10,  7, 13, 14,  4,  9,  3, 15,  5,  8,  2,  1, 11,  6, 12}, /* A */
 607 {  0, 11,  5, 14, 10,  1, 15,  4,  7, 12,  2,  9, 13,  6,  8,  3}, /* B */
 608 {  0, 12, 11,  7,  5,  9, 14,  2, 10,  6,  1, 13, 15,  3,  4,  8}, /* C */
 609 {  0, 13,  9,  4,  1, 12,  8,  5,  2, 15, 11,  6,  3, 14, 10,  7}, /* D */
 610 {  0, 14, 15,  1, 13,  3,  2, 12,  9,  7,  6,  8,  4, 10, 11,  5}, /* E */
 611 {  0, 15, 13,  2,  9,  6,  4, 11,  1, 14, 12,  3,  8,  7,  5, 10}  /* F */
 612 };
 613 
 614 static int
 615 galois_div(int num, int denom) {
 616         int i;
 617 
 618         for (i = 0; i < 16; i++) {
 619                 if (galois_mul[denom][i] == num)
 620                     return (i);
 621         }
 622         return (-1);
 623 }
 624 
 625 /*
 626  * Data nibbles N0-N31 => 0-31
 627  * check nibbles C0-3 => 32-35
 628  */
 629 
 630 int
 631 cmd_synd2upos(uint16_t syndrome) {
 632 
 633         uint16_t s0, s1, s2, s3;
 634 
 635         if (syndrome == 0)
 636                 return (-1); /* clean syndrome, not a CE */
 637 
 638         s0 = syndrome & 0xF;
 639         s1 = (syndrome >> 4) & 0xF;
 640         s2 = (syndrome >> 8) & 0xF;
 641         s3 = (syndrome >> 12) & 0xF;
 642 
 643         if (s3 == 0) {
 644                 if (s2 == 0 && s1 == 0)
 645                         return (32); /* 0 0 0 e => C0 */
 646                 if (s2 == 0 && s0 == 0)
 647                         return (33); /* 0 0 e 0 => C1 */
 648                 if (s1 == 0 && s0 == 0)
 649                         return (34); /* 0 e 0 0 => C2 */
 650                 if (s2 == s1 && s1 == s0)
 651                         return (31); /* 0 d d d => N31 */
 652                 return (-1); /* multibit error */
 653         } else if (s2 == 0) {
 654                 if (s1 == 0 && s0 == 0)
 655                         return (35); /* e 0 0 0 => C4 */
 656                 if (s1 == 0 || s0 == 0)
 657                         return (-1); /* not a 0 b c */
 658                 if (s3 != galois_div(galois_mul[s1][s1], s0))
 659                         return (-1); /* check nibble not valid */
 660                 return (galois_div(s0, s1) - 1); /* N0 - N14 */
 661         } else if (s1 == 0) {
 662                 if (s2 == 0 || s0 == 0)
 663                         return (-1); /* not a b 0 c */
 664                 if (s3 != galois_div(galois_mul[s2][s2], s0))
 665                         return (-1); /* check nibble not valid */
 666                 return (galois_div(s0, s2) + 14); /* N15 - N29 */
 667         } else if (s0 == 0) {
 668                 if (s3 == s2 && s2 == s1)
 669                         return (30); /* d d d 0 => N30 */
 670                 return (-1);
 671         } else return (-1);
 672 }
 673 
 674 nvlist_t *
 675 cmd_mem2hc(fmd_hdl_t *hdl, nvlist_t *mem_fmri) {
 676 
 677         char **snp;
 678         uint_t n;
 679 
 680         if (nvlist_lookup_string_array(mem_fmri, FM_FMRI_HC_SERIAL_ID,
 681             &snp, &n) != 0)
 682                 return (NULL); /* doesn't have serial id */
 683 
 684         return (cmd_find_dimm_by_sn(hdl, FM_FMRI_SCHEME_HC, *snp));
 685 }
 686 
 687 /*
 688  * formula to convert an unhashed address to hashed address
 689  * PA[17:11] = (PA[32:28] xor PA[17:13]) :: ((PA[19:18] xor PA[12:11])
 690  */
 691 void
 692 cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class)
 693 {
 694 
 695         if (strstr(class, "ultraSPARC-T1") != NULL)
 696                 *addr = afar;
 697         else {
 698                 *addr = (afar & OFFBIT) |
 699                     ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17) |
 700                     ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
 701         }
 702 }
 703 
 704 int
 705 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2)
 706 {
 707         char *p, *q;
 708 
 709         p = strstr(d1->dimm_unum, "CMP");
 710         q = strstr(d2->dimm_unum, "CMP");
 711         if (p != NULL && q != NULL) {
 712                 if (strncmp(p, q, 4) == 0)
 713                         return (1);
 714         }
 715         return (0);
 716 }
 717 
 718 /*
 719  * fault the FRU of the common CMP
 720  */
 721 /*ARGSUSED*/
 722 void
 723 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
 724     uint16_t upos, nvlist_t *det)
 725 {
 726         fmd_case_t *cp;
 727         char *frustr;
 728         nvlist_t *rsrc, *fltlist;
 729         char *s;
 730         char const *str1, *str2;
 731         uint_t len, i;
 732 
 733         s = strstr(d1->dimm_unum, "CMP");
 734         if (s == NULL)
 735                 return;
 736 
 737         frustr = fmd_hdl_zalloc(hdl, strlen(d1->dimm_unum), FMD_SLEEP);
 738         len = strlen(d1->dimm_unum) -  strlen(s);
 739 
 740         if (strncmp(d1->dimm_unum, d2->dimm_unum, len) != 0) {
 741                 for (i = 0, str1 = d1->dimm_unum, str2 = d2->dimm_unum;
 742                     *str1 == *str2 && i <= len;
 743                     str1++, str2++, i++)
 744                         ;
 745                 len = i;
 746         }
 747 
 748         (void) strncpy(frustr, d1->dimm_unum, len);
 749 
 750         rsrc = cmd_mkboard_fru(hdl, frustr, NULL, NULL);
 751 
 752         fmd_hdl_free(hdl, frustr, strlen(d1->dimm_unum));
 753 
 754         if (rsrc == NULL)
 755                 return;
 756 
 757         (void) nvlist_add_nvlist(rsrc, FM_FMRI_AUTHORITY, cmd.cmd_auth);
 758 
 759         cp = fmd_case_open(hdl, NULL);
 760 
 761         fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath", 100,
 762             rsrc, NULL, rsrc);
 763 
 764         fmd_case_add_suspect(hdl, cp, fltlist);
 765         fmd_case_solve(hdl, cp);
 766 
 767         nvlist_free(rsrc);
 768 }