5045 use atomic_{inc,dec}_* instead of atomic_add_*
--- old/usr/src/uts/sun4u/cpu/spitfire.c
+++ new/usr/src/uts/sun4u/cpu/spitfire.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/systm.h>
28 28 #include <sys/archsystm.h>
29 29 #include <sys/machparam.h>
30 30 #include <sys/machsystm.h>
31 31 #include <sys/cpu.h>
32 32 #include <sys/elf_SPARC.h>
33 33 #include <vm/hat_sfmmu.h>
34 34 #include <vm/seg_kpm.h>
35 35 #include <vm/page.h>
36 36 #include <vm/vm_dep.h>
37 37 #include <sys/cpuvar.h>
38 38 #include <sys/spitregs.h>
39 39 #include <sys/async.h>
40 40 #include <sys/cmn_err.h>
41 41 #include <sys/debug.h>
42 42 #include <sys/dditypes.h>
43 43 #include <sys/sunddi.h>
44 44 #include <sys/cpu_module.h>
45 45 #include <sys/prom_debug.h>
46 46 #include <sys/vmsystm.h>
47 47 #include <sys/prom_plat.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/intreg.h>
50 50 #include <sys/machtrap.h>
51 51 #include <sys/ontrap.h>
52 52 #include <sys/ivintr.h>
53 53 #include <sys/atomic.h>
54 54 #include <sys/panic.h>
55 55 #include <sys/ndifm.h>
56 56 #include <sys/fm/protocol.h>
57 57 #include <sys/fm/util.h>
58 58 #include <sys/fm/cpu/UltraSPARC-II.h>
59 59 #include <sys/ddi.h>
60 60 #include <sys/ecc_kstat.h>
61 61 #include <sys/watchpoint.h>
62 62 #include <sys/dtrace.h>
63 63 #include <sys/errclassify.h>
64 64
65 65 uint_t cpu_impl_dual_pgsz = 0;
66 66
67 67 /*
68 68 * Structure for the 8 byte ecache data dump and the associated AFSR state.
69 69 * There will be 8 of these structures used to dump an ecache line (64 bytes).
70 70 */
71 71 typedef struct sf_ec_data_elm {
72 72 uint64_t ec_d8;
73 73 uint64_t ec_afsr;
74 74 } ec_data_t;
75 75
76 76 /*
77 77 * Define spitfire (Ultra I/II) specific asynchronous error structure
78 78 */
79 79 typedef struct spitfire_async_flt {
80 80 struct async_flt cmn_asyncflt; /* common - see sun4u/sys/async.h */
81 81 ushort_t flt_type; /* types of faults - cpu specific */
82 82 ec_data_t flt_ec_data[8]; /* for E$ or mem dump/state */
83 83 uint64_t flt_ec_tag; /* E$ tag info */
84 84 int flt_ec_lcnt; /* number of bad E$ lines */
85 85 ushort_t flt_sdbh; /* UDBH reg */
86 86 ushort_t flt_sdbl; /* UDBL reg */
87 87 } spitf_async_flt;
88 88
89 89 /*
90 90 * Prototypes for support routines in spitfire_asm.s:
91 91 */
92 92 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
93 93 extern uint64_t get_lsu(void);
94 94 extern void set_lsu(uint64_t ncc);
95 95 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
96 96 uint64_t *oafsr, uint64_t *acc_afsr);
97 97 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
98 98 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
99 99 uint64_t *acc_afsr);
100 100 extern uint64_t read_and_clear_afsr();
101 101 extern void write_ec_tag_parity(uint32_t id);
102 102 extern void write_hb_ec_tag_parity(uint32_t id);
103 103
104 104 /*
105 105 * Spitfire module routines:
106 106 */
107 107 static void cpu_async_log_err(void *flt);
108 108 /*PRINTFLIKE6*/
109 109 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
110 110 uint_t logflags, const char *endstr, const char *fmt, ...);
111 111
112 112 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
113 113 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
114 114 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
115 115
116 116 static void log_ce_err(struct async_flt *aflt, char *unum);
117 117 static void log_ue_err(struct async_flt *aflt, char *unum);
118 118 static void check_misc_err(spitf_async_flt *spf_flt);
119 119 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
120 120 static int check_ecc(struct async_flt *aflt);
121 121 static uint_t get_cpu_status(uint64_t arg);
122 122 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
123 123 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
124 124 int *m, uint64_t *afsr);
125 125 static void ecache_kstat_init(struct cpu *cp);
126 126 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
127 127 uint64_t paddr, int mpb, uint64_t);
128 128 static uint64_t ecache_scrub_misc_err(int, uint64_t);
129 129 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
130 130 static void ecache_page_retire(void *);
131 131 static int ecc_kstat_update(kstat_t *ksp, int rw);
132 132 static int ce_count_unum(int status, int len, char *unum);
133 133 static void add_leaky_bucket_timeout(void);
134 134 static int synd_to_synd_code(int synd_status, ushort_t synd);
135 135
136 136 extern uint_t read_all_memscrub;
137 137 extern void memscrub_run(void);
138 138
139 139 static uchar_t isus2i; /* set if sabre */
140 140 static uchar_t isus2e; /* set if hummingbird */
141 141
142 142 /*
143 143 * Default ecache mask and shift settings for Spitfire. If we detect a
144 144 * different CPU implementation, we will modify these values at boot time.
145 145 */
146 146 static uint64_t cpu_ec_tag_mask = S_ECTAG_MASK;
147 147 static uint64_t cpu_ec_state_mask = S_ECSTATE_MASK;
148 148 static uint64_t cpu_ec_par_mask = S_ECPAR_MASK;
149 149 static int cpu_ec_par_shift = S_ECPAR_SHIFT;
150 150 static int cpu_ec_tag_shift = S_ECTAG_SHIFT;
151 151 static int cpu_ec_state_shift = S_ECSTATE_SHIFT;
152 152 static uchar_t cpu_ec_state_exl = S_ECSTATE_EXL;
153 153 static uchar_t cpu_ec_state_mod = S_ECSTATE_MOD;
154 154 static uchar_t cpu_ec_state_shr = S_ECSTATE_SHR;
155 155 static uchar_t cpu_ec_state_own = S_ECSTATE_OWN;
156 156
157 157 /*
158 158 * Default ecache state bits for Spitfire. These individual bits indicate if
159 159 * the given line is in any of the valid or modified states, respectively.
160 160 * Again, we modify these at boot if we detect a different CPU.
161 161 */
162 162 static uchar_t cpu_ec_state_valid = S_ECSTATE_VALID;
163 163 static uchar_t cpu_ec_state_dirty = S_ECSTATE_DIRTY;
164 164 static uchar_t cpu_ec_parity = S_EC_PARITY;
165 165 static uchar_t cpu_ec_state_parity = S_ECSTATE_PARITY;
166 166
167 167 /*
168 168 * This table is used to determine which bit(s) is(are) bad when an ECC
169 169 * error occurs. The array is indexed by an 8-bit syndrome. The entries
170 170 * of this array have the following semantics:
171 171 *
172 172 * 00-63 The number of the bad bit, when only one bit is bad.
173 173 * 64 ECC bit C0 is bad.
174 174 * 65 ECC bit C1 is bad.
175 175 * 66 ECC bit C2 is bad.
176 176 * 67 ECC bit C3 is bad.
177 177 * 68 ECC bit C4 is bad.
178 178 * 69 ECC bit C5 is bad.
179 179 * 70 ECC bit C6 is bad.
180 180 * 71 ECC bit C7 is bad.
181 181 * 72 Two bits are bad.
182 182 * 73 Three bits are bad.
183 183 * 74 Four bits are bad.
184 184 * 75 More than Four bits are bad.
185 185 * 76 NO bits are bad.
186 186 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
187 187 */
188 188
189 189 #define C0 64
190 190 #define C1 65
191 191 #define C2 66
192 192 #define C3 67
193 193 #define C4 68
194 194 #define C5 69
195 195 #define C6 70
196 196 #define C7 71
197 197 #define M2 72
198 198 #define M3 73
199 199 #define M4 74
200 200 #define MX 75
201 201 #define NA 76
202 202
203 203 #define SYND_IS_SINGLE_BIT_DATA(synd_code) ((synd_code >= 0) && \
204 204 (synd_code < C0))
205 205 #define SYND_IS_SINGLE_BIT_CHK(synd_code) ((synd_code >= C0) && \
206 206 (synd_code <= C7))
207 207
208 208 static char ecc_syndrome_tab[] =
209 209 {
210 210 NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
211 211 C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
212 212 C5, M2, M2, 33, M2, 61, 4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
213 213 M2, 0, 1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
214 214 C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
215 215 M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
216 216 M2, MX, 36, M2, 7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
217 217 M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
218 218 C7, M2, M2, 47, M2, 63, MX, M2, M2, 6, 55, M2, 35, M2, M2, 43,
219 219 M2, 5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
220 220 M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
221 221 M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
222 222 M2, 8, 13, M2, 2, M2, M2, M3, 3, M2, M2, M3, M2, MX, MX, M2,
223 223 M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
224 224 M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
225 225 M4, 12, 9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
226 226 };
227 227
228 228 #define SYND_TBL_SIZE 256
229 229
230 230 /*
231 231 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
232 232 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
233 233 */
234 234 #define UDBL_REG 0x8000
235 235 #define UDBL(synd) ((synd & UDBL_REG) >> 15)
236 236 #define SYND(synd) (synd & 0x7FFF)
237 237
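
A raw syndrome word from the datapath therefore carries the UDBH/UDBL selector
in bit 15 and the 8-bit ECC syndrome in its low bits: SYND() strips the
selector, UDBL() recovers it, and the remaining value indexes
ecc_syndrome_tab. A minimal sketch of the decode (classify_synd() is a
hypothetical helper, not part of this file):

	/* Illustrative only: decode a raw syndrome as described above. */
	static const char *
	classify_synd(ushort_t raw)
	{
		int code = ecc_syndrome_tab[SYND(raw) & 0xFF];

		if (SYND_IS_SINGLE_BIT_DATA(code))
			return ("single data bit");	/* code is the bit number */
		if (SYND_IS_SINGLE_BIT_CHK(code))
			return ("single check bit");	/* C0 .. C7 */
		return (code == NA ? "no bits bad" : "multiple bits bad");
	}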
238 238 /*
239 239 * These error types are specific to Spitfire and are used internally for the
240 240 * spitfire fault structure flt_type field.
241 241 */
242 242 #define CPU_UE_ERR 0 /* uncorrectable errors - UEs */
243 243 #define CPU_EDP_LDP_ERR 1 /* LDP or EDP parity error */
244 244 #define CPU_WP_ERR 2 /* WP parity error */
245 245 #define CPU_BTO_BERR_ERR 3 /* bus timeout errors */
246 246 #define CPU_PANIC_CP_ERR 4 /* cp error from panic polling */
247 247 #define CPU_TRAPPING_CP_ERR 5 /* for sabre/hbird only, cp error */
248 248 #define CPU_BADLINE_CI_ERR 6 /* E$ clean_bad line when idle */
249 249 #define CPU_BADLINE_CB_ERR 7 /* E$ clean_bad line when busy */
250 250 #define CPU_BADLINE_DI_ERR 8 /* E$ dirty_bad line when idle */
251 251 #define CPU_BADLINE_DB_ERR 9 /* E$ dirty_bad line when busy */
252 252 #define CPU_ORPHAN_CP_ERR 10 /* Orphan CP error */
253 253 #define CPU_ECACHE_ADDR_PAR_ERR 11 /* Ecache Address parity error */
254 254 #define CPU_ECACHE_STATE_ERR 12 /* Ecache state error */
255 255 #define CPU_ECACHE_ETP_ETS_ERR 13 /* ETP set but ETS is zero */
256 256 #define CPU_ECACHE_TAG_ERR 14 /* Scrub the E$ tag, if state clean */
257 257 #define CPU_ADDITIONAL_ERR 15 /* Additional errors occurred */
258 258
259 259 /*
260 260 * Macro to access the "Spitfire cpu private" data structure.
261 261 */
262 262 #define CPU_PRIVATE_PTR(cp, x) (&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
263 263
264 264 /*
265 265 * set to 0 to disable automatic retiring of pages on
266 266 * DIMMs that have excessive soft errors
267 267 */
268 268 int automatic_page_removal = 1;
269 269
270 270 /*
271 271 * Heuristic for figuring out which module to replace.
272 272 * Relative likelihood that this P_SYND indicates that this module is bad.
273 273 * We call it a "score", though, not a relative likelihood.
274 274 *
275 275 * Step 1.
276 276 * Assign a score to each byte of P_SYND according to the following rules:
277 277 * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
278 278 * If one bit on, give it a 95.
279 279 * If seven bits on, give it a 10.
280 280 * If two bits on:
281 281 * in different nybbles, a 90
282 282 * in same nybble, but unaligned, 85
283 283 * in same nybble and as an aligned pair, 80
284 284 * If six bits on, look at the bits that are off:
285 285 * in same nybble and as an aligned pair, 15
286 286 * in same nybble, but unaligned, 20
287 287 * in different nybbles, a 25
288 288 * If three bits on:
289 289 * in different nybbles, no aligned pairs, 75
290 290 * in different nybbles, one aligned pair, 70
291 291 * in the same nybble, 65
292 292 * If five bits on, look at the bits that are off:
293 293 * in the same nybble, 30
294 294 * in different nybbles, one aligned pair, 35
295 295 * in different nybbles, no aligned pairs, 40
296 296 * If four bits on:
297 297 * all in one nybble, 45
298 298 * as two aligned pairs, 50
299 299 * one aligned pair, 55
300 300 * no aligned pairs, 60
301 301 *
302 302 * Step 2:
303 303 * Take the higher of the two scores (one for each byte) as the score
304 304 * for the module.
305 305 *
306 306 * Print the score for each module, and field service should replace the
307 307 * module with the highest score.
308 308 */
309 309
310 310 /*
311 311 * In the table below, the first row/column comment indicates the
312 312 * number of bits on in that nybble; the second row/column comment is
313 313 * the hex digit.
314 314 */
315 315
316 316 static int
317 317 p_synd_score_table[256] = {
318 318 /* 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 */
319 319 /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */
320 320 /* 0 0 */ 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
321 321 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
322 322 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
323 323 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
324 324 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
325 325 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
326 326 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
327 327 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
328 328 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
329 329 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
330 330 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
331 331 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
332 332 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
333 333 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
334 334 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
335 335 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5,
336 336 };
337 337
338 338 int
339 339 ecc_psynd_score(ushort_t p_synd)
340 340 {
341 341 int i, j, a, b;
342 342
343 343 i = p_synd & 0xFF;
344 344 j = (p_synd >> 8) & 0xFF;
345 345
346 346 a = p_synd_score_table[i];
347 347 b = p_synd_score_table[j];
348 348
349 349 return (a > b ? a : b);
350 350 }
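
As a worked example of the scoring rules: a P_SYND of 0x8003 has a single bit
on in its high byte (0x80, which the table scores 95) and an aligned pair in
its low byte (0x03, scored 80), so the module score is the larger of the two:

	ASSERT(ecc_psynd_score(0x8003) == 95);	/* max(95, 80) */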
351 351
352 352 /*
353 353 * Async Fault Logging
354 354 *
355 355 * To ease identifying, reading, and filtering async fault log messages, the
356 356 * label [AFT#] is now prepended to each async fault message. These messages
357 357 * and the logging rules are implemented by cpu_aflt_log(), below.
358 358 *
359 359 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
360 360 * This includes both corrected ECC memory and ecache faults.
361 361 *
362 362 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
363 363 * else except CE errors) with a priority of 1 (highest). This tag
364 364 * is also used for panic messages that result from an async fault.
365 365 *
366 366 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
367 367 * [AFT3] or parity errors. For example, AFT2 is used for the actual dump
368 368 * of the E-$ data and tags.
369 369 *
370 370 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
371 371 * printed on the console. To send all AFT logs to both the log and the
372 372 * console, set aft_verbose = 1.
373 373 */
374 374
375 375 #define CPU_FLTCPU 0x0001 /* print flt_inst as a CPU id */
376 376 #define CPU_SPACE 0x0002 /* print flt_status (data or instr) */
377 377 #define CPU_ERRID 0x0004 /* print flt_id */
378 378 #define CPU_TL 0x0008 /* print flt_tl */
379 379 #define CPU_ERRID_FIRST 0x0010 /* print flt_id first in message */
380 380 #define CPU_AFSR 0x0020 /* print flt_stat as decoded %afsr */
381 381 #define CPU_AFAR 0x0040 /* print flt_addr as %afar */
382 382 #define CPU_AF_PSYND 0x0080 /* print flt_stat %afsr.PSYND */
383 383 #define CPU_AF_ETS 0x0100 /* print flt_stat %afsr.ETS */
384 384 #define CPU_UDBH 0x0200 /* print flt_sdbh and syndrome */
385 385 #define CPU_UDBL 0x0400 /* print flt_sdbl and syndrome */
386 386 #define CPU_FAULTPC 0x0800 /* print flt_pc */
387 387 #define CPU_SYND 0x1000 /* print flt_synd and unum */
388 388
389 389 #define CMN_LFLAGS (CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL | \
390 390 CPU_AFSR | CPU_AFAR | CPU_AF_PSYND | \
391 391 CPU_AF_ETS | CPU_UDBH | CPU_UDBL | \
392 392 CPU_FAULTPC)
393 393 #define UE_LFLAGS (CMN_LFLAGS | CPU_SYND)
394 394 #define CE_LFLAGS (UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \
395 395 ~CPU_SPACE)
396 396 #define PARERR_LFLAGS (CMN_LFLAGS)
397 397 #define WP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
398 398 #define CP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL & \
399 399 ~CPU_FLTCPU & ~CPU_FAULTPC)
400 400 #define BERRTO_LFLAGS (CMN_LFLAGS)
401 401 #define NO_LFLAGS (0)
402 402
403 403 #define AFSR_FMTSTR0 "\020\1ME"
404 404 #define AFSR_FMTSTR1 "\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \
405 405 "\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
406 406 #define UDB_FMTSTR "\020\012UE\011CE"
407 407
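
These strings feed the kernel's BSD-style %b conversion used by the logging
code: the leading character is the output base (\020 is base 16) and each
following entry is a 1-based bit position in octal followed by that bit's
name. As a rough sketch (not code from this file), an AFSR with UE (\026,
bit 21 counting from 0) and CE (\025, bit 20) set would print as
"AFSR 300000<UE,CE>":

	cmn_err(CE_CONT, "AFSR %b\n", 0x300000, AFSR_FMTSTR1);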
408 408 /*
409 409 * Save the cache bootup state for use when internal
410 410 * caches are to be re-enabled after an error occurs.
411 411 */
412 412 uint64_t cache_boot_state = 0;
413 413
414 414 /*
415 415 * PA[31:0] represent Displacement in UPA configuration space.
416 416 */
417 417 uint_t root_phys_addr_lo_mask = 0xffffffff;
418 418
419 419 /*
420 420 * Spitfire legacy globals
421 421 */
422 422 int itlb_entries;
423 423 int dtlb_entries;
424 424
425 425 void
426 426 cpu_setup(void)
427 427 {
428 428 extern int page_retire_messages;
429 429 extern int page_retire_first_ue;
430 430 extern int at_flags;
431 431 #if defined(SF_ERRATA_57)
432 432 extern caddr_t errata57_limit;
433 433 #endif
434 434 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
435 435
436 436 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
437 437
438 438 /*
439 439 * Spitfire isn't currently FMA-aware, so we have to enable the
440 440 * page retirement messages. We also change the default policy
441 441 * for UE retirement to allow clearing of transient errors.
442 442 */
443 443 page_retire_messages = 1;
444 444 page_retire_first_ue = 0;
445 445
446 446 /*
447 447 * save the cache bootup state.
448 448 */
449 449 cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
450 450
451 451 if (use_page_coloring) {
452 452 do_pg_coloring = 1;
453 453 }
454 454
455 455 /*
456 456 * Tune pp_slots to use up to 1/8th of the tlb entries.
457 457 */
458 458 pp_slots = MIN(8, MAXPP_SLOTS);
459 459
460 460 /*
461 461 * Block stores invalidate all pages of the d$ so pagecopy
462 462 * et al. do not need virtual translations with virtual
463 463 * coloring taken into consideration.
464 464 */
465 465 pp_consistent_coloring = 0;
466 466
467 467 isa_list =
468 468 "sparcv9+vis sparcv9 "
469 469 "sparcv8plus+vis sparcv8plus "
470 470 "sparcv8 sparcv8-fsmuld sparcv7 sparc";
471 471
472 472 cpu_hwcap_flags = AV_SPARC_VIS;
473 473
474 474 /*
475 475 * On Spitfire, there's a hole in the address space
476 476 * that we must never map (the hardware only supports 44 bits of
477 477 * virtual address). Later CPUs are expected to have wider
478 478 * supported address ranges.
479 479 *
480 480 * See address map on p23 of the UltraSPARC 1 user's manual.
481 481 */
482 482 hole_start = (caddr_t)0x80000000000ull;
483 483 hole_end = (caddr_t)0xfffff80000000000ull;
484 484
485 485 /*
486 486 * A spitfire call bug requires us to be a further 4Gbytes of
487 487 * firewall from the spec.
488 488 *
489 489 * See Spitfire Errata #21
490 490 */
491 491 hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
492 492 hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
493 493
494 494 /*
495 495 * The kpm mapping window.
496 496 * kpm_size:
497 497 * The size of a single kpm range.
498 498 * The overall size will be: kpm_size * vac_colors.
499 499 * kpm_vbase:
500 500 * The virtual start address of the kpm range within the kernel
501 501 * virtual address space. kpm_vbase has to be kpm_size aligned.
502 502 */
503 503 kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
504 504 kpm_size_shift = 41;
505 505 kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
506 506
507 507 /*
508 508 * All UltraSPARC platforms should use small kpm page as default, as
509 509 * the KPM large page VAC conflict code has no value to maintain. The
510 510 * new generation of SPARC no longer have VAC conflict issue.
511 511 */
512 512 kpm_smallpages = 1;
513 513
514 514 #if defined(SF_ERRATA_57)
515 515 errata57_limit = (caddr_t)0x80000000ul;
516 516 #endif
517 517
518 518 /*
519 519 * Disable text by default.
520 520 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
521 521 */
522 522 max_utext_lpsize = MMU_PAGESIZE;
523 523 }
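
Several of the constants chosen above are interdependent: kpm_size must equal
1 << kpm_size_shift, kpm_vbase must be kpm_size aligned, and hole_start ends
up one 4-Gbyte firewall below the 44-bit boundary. A few illustrative sanity
checks (not present in the source) make the relationships explicit:

	ASSERT(kpm_size == (1ull << kpm_size_shift));	/* 2TB == 1 << 41 */
	ASSERT(((uintptr_t)kpm_vbase & (kpm_size - 1)) == 0);
	ASSERT(hole_start == (caddr_t)(0x80000000000ull - (1ull << 32)));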
524 524
525 525 static int
526 526 getintprop(pnode_t node, char *name, int deflt)
527 527 {
528 528 int value;
529 529
530 530 switch (prom_getproplen(node, name)) {
531 531 case 0:
532 532 value = 1; /* boolean properties */
533 533 break;
534 534
535 535 case sizeof (int):
536 536 (void) prom_getprop(node, name, (caddr_t)&value);
537 537 break;
538 538
539 539 default:
540 540 value = deflt;
541 541 break;
542 542 }
543 543
544 544 return (value);
545 545 }
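
The return convention is worth spelling out: a zero-length property reads as
boolean TRUE (1), an int-sized property yields its value, and anything else
falls back to the caller's default. cpu_fiximp() below relies on this by
passing -1 as its "absent" sentinel; for example (illustrative only):

	int n = getintprop(dnode, "#itlb-entries", -1);

	if (n != -1)
		itlb_entries = n;	/* property present and int-sized */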
546 546
547 547 /*
548 548 * Set the magic constants of the implementation.
549 549 */
550 550 void
551 551 cpu_fiximp(pnode_t dnode)
552 552 {
553 553 extern int vac_size, vac_shift;
554 554 extern uint_t vac_mask;
555 555 extern int dcache_line_mask;
556 556 int i, a;
557 557 static struct {
558 558 char *name;
559 559 int *var;
560 560 } prop[] = {
561 561 "dcache-size", &dcache_size,
562 562 "dcache-line-size", &dcache_linesize,
563 563 "icache-size", &icache_size,
564 564 "icache-line-size", &icache_linesize,
565 565 "ecache-size", &ecache_size,
566 566 "ecache-line-size", &ecache_alignsize,
567 567 "ecache-associativity", &ecache_associativity,
568 568 "#itlb-entries", &itlb_entries,
569 569 "#dtlb-entries", &dtlb_entries,
570 570 };
571 571
572 572 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
573 573 if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
574 574 *prop[i].var = a;
575 575 }
576 576 }
577 577
578 578 ecache_setsize = ecache_size / ecache_associativity;
579 579
580 580 vac_size = S_VAC_SIZE;
581 581 vac_mask = MMU_PAGEMASK & (vac_size - 1);
582 582 i = 0; a = vac_size;
583 583 while (a >>= 1)
584 584 ++i;
585 585 vac_shift = i;
586 586 shm_alignment = vac_size;
587 587 vac = 1;
588 588
589 589 dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
590 590
591 591 /*
592 592 * UltraSPARC I & II have ecache sizes running
593 593 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
594 594 * and 8 MB. Adjust the copyin/copyout limits
595 595 * according to the cache size. The magic number
596 596 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
597 597 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
598 598 * VIS instructions.
599 599 *
600 600 * We assume that all CPUs on the system have the same size
601 601 * ecache. We're also called very early in the game.
602 602 * /etc/system will be parsed *after* we're called so
603 603 * these values can be overwritten.
604 604 */
605 605
606 606 hw_copy_limit_1 = VIS_COPY_THRESHOLD;
607 607 if (ecache_size <= 524288) {
608 608 hw_copy_limit_2 = VIS_COPY_THRESHOLD;
609 609 hw_copy_limit_4 = VIS_COPY_THRESHOLD;
610 610 hw_copy_limit_8 = VIS_COPY_THRESHOLD;
611 611 } else if (ecache_size == 1048576) {
612 612 hw_copy_limit_2 = 1024;
613 613 hw_copy_limit_4 = 1280;
614 614 hw_copy_limit_8 = 1536;
615 615 } else if (ecache_size == 2097152) {
616 616 hw_copy_limit_2 = 1536;
617 617 hw_copy_limit_4 = 2048;
618 618 hw_copy_limit_8 = 2560;
619 619 } else if (ecache_size == 4194304) {
620 620 hw_copy_limit_2 = 2048;
621 621 hw_copy_limit_4 = 2560;
622 622 hw_copy_limit_8 = 3072;
623 623 } else {
624 624 hw_copy_limit_2 = 2560;
625 625 hw_copy_limit_4 = 3072;
626 626 hw_copy_limit_8 = 3584;
627 627 }
628 628 }
629 629
630 630 /*
631 631 * Called by setcpudelay
632 632 */
633 633 void
634 634 cpu_init_tick_freq(void)
635 635 {
636 636 /*
637 637 * Determine the cpu frequency by calling
638 638 * tod_get_cpufrequency. Use an approximate frequency
639 639 * value computed by the prom if the tod module
640 640 * is not initialized and loaded yet.
641 641 */
642 642 if (tod_ops.tod_get_cpufrequency != NULL) {
643 643 mutex_enter(&tod_lock);
644 644 sys_tick_freq = tod_ops.tod_get_cpufrequency();
645 645 mutex_exit(&tod_lock);
646 646 } else {
647 647 #if defined(HUMMINGBIRD)
648 648 /*
649 649 * the hummingbird version of %stick is used as the basis for
650 650 * low level timing; this provides an independent constant-rate
651 651 * clock for general system use, and frees power mgmt to set
652 652 * various cpu clock speeds.
653 653 */
654 654 if (system_clock_freq == 0)
655 655 cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
656 656 system_clock_freq);
657 657 sys_tick_freq = system_clock_freq;
658 658 #else /* SPITFIRE */
659 659 sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
660 660 #endif
661 661 }
662 662 }
663 663
664 664
665 665 void shipit(int upaid);
666 666 extern uint64_t xc_tick_limit;
667 667 extern uint64_t xc_tick_jump_limit;
668 668
669 669 #ifdef SEND_MONDO_STATS
670 670 uint64_t x_early[NCPU][64];
671 671 #endif
672 672
673 673 /*
674 674 * Note: A version of this function is used by the debugger via the KDI,
675 675 * and must be kept in sync with this version. Any changes made to this
676 676 * function to support new chips or to accommodate errata must also be included
677 677 * in the KDI-specific version. See spitfire_kdi.c.
678 678 */
679 679 void
680 680 send_one_mondo(int cpuid)
681 681 {
682 682 uint64_t idsr, starttick, endtick;
683 683 int upaid, busy, nack;
684 684 uint64_t tick, tick_prev;
685 685 ulong_t ticks;
686 686
687 687 CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
688 688 upaid = CPUID_TO_UPAID(cpuid);
689 689 tick = starttick = gettick();
690 690 shipit(upaid);
691 691 endtick = starttick + xc_tick_limit;
692 692 busy = nack = 0;
693 693 for (;;) {
694 694 idsr = getidsr();
695 695 if (idsr == 0)
696 696 break;
697 697 /*
698 698 * When we detect an irregular tick jump, we adjust
699 699 * the timer window to the current tick value.
700 700 */
701 701 tick_prev = tick;
702 702 tick = gettick();
703 703 ticks = tick - tick_prev;
704 704 if (ticks > xc_tick_jump_limit) {
705 705 endtick = tick + xc_tick_limit;
706 706 } else if (tick > endtick) {
707 707 if (panic_quiesce)
708 708 return;
709 709 cmn_err(CE_PANIC,
710 710 "send mondo timeout (target 0x%x) [%d NACK %d "
711 711 "BUSY]", upaid, nack, busy);
712 712 }
713 713 if (idsr & IDSR_BUSY) {
714 714 busy++;
715 715 continue;
716 716 }
717 717 drv_usecwait(1);
718 718 shipit(upaid);
719 719 nack++;
720 720 busy = 0;
721 721 }
722 722 #ifdef SEND_MONDO_STATS
723 723 x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
724 724 #endif
725 725 }
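
The polling loop above implements a jump-tolerant deadline: rather than
trusting a single endtick computed up front, it re-arms the window whenever
%tick advances by more than xc_tick_jump_limit between reads. Stripped of the
mondo-specific work, the pattern looks like this (done(), limit, and
jump_limit are placeholders, not names from this file):

	uint64_t tick, prev, endtick;

	tick = gettick();
	endtick = tick + limit;
	while (!done()) {
		prev = tick;
		tick = gettick();
		if (tick - prev > jump_limit)
			endtick = tick + limit;	/* re-arm across a jump */
		else if (tick > endtick)
			break;			/* genuine timeout */
	}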
726 726
727 727 void
728 728 send_mondo_set(cpuset_t set)
729 729 {
730 730 int i;
731 731
732 732 for (i = 0; i < NCPU; i++)
733 733 if (CPU_IN_SET(set, i)) {
734 734 send_one_mondo(i);
735 735 CPUSET_DEL(set, i);
736 736 if (CPUSET_ISNULL(set))
737 737 break;
738 738 }
739 739 }
740 740
741 741 void
742 742 syncfpu(void)
743 743 {
744 744 }
745 745
746 746 /*
747 747 * Determine the size of the CPU module's error structure in bytes. This is
748 748 * called once during boot to initialize the error queues.
749 749 */
750 750 int
751 751 cpu_aflt_size(void)
752 752 {
753 753 /*
754 754 * We need to determine whether this is a sabre, Hummingbird or a
755 755 * Spitfire/Blackbird impl and set the appropriate state variables for
756 756 * ecache tag manipulation. We can't do this in cpu_setup() as it is
757 757 * too early in the boot flow and the cpunodes are not initialized.
758 758 * This routine will be called once after cpunodes[] is ready, so do
759 759 * it here.
760 760 */
761 761 if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
762 762 isus2i = 1;
763 763 cpu_ec_tag_mask = SB_ECTAG_MASK;
764 764 cpu_ec_state_mask = SB_ECSTATE_MASK;
765 765 cpu_ec_par_mask = SB_ECPAR_MASK;
766 766 cpu_ec_par_shift = SB_ECPAR_SHIFT;
767 767 cpu_ec_tag_shift = SB_ECTAG_SHIFT;
768 768 cpu_ec_state_shift = SB_ECSTATE_SHIFT;
769 769 cpu_ec_state_exl = SB_ECSTATE_EXL;
770 770 cpu_ec_state_mod = SB_ECSTATE_MOD;
771 771
772 772 /* These states do not exist in sabre - set to 0xFF */
773 773 cpu_ec_state_shr = 0xFF;
774 774 cpu_ec_state_own = 0xFF;
775 775
776 776 cpu_ec_state_valid = SB_ECSTATE_VALID;
777 777 cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
778 778 cpu_ec_state_parity = SB_ECSTATE_PARITY;
779 779 cpu_ec_parity = SB_EC_PARITY;
780 780 } else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
781 781 isus2e = 1;
782 782 cpu_ec_tag_mask = HB_ECTAG_MASK;
783 783 cpu_ec_state_mask = HB_ECSTATE_MASK;
784 784 cpu_ec_par_mask = HB_ECPAR_MASK;
785 785 cpu_ec_par_shift = HB_ECPAR_SHIFT;
786 786 cpu_ec_tag_shift = HB_ECTAG_SHIFT;
787 787 cpu_ec_state_shift = HB_ECSTATE_SHIFT;
788 788 cpu_ec_state_exl = HB_ECSTATE_EXL;
789 789 cpu_ec_state_mod = HB_ECSTATE_MOD;
790 790
791 791 /* These states do not exist in hummingbird - set to 0xFF */
792 792 cpu_ec_state_shr = 0xFF;
793 793 cpu_ec_state_own = 0xFF;
794 794
795 795 cpu_ec_state_valid = HB_ECSTATE_VALID;
796 796 cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
797 797 cpu_ec_state_parity = HB_ECSTATE_PARITY;
798 798 cpu_ec_parity = HB_EC_PARITY;
799 799 }
800 800
801 801 return (sizeof (spitf_async_flt));
802 802 }
803 803
804 804
805 805 /*
806 806 * Correctable ecc error trap handler
807 807 */
808 808 /*ARGSUSED*/
809 809 void
810 810 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
811 811 uint_t p_afsr_high, uint_t p_afar_high)
812 812 {
813 813 ushort_t sdbh, sdbl;
814 814 ushort_t e_syndh, e_syndl;
815 815 spitf_async_flt spf_flt;
816 816 struct async_flt *ecc;
817 817 int queue = 1;
818 818
819 819 uint64_t t_afar = p_afar;
820 820 uint64_t t_afsr = p_afsr;
821 821
822 822 /*
823 823 * Note: the Spitfire data buffer error registers
824 824 * (upper and lower halves) are or'ed into the upper
825 825 * word of the afsr by ce_err().
826 826 */
827 827 sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
828 828 sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
829 829
830 830 e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
831 831 e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);
832 832
833 833 t_afsr &= S_AFSR_MASK;
834 834 t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */
835 835
836 836 /* Setup the async fault structure */
837 837 bzero(&spf_flt, sizeof (spitf_async_flt));
838 838 ecc = (struct async_flt *)&spf_flt;
839 839 ecc->flt_id = gethrtime_waitfree();
840 840 ecc->flt_stat = t_afsr;
841 841 ecc->flt_addr = t_afar;
842 842 ecc->flt_status = ECC_C_TRAP;
843 843 ecc->flt_bus_id = getprocessorid();
844 844 ecc->flt_inst = CPU->cpu_id;
845 845 ecc->flt_pc = (caddr_t)rp->r_pc;
846 846 ecc->flt_func = log_ce_err;
847 847 ecc->flt_in_memory =
848 848 (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
849 849 spf_flt.flt_sdbh = sdbh;
850 850 spf_flt.flt_sdbl = sdbl;
851 851
852 852 /*
853 853 * Check for fatal conditions.
854 854 */
855 855 check_misc_err(&spf_flt);
856 856
857 857 /*
858 858 * Paranoid checks for valid AFSR and UDBs
859 859 */
860 860 if ((t_afsr & P_AFSR_CE) == 0) {
861 861 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
862 862 "** Panic due to CE bit not set in the AFSR",
863 863 " Corrected Memory Error on");
864 864 }
865 865
866 866 /*
867 867 * We want to skip logging only if ALL the following
868 868 * conditions are true:
869 869 *
870 870 * 1. There is only one error
871 871 * 2. That error is a correctable memory error
872 872 * 3. The error is caused by the memory scrubber (in which case
873 873 * the error will have occurred under on_trap protection)
874 874 * 4. The error is on a retired page
875 875 *
876 876 * Note: OT_DATA_EC is used in places other than the memory scrubber.
877 877 * However, none of those errors should occur on a retired page.
878 878 */
879 879 if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
880 880 curthread->t_ontrap != NULL) {
881 881
882 882 if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
883 883 if (page_retire_check(ecc->flt_addr, NULL) == 0) {
884 884 queue = 0;
885 885 }
886 886 }
887 887 }
888 888
889 889 if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
890 890 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
891 891 "** Panic due to CE bits not set in the UDBs",
892 892 " Corrected Memory Error on");
893 893 }
894 894
895 895 if ((sdbh >> 8) & 1) {
896 896 ecc->flt_synd = e_syndh;
897 897 ce_scrub(ecc);
898 898 if (queue) {
899 899 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
900 900 sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
901 901 }
902 902 }
903 903
904 904 if ((sdbl >> 8) & 1) {
905 905 ecc->flt_addr = t_afar | 0x8; /* Sabres do not have a UDBL */
906 906 ecc->flt_synd = e_syndl | UDBL_REG;
907 907 ce_scrub(ecc);
908 908 if (queue) {
909 909 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
910 910 sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
911 911 }
912 912 }
913 913
914 914 /*
915 915 * Re-enable all error trapping (CEEN currently cleared).
916 916 */
917 917 clr_datapath();
918 918 set_asyncflt(P_AFSR_CE);
919 919 set_error_enable(EER_ENABLE);
920 920 }
921 921
922 922 /*
923 923 * Cpu specific CE logging routine
924 924 */
925 925 static void
926 926 log_ce_err(struct async_flt *aflt, char *unum)
927 927 {
928 928 spitf_async_flt spf_flt;
929 929
930 930 if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
931 931 return;
932 932 }
933 933
934 934 spf_flt.cmn_asyncflt = *aflt;
935 935 cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
936 936 " Corrected Memory Error detected by");
937 937 }
938 938
939 939 /*
940 940 * Spitfire does not perform any further CE classification refinement
941 941 */
942 942 /*ARGSUSED*/
943 943 int
944 944 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
945 945 size_t afltoffset)
946 946 {
947 947 return (0);
948 948 }
949 949
950 950 char *
951 951 flt_to_error_type(struct async_flt *aflt)
952 952 {
953 953 if (aflt->flt_status & ECC_INTERMITTENT)
954 954 return (ERR_TYPE_DESC_INTERMITTENT);
955 955 if (aflt->flt_status & ECC_PERSISTENT)
956 956 return (ERR_TYPE_DESC_PERSISTENT);
957 957 if (aflt->flt_status & ECC_STICKY)
958 958 return (ERR_TYPE_DESC_STICKY);
959 959 return (ERR_TYPE_DESC_UNKNOWN);
960 960 }
961 961
962 962 /*
963 963 * Called by correctable ecc error logging code to print out
964 964 * the sticky/persistent/intermittent status of the error.
965 965 */
966 966 static void
967 967 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
968 968 {
969 969 ushort_t status;
970 970 char *status1_str = "Memory";
971 971 char *status2_str = "Intermittent";
972 972 struct async_flt *aflt = (struct async_flt *)spf_flt;
973 973
974 974 status = aflt->flt_status;
975 975
976 976 if (status & ECC_ECACHE)
977 977 status1_str = "Ecache";
978 978
979 979 if (status & ECC_STICKY)
980 980 status2_str = "Sticky";
981 981 else if (status & ECC_PERSISTENT)
982 982 status2_str = "Persistent";
983 983
984 984 cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
985 985 NULL, " Corrected %s Error on %s is %s",
986 986 status1_str, unum, status2_str);
987 987 }
988 988
989 989 /*
990 990 * check for a valid ce syndrome, then call the
991 991 * displacement flush scrubbing code, and then check the afsr to see if
992 992 * the error was persistent or intermittent. Reread the afar/afsr to see
993 993 * if the error was not scrubbed successfully, and is therefore sticky.
994 994 */
995 995 /*ARGSUSED1*/
996 996 void
997 997 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
998 998 {
999 999 uint64_t eer, afsr;
1000 1000 ushort_t status;
1001 1001
1002 1002 ASSERT(getpil() > LOCK_LEVEL);
1003 1003
1004 1004 /*
1005 1005 * It is possible that the flt_addr is not a valid
1006 1006 * physical address. To deal with this, we disable
1007 1007 * NCEEN while we scrub that address. If this causes
1008 1008 * a TIMEOUT/BERR, we know this is an invalid
1009 1009 * memory location.
1010 1010 */
1011 1011 kpreempt_disable();
1012 1012 eer = get_error_enable();
1013 1013 if (eer & (EER_CEEN | EER_NCEEN))
1014 1014 set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1015 1015
1016 1016 /*
1017 1017 * Check whether the error detected by IO is persistent, sticky or
1018 1018 * intermittent.
1019 1019 */
1020 1020 if (ecc->flt_status & ECC_IOBUS) {
1021 1021 ecc->flt_stat = P_AFSR_CE;
1022 1022 }
1023 1023
1024 1024 scrubphys(P2ALIGN(ecc->flt_addr, 64),
1025 1025 cpunodes[CPU->cpu_id].ecache_size);
1026 1026
1027 1027 get_asyncflt(&afsr);
1028 1028 if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1029 1029 /*
1030 1030 * Must ensure that we don't get the TIMEOUT/BERR
1031 1031 * when we reenable NCEEN, so we clear the AFSR.
1032 1032 */
1033 1033 set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1034 1034 if (eer & (EER_CEEN | EER_NCEEN))
1035 1035 set_error_enable(eer);
1036 1036 kpreempt_enable();
1037 1037 return;
1038 1038 }
1039 1039
1040 1040 if (eer & EER_NCEEN)
1041 1041 set_error_enable(eer & ~EER_CEEN);
1042 1042
1043 1043 /*
1044 1044 * Check and clear any ECC errors from the scrub. If the scrub did
1045 1045 * not trip over the error, mark it intermittent. If the scrub did
1046 1046 * trip the error again and it did not scrub away, mark it sticky.
1047 1047 * Otherwise mark it persistent.
1048 1048 */
1049 1049 if (check_ecc(ecc) != 0) {
1050 1050 cpu_read_paddr(ecc, 0, 1);
1051 1051
1052 1052 if (check_ecc(ecc) != 0)
1053 1053 status = ECC_STICKY;
1054 1054 else
1055 1055 status = ECC_PERSISTENT;
1056 1056 } else
1057 1057 status = ECC_INTERMITTENT;
1058 1058
1059 1059 if (eer & (EER_CEEN | EER_NCEEN))
1060 1060 set_error_enable(eer);
1061 1061 kpreempt_enable();
1062 1062
1063 1063 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1064 1064 ecc->flt_status |= status;
1065 1065 }
1066 1066
1067 1067 /*
1068 1068 * get the syndrome and unum, and then call the routines
1069 1069 * to check the other cpus and iobuses, and then do the error logging.
1070 1070 */
1071 1071 /*ARGSUSED1*/
1072 1072 void
1073 1073 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1074 1074 {
1075 1075 char unum[UNUM_NAMLEN];
1076 1076 int len = 0;
1077 1077 int ce_verbose = 0;
1078 1078 int err;
1079 1079
1080 1080 ASSERT(ecc->flt_func != NULL);
1081 1081
1082 1082 /* Get the unum string for logging purposes */
1083 1083 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1084 1084 UNUM_NAMLEN, &len);
1085 1085
1086 1086 /* Call specific error logging routine */
1087 1087 (void) (*ecc->flt_func)(ecc, unum);
1088 1088
1089 1089 /*
1090 1090 * Count errors per unum.
1091 1091 * Non-memory errors are all counted via a special unum string.
1092 1092 */
1093 1093 if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
1094 1094 automatic_page_removal) {
1095 1095 (void) page_retire(ecc->flt_addr, err);
1096 1096 }
1097 1097
1098 1098 if (ecc->flt_panic) {
1099 1099 ce_verbose = 1;
1100 1100 } else if ((ecc->flt_class == BUS_FAULT) ||
1101 1101 (ecc->flt_stat & P_AFSR_CE)) {
1102 1102 ce_verbose = (ce_verbose_memory > 0);
1103 1103 } else {
1104 1104 ce_verbose = 1;
1105 1105 }
1106 1106
1107 1107 if (ce_verbose) {
1108 1108 spitf_async_flt sflt;
1109 1109 int synd_code;
1110 1110
1111 1111 sflt.cmn_asyncflt = *ecc; /* for cpu_aflt_log() */
1112 1112
1113 1113 cpu_ce_log_status(&sflt, unum);
1114 1114
1115 1115 synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1116 1116 SYND(ecc->flt_synd));
1117 1117
1118 1118 if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1119 1119 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1120 1120 NULL, " ECC Data Bit %2d was in error "
1121 1121 "and corrected", synd_code);
1122 1122 } else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1123 1123 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1124 1124 NULL, " ECC Check Bit %2d was in error "
1125 1125 "and corrected", synd_code - C0);
1126 1126 } else {
1127 1127 /*
1128 1128 * These are UE errors - we shouldn't be getting CE
1129 1129 * traps for these; handle them in case of bad h/w.
1130 1130 */
1131 1131 switch (synd_code) {
1132 1132 case M2:
1133 1133 cpu_aflt_log(CE_CONT, 0, &sflt,
1134 1134 CPU_ERRID_FIRST, NULL,
1135 1135 " Two ECC Bits were in error");
1136 1136 break;
1137 1137 case M3:
1138 1138 cpu_aflt_log(CE_CONT, 0, &sflt,
1139 1139 CPU_ERRID_FIRST, NULL,
1140 1140 " Three ECC Bits were in error");
1141 1141 break;
1142 1142 case M4:
1143 1143 cpu_aflt_log(CE_CONT, 0, &sflt,
1144 1144 CPU_ERRID_FIRST, NULL,
1145 1145 " Four ECC Bits were in error");
1146 1146 break;
1147 1147 case MX:
1148 1148 cpu_aflt_log(CE_CONT, 0, &sflt,
1149 1149 CPU_ERRID_FIRST, NULL,
1150 1150 " More than Four ECC bits were "
1151 1151 "in error");
1152 1152 break;
1153 1153 default:
1154 1154 cpu_aflt_log(CE_CONT, 0, &sflt,
1155 1155 CPU_ERRID_FIRST, NULL,
1156 1156 " Unknown fault syndrome %d",
1157 1157 synd_code);
1158 1158 break;
1159 1159 }
1160 1160 }
1161 1161 }
1162 1162
1163 1163 /* Display entire cache line, if valid address */
1164 1164 if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1165 1165 read_ecc_data(ecc, 1, 1);
1166 1166 }
1167 1167
1168 1168 /*
1169 1169 * We route all errors through a single switch statement.
1170 1170 */
1171 1171 void
1172 1172 cpu_ue_log_err(struct async_flt *aflt)
1173 1173 {
1174 1174
1175 1175 switch (aflt->flt_class) {
1176 1176 case CPU_FAULT:
1177 1177 cpu_async_log_err(aflt);
1178 1178 break;
1179 1179
1180 1180 case BUS_FAULT:
1181 1181 bus_async_log_err(aflt);
1182 1182 break;
1183 1183
1184 1184 default:
1185 1185 cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1186 1186 "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1187 1187 break;
1188 1188 }
1189 1189 }
1190 1190
1191 1191 /* Values for action variable in cpu_async_error() */
1192 1192 #define ACTION_NONE 0
1193 1193 #define ACTION_TRAMPOLINE 1
1194 1194 #define ACTION_AST_FLAGS 2
1195 1195
1196 1196 /*
1197 1197 * Access error trap handler for asynchronous cpu errors. This routine is
1198 1198 * called to handle a data or instruction access error. All fatal errors are
1199 1199 * completely handled by this routine (by panicking). Non-fatal error logging
1200 1200 * is queued for later processing either via AST or softint at a lower PIL.
1201 1201 * In case of panic, the error log queue will also be processed as part of the
1202 1202 * panic flow to ensure all errors are logged. This routine is called with all
1203 1203 * errors disabled at PIL15. The AFSR bits are cleared and the UDBL and UDBH
1204 1204 * error bits are also cleared. The hardware has also disabled the I and
1205 1205 * D-caches for us, so we must re-enable them before returning.
1206 1206 *
1207 1207 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1208 1208 *
1209 1209 * _______________________________________________________________
1210 1210 * | Privileged tl0 | Unprivileged |
1211 1211 * | Protected | Unprotected | Protected | Unprotected |
1212 1212 * |on_trap|lofault| | | |
1213 1213 * -------------|-------|-------+---------------+---------------+-------------|
1214 1214 * | | | | | |
1215 1215 * UE/LDP/EDP | L,T,p | L,R,p | L,P | n/a | L,R,p |
1216 1216 * | | | | | |
1217 1217 * TO/BERR | T | S | L,P | n/a | S |
1218 1218 * | | | | | |
1219 1219 * WP | L,M,p | L,M,p | L,M,p | n/a | L,M,p |
1220 1220 * | | | | | |
1221 1221 * CP (IIi/IIe) | L,P | L,P | L,P | n/a | L,P |
1222 1222 * ____________________________________________________________________________
1223 1223 *
1224 1224 *
1225 1225 * Action codes:
1226 1226 *
1227 1227 * L - log
1228 1228 * M - kick off memscrubber if flt_in_memory
1229 1229 * P - panic
1230 1230 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1231 1231 * R - i) if aft_panic is set, panic
1232 1232 * ii) otherwise, send hwerr event to contract and SIGKILL to process
1233 1233 * S - send SIGBUS to process
1234 1234 * T - trampoline
1235 1235 *
1236 1236 * Special cases:
1237 1237 *
1238 1238 * 1) if aft_testfatal is set, all faults result in a panic regardless
1239 1239 * of type (even WP), protection (even on_trap), or privilege.
1240 1240 */
1241 1241 /*ARGSUSED*/
1242 1242 void
1243 1243 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1244 1244 uint_t p_afsr_high, uint_t p_afar_high)
1245 1245 {
1246 1246 ushort_t sdbh, sdbl, ttype, tl;
1247 1247 spitf_async_flt spf_flt;
1248 1248 struct async_flt *aflt;
1249 1249 char pr_reason[28];
1250 1250 uint64_t oafsr;
1251 1251 uint64_t acc_afsr = 0; /* accumulated afsr */
1252 1252 int action = ACTION_NONE;
1253 1253 uint64_t t_afar = p_afar;
1254 1254 uint64_t t_afsr = p_afsr;
1255 1255 int expected = DDI_FM_ERR_UNEXPECTED;
1256 1256 ddi_acc_hdl_t *hp;
1257 1257
1258 1258 /*
1259 1259 * We need to look at p_flag to determine if the thread detected an
1260 1260 * error while dumping core. We can't grab p_lock here, but it's ok
1261 1261 * because we just need a consistent snapshot and we know that everyone
1262 1262 * else will store a consistent set of bits while holding p_lock. We
1263 1263 * don't have to worry about a race because SDOCORE is set once prior
1264 1264 * to doing i/o from the process's address space and is never cleared.
1265 1265 */
1266 1266 uint_t pflag = ttoproc(curthread)->p_flag;
1267 1267
1268 1268 pr_reason[0] = '\0';
1269 1269
1270 1270 /*
1271 1271 * Note: the Spitfire data buffer error registers
1272 1272 * (upper and lower halves) are or'ed into the upper
1273 1273 * word of the afsr by async_err() if P_AFSR_UE is set.
1274 1274 */
1275 1275 sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1276 1276 sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1277 1277
1278 1278 /*
1279 1279 * Grab the ttype encoded in <63:53> of the saved
1280 1280 * afsr passed from async_err()
1281 1281 */
1282 1282 ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1283 1283 tl = (ushort_t)(t_afsr >> 62);
1284 1284
1285 1285 t_afsr &= S_AFSR_MASK;
1286 1286 t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */
1287 1287
1288 1288 /*
1289 1289 * Initialize most of the common and CPU-specific structure. We derive
1290 1290 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit. The
1291 1291 * initial setting of aflt->flt_panic is based on TL: we must panic if
1292 1292 * the error occurred at TL > 0. We also set flt_panic if the test/demo
1293 1293 * tuneable aft_testfatal is set (not the default).
1294 1294 */
1295 1295 bzero(&spf_flt, sizeof (spitf_async_flt));
1296 1296 aflt = (struct async_flt *)&spf_flt;
1297 1297 aflt->flt_id = gethrtime_waitfree();
1298 1298 aflt->flt_stat = t_afsr;
1299 1299 aflt->flt_addr = t_afar;
1300 1300 aflt->flt_bus_id = getprocessorid();
1301 1301 aflt->flt_inst = CPU->cpu_id;
1302 1302 aflt->flt_pc = (caddr_t)rp->r_pc;
1303 1303 aflt->flt_prot = AFLT_PROT_NONE;
1304 1304 aflt->flt_class = CPU_FAULT;
1305 1305 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1306 1306 aflt->flt_tl = (uchar_t)tl;
1307 1307 aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1308 1308 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1309 1309
1310 1310 /*
1311 1311 * Set flt_status based on the trap type. If we end up here as the
1312 1312 * result of a UE detected by the CE handling code, leave status 0.
1313 1313 */
1314 1314 switch (ttype) {
1315 1315 case T_DATA_ERROR:
1316 1316 aflt->flt_status = ECC_D_TRAP;
1317 1317 break;
1318 1318 case T_INSTR_ERROR:
1319 1319 aflt->flt_status = ECC_I_TRAP;
1320 1320 break;
1321 1321 }
1322 1322
1323 1323 spf_flt.flt_sdbh = sdbh;
1324 1324 spf_flt.flt_sdbl = sdbl;
1325 1325
1326 1326 /*
1327 1327 * Check for fatal async errors.
1328 1328 */
1329 1329 check_misc_err(&spf_flt);
1330 1330
1331 1331 /*
1332 1332 * If the trap occurred in privileged mode at TL=0, we need to check to
1333 1333 * see if we were executing in the kernel under on_trap() or t_lofault
1334 1334 * protection. If so, modify the saved registers so that we return
1335 1335 * from the trap to the appropriate trampoline routine.
1336 1336 */
1337 1337 if (aflt->flt_priv && tl == 0) {
1338 1338 if (curthread->t_ontrap != NULL) {
1339 1339 on_trap_data_t *otp = curthread->t_ontrap;
1340 1340
1341 1341 if (otp->ot_prot & OT_DATA_EC) {
1342 1342 aflt->flt_prot = AFLT_PROT_EC;
1343 1343 otp->ot_trap |= OT_DATA_EC;
1344 1344 rp->r_pc = otp->ot_trampoline;
1345 1345 rp->r_npc = rp->r_pc + 4;
1346 1346 action = ACTION_TRAMPOLINE;
1347 1347 }
1348 1348
1349 1349 if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1350 1350 (otp->ot_prot & OT_DATA_ACCESS)) {
1351 1351 aflt->flt_prot = AFLT_PROT_ACCESS;
1352 1352 otp->ot_trap |= OT_DATA_ACCESS;
1353 1353 rp->r_pc = otp->ot_trampoline;
1354 1354 rp->r_npc = rp->r_pc + 4;
1355 1355 action = ACTION_TRAMPOLINE;
1356 1356 /*
1357 1357 * for peeks and caut_gets, errors are expected
1358 1358 */
1359 1359 hp = (ddi_acc_hdl_t *)otp->ot_handle;
1360 1360 if (!hp)
1361 1361 expected = DDI_FM_ERR_PEEK;
1362 1362 else if (hp->ah_acc.devacc_attr_access ==
1363 1363 DDI_CAUTIOUS_ACC)
1364 1364 expected = DDI_FM_ERR_EXPECTED;
1365 1365 }
1366 1366
1367 1367 } else if (curthread->t_lofault) {
1368 1368 aflt->flt_prot = AFLT_PROT_COPY;
1369 1369 rp->r_g1 = EFAULT;
1370 1370 rp->r_pc = curthread->t_lofault;
1371 1371 rp->r_npc = rp->r_pc + 4;
1372 1372 action = ACTION_TRAMPOLINE;
1373 1373 }
1374 1374 }
1375 1375
1376 1376 /*
1377 1377 * Determine if this error needs to be treated as fatal. Note that
1378 1378 * multiple errors detected upon entry to this trap handler does not
1379 1379 * necessarily warrant a panic. We only want to panic if the trap
1380 1380 * happened in privileged mode and not under t_ontrap or t_lofault
1381 1381 * protection. The exception is WP: if we *only* get WP, it is not
1382 1382 * fatal even if the trap occurred in privileged mode, except on Sabre.
1383 1383 *
1384 1384 * aft_panic, if set, effectively makes us treat usermode
1385 1385 * UE/EDP/LDP faults as if they were privileged - so we will
1386 1386 * panic instead of sending a contract event. A lofault-protected
1387 1387 * fault will normally follow the contract event; if aft_panic is
1388 1388 * set this will be changed to a panic.
1389 1389 *
1390 1390 * For usermode BERR/BTO errors, eg from processes performing device
1391 1391 * control through mapped device memory, we need only deliver
1392 1392 * a SIGBUS to the offending process.
1393 1393 *
1394 1394 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1395 1395 * checked later; for now we implement the common reasons.
1396 1396 */
1397 1397 if (aflt->flt_prot == AFLT_PROT_NONE) {
1398 1398 /*
1399 1399 * Beware - multiple bits may be set in AFSR
1400 1400 */
1401 1401 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1402 1402 if (aflt->flt_priv || aft_panic)
1403 1403 aflt->flt_panic = 1;
1404 1404 }
1405 1405
1406 1406 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1407 1407 if (aflt->flt_priv)
1408 1408 aflt->flt_panic = 1;
1409 1409 }
1410 1410 } else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1411 1411 aflt->flt_panic = 1;
1412 1412 }
1413 1413
1414 1414 /*
1415 1415 * UE/BERR/TO: Call our bus nexus friends to check for
1416 1416 * IO errors that may have resulted in this trap.
1417 1417 */
1418 1418 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1419 1419 cpu_run_bus_error_handlers(aflt, expected);
1420 1420 }
1421 1421
1422 1422 /*
1423 1423 * Handle UE: If the UE is in memory, we need to flush the bad line from
1424 1424 * the E-cache. We also need to query the bus nexus for fatal errors.
1425 1425 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
1426 1426 * caches may introduce more parity errors (especially when the module
1427 1427 * is bad) and in sabre there is no guarantee that such errors
1428 1428 * (if introduced) are written back as poisoned data.
1429 1429 */
1430 1430 if (t_afsr & P_AFSR_UE) {
1431 1431 int i;
1432 1432
1433 1433 (void) strcat(pr_reason, "UE ");
1434 1434
1435 1435 spf_flt.flt_type = CPU_UE_ERR;
1436 1436 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1437 1437 MMU_PAGESHIFT)) ? 1: 0;
1438 1438
1439 1439 /*
1440 1440 * With UE, we have the PA of the fault.
1441 1441 * Let do a diagnostic read to get the ecache
1442 1442 * data and tag info of the bad line for logging.
1443 1443 */
1444 1444 if (aflt->flt_in_memory) {
1445 1445 uint32_t ec_set_size;
1446 1446 uchar_t state;
1447 1447 uint32_t ecache_idx;
1448 1448 uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1449 1449
1450 1450 /* touch the line to put it in ecache */
1451 1451 acc_afsr |= read_and_clear_afsr();
1452 1452 (void) lddphys(faultpa);
1453 1453 acc_afsr |= (read_and_clear_afsr() &
1454 1454 ~(P_AFSR_EDP | P_AFSR_UE));
1455 1455
1456 1456 ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1457 1457 ecache_associativity;
1458 1458
1459 1459 for (i = 0; i < ecache_associativity; i++) {
1460 1460 ecache_idx = i * ec_set_size +
1461 1461 (aflt->flt_addr % ec_set_size);
1462 1462 get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1463 1463 (uint64_t *)&spf_flt.flt_ec_data[0],
1464 1464 &spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1465 1465 acc_afsr |= oafsr;
1466 1466
1467 1467 state = (uchar_t)((spf_flt.flt_ec_tag &
1468 1468 cpu_ec_state_mask) >> cpu_ec_state_shift);
1469 1469
1470 1470 if ((state & cpu_ec_state_valid) &&
1471 1471 ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1472 1472 ((uint64_t)aflt->flt_addr >>
1473 1473 cpu_ec_tag_shift)))
1474 1474 break;
1475 1475 }
1476 1476
1477 1477 /*
1478 1478 * Check to see if the ecache tag is valid for the
1479 1479 * fault PA. In the very unlikely event where the
1480 1480 * line could be victimized, no ecache info will be
1481 1481 * available. If this is the case, capture the line
1482 1482 * from memory instead.
1483 1483 */
1484 1484 if ((state & cpu_ec_state_valid) == 0 ||
1485 1485 (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1486 1486 ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1487 1487 for (i = 0; i < 8; i++, faultpa += 8) {
1488 1488 ec_data_t *ecdptr;
1489 1489
1490 1490 ecdptr = &spf_flt.flt_ec_data[i];
1491 1491 acc_afsr |= read_and_clear_afsr();
1492 1492 ecdptr->ec_d8 = lddphys(faultpa);
1493 1493 acc_afsr |= (read_and_clear_afsr() &
1494 1494 ~(P_AFSR_EDP | P_AFSR_UE));
1495 1495 ecdptr->ec_afsr = 0;
1496 1496 /* null afsr value */
1497 1497 }
1498 1498
1499 1499 /*
1500 1500 * Mark tag invalid to indicate mem dump
1501 1501 * when we print out the info.
1502 1502 */
1503 1503 spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1504 1504 }
1505 1505 spf_flt.flt_ec_lcnt = 1;
1506 1506
1507 1507 /*
1508 1508 * Flush out the bad line
1509 1509 */
1510 1510 flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1511 1511 cpunodes[CPU->cpu_id].ecache_size);
1512 1512
1513 1513 acc_afsr |= clear_errors(NULL, NULL);
1514 1514 }
1515 1515
1516 1516 /*
1517 1517 * Ask our bus nexus friends if they have any fatal errors. If
1518 1518 * so, they will log appropriate error messages and panic as a
1519 1519 * result. We then queue an event for each UDB that reports a
1520 1520 * UE. Each UE reported in a UDB will have its own log message.
1521 1521 *
1522 1522 * Note from kbn: In the case where there are multiple UEs
1523 1523 * (ME bit is set) - the AFAR address is only accurate to
1524 1524 * the 16-byte granularity. One cannot tell whether the AFAR
1525 1525 * belongs to the UDBH or UDBL syndromes. In this case, we
1526 1526 * always report the AFAR address to be 16-byte aligned.
1527 1527 *
1528 1528 * If we're on a Sabre, there is no SDBL, but it will always
1529 1529 * read as zero, so the sdbl test below will safely fail.
1530 1530 */
1531 1531 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1532 1532 aflt->flt_panic = 1;
1533 1533
1534 1534 if (sdbh & P_DER_UE) {
1535 1535 aflt->flt_synd = sdbh & P_DER_E_SYND;
1536 1536 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1537 1537 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1538 1538 aflt->flt_panic);
1539 1539 }
1540 1540 if (sdbl & P_DER_UE) {
1541 1541 aflt->flt_synd = sdbl & P_DER_E_SYND;
1542 1542 aflt->flt_synd |= UDBL_REG; /* indicates UDBL */
1543 1543 if (!(aflt->flt_stat & P_AFSR_ME))
1544 1544 aflt->flt_addr |= 0x8;
1545 1545 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1546 1546 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1547 1547 aflt->flt_panic);
1548 1548 }
1549 1549
1550 1550 /*
1551 1551		 * If we got a UE and are panicking, save the fault PA in a known
1552 1552 * location so that the platform specific panic code can check
1553 1553 * for copyback errors.
1554 1554 */
1555 1555 if (aflt->flt_panic && aflt->flt_in_memory) {
1556 1556 panic_aflt = *aflt;
1557 1557 }
1558 1558 }
1559 1559
1560 1560 /*
1561 1561 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1562 1562 * async error for logging. For Sabre, we panic on EDP or LDP.
1563 1563 */
1564 1564 if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1565 1565 spf_flt.flt_type = CPU_EDP_LDP_ERR;
1566 1566
1567 1567 if (t_afsr & P_AFSR_EDP)
1568 1568 (void) strcat(pr_reason, "EDP ");
1569 1569
1570 1570 if (t_afsr & P_AFSR_LDP)
1571 1571 (void) strcat(pr_reason, "LDP ");
1572 1572
1573 1573 /*
1574 1574 * Here we have no PA to work with.
1575 1575 * Scan each line in the ecache to look for
1576 1576 * the one with bad parity.
1577 1577 */
1578 1578 aflt->flt_addr = AFLT_INV_ADDR;
1579 1579 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1580 1580 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1581 1581 acc_afsr |= (oafsr & ~P_AFSR_WP);
1582 1582
1583 1583 /*
1584 1584 * If we found a bad PA, update the state to indicate if it is
1585 1585 * memory or I/O space. This code will be important if we ever
1586 1586 * support cacheable frame buffers.
1587 1587 */
1588 1588 if (aflt->flt_addr != AFLT_INV_ADDR) {
1589 1589 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1590 1590 MMU_PAGESHIFT)) ? 1 : 0;
1591 1591 }
1592 1592
1593 1593 if (isus2i || isus2e)
1594 1594 aflt->flt_panic = 1;
1595 1595
1596 1596 cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1597 1597 FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1598 1598 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1599 1599 aflt->flt_panic);
1600 1600 }
1601 1601
1602 1602 /*
1603 1603 * Timeout and bus error handling. There are two cases to consider:
1604 1604 *
1605 1605	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
1606 1606 * have already modified the saved registers so that we will return
1607 1607 * from the trap to the appropriate trampoline routine; otherwise panic.
1608 1608 *
1609 1609 * (2) In user mode, we can simply use our AST mechanism to deliver
1610 1610	 * a SIGBUS.  We do not log the occurrence - processes performing
1611 1611 * device control would generate lots of uninteresting messages.
1612 1612 */
1613 1613 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1614 1614 if (t_afsr & P_AFSR_TO)
1615 1615 (void) strcat(pr_reason, "BTO ");
1616 1616
1617 1617 if (t_afsr & P_AFSR_BERR)
1618 1618 (void) strcat(pr_reason, "BERR ");
1619 1619
1620 1620 spf_flt.flt_type = CPU_BTO_BERR_ERR;
1621 1621 if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1622 1622 cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1623 1623 FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1624 1624 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1625 1625 aflt->flt_panic);
1626 1626 }
1627 1627 }
1628 1628
1629 1629 /*
1630 1630 * Handle WP: WP happens when the ecache is victimized and a parity
1631 1631 * error was detected on a writeback. The data in question will be
1632 1632 * poisoned as a UE will be written back. The PA is not logged and
1633 1633 * it is possible that it doesn't belong to the trapped thread. The
1634 1634 * WP trap is not fatal, but it could be fatal to someone that
1635 1635 * subsequently accesses the toxic page. We set read_all_memscrub
1636 1636 * to force the memscrubber to read all of memory when it awakens.
1637 1637 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1638 1638 * UE back to poison the data.
1639 1639 */
1640 1640 if (t_afsr & P_AFSR_WP) {
1641 1641 (void) strcat(pr_reason, "WP ");
1642 1642 if (isus2i || isus2e) {
1643 1643 aflt->flt_panic = 1;
1644 1644 } else {
1645 1645 read_all_memscrub = 1;
1646 1646 }
1647 1647 spf_flt.flt_type = CPU_WP_ERR;
1648 1648 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1649 1649 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1650 1650 aflt->flt_panic);
1651 1651 }
1652 1652
1653 1653 /*
1654 1654 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1655 1655 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1656 1656 * This is fatal.
1657 1657 */
1658 1658
1659 1659 if (t_afsr & P_AFSR_CP) {
1660 1660 if (isus2i || isus2e) {
1661 1661 (void) strcat(pr_reason, "CP ");
1662 1662 aflt->flt_panic = 1;
1663 1663 spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1664 1664 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1665 1665 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1666 1666 aflt->flt_panic);
1667 1667 } else {
1668 1668 /*
1669 1669			 * Orphan CP: happens due to a signal integrity
1670 1670			 * problem on a CPU, where a CP is reported without
1671 1671			 * its associated UE.  This is handled by locating
1672 1672			 * the bad parity line and kicking off the memscrubber
1673 1673			 * to find the UE, in memory or in another CPU's cache.
1674 1674 */
1675 1675 spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1676 1676 (void) strcat(pr_reason, "ORPHAN_CP ");
1677 1677
1678 1678 /*
1679 1679 * Here we have no PA to work with.
1680 1680 * Scan each line in the ecache to look for
1681 1681 * the one with bad parity.
1682 1682 */
1683 1683 aflt->flt_addr = AFLT_INV_ADDR;
1684 1684 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1685 1685 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1686 1686 &oafsr);
1687 1687 acc_afsr |= oafsr;
1688 1688
1689 1689 /*
1690 1690 * If we found a bad PA, update the state to indicate
1691 1691 * if it is memory or I/O space.
1692 1692 */
1693 1693 if (aflt->flt_addr != AFLT_INV_ADDR) {
1694 1694 aflt->flt_in_memory =
1695 1695 (pf_is_memory(aflt->flt_addr >>
1696 1696 MMU_PAGESHIFT)) ? 1 : 0;
1697 1697 }
1698 1698 read_all_memscrub = 1;
1699 1699 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1700 1700 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1701 1701 aflt->flt_panic);
1702 1702
1703 1703 }
1704 1704 }
1705 1705
1706 1706 /*
1707 1707 * If we queued an error other than WP or CP and we are going to return
1708 1708 * from the trap and the error was in user mode or inside of a
1709 1709 * copy routine, set AST flag so the queue will be drained before
1710 1710 * returning to user mode.
1711 1711 *
1712 1712 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1713 1713 * and send an event to its process contract.
1714 1714 *
1715 1715 * For BERR/BTO, the AST processing will SIGBUS the process. There
1716 1716 * will have been no error queued in this case.
1717 1717 */
1718 1718 if ((t_afsr &
1719 1719 (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1720 1720 (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1721 1721 int pcb_flag = 0;
1722 1722
1723 1723 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1724 1724 pcb_flag |= ASYNC_HWERR;
1725 1725
1726 1726 if (t_afsr & P_AFSR_BERR)
1727 1727 pcb_flag |= ASYNC_BERR;
1728 1728
1729 1729 if (t_afsr & P_AFSR_TO)
1730 1730 pcb_flag |= ASYNC_BTO;
1731 1731
1732 1732 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1733 1733 aston(curthread);
1734 1734 action = ACTION_AST_FLAGS;
1735 1735 }
1736 1736
1737 1737 /*
1738 1738 * In response to a deferred error, we must do one of three things:
1739 1739 * (1) set the AST flags, (2) trampoline, or (3) panic. action is
1740 1740 * set in cases (1) and (2) - check that either action is set or
1741 1741 * (3) is true.
1742 1742 *
1743 1743 * On II, the WP writes poisoned data back to memory, which will
1744 1744 * cause a UE and a panic or reboot when read. In this case, we
1745 1745 * don't need to panic at this time. On IIi and IIe,
1746 1746 * aflt->flt_panic is already set above.
1747 1747 */
1748 1748 ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1749 1749 (t_afsr & P_AFSR_WP));
1750 1750
1751 1751 /*
1752 1752 * Make a final sanity check to make sure we did not get any more async
1753 1753 * errors and accumulate the afsr.
1754 1754 */
1755 1755 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1756 1756 cpunodes[CPU->cpu_id].ecache_linesize);
1757 1757 (void) clear_errors(&spf_flt, NULL);
1758 1758
1759 1759 /*
1760 1760 * Take care of a special case: If there is a UE in the ecache flush
1761 1761 * area, we'll see it in flush_ecache(). This will trigger the
1762 1762 * CPU_ADDITIONAL_ERRORS case below.
1763 1763 *
1764 1764 * This could occur if the original error was a UE in the flush area,
1765 1765 * or if the original error was an E$ error that was flushed out of
1766 1766 * the E$ in scan_ecache().
1767 1767 *
1768 1768 * If it's at the same address that we're already logging, then it's
1769 1769 * probably one of these cases. Clear the bit so we don't trip over
1770 1770 * it on the additional errors case, which could cause an unnecessary
1771 1771 * panic.
1772 1772 */
1773 1773 if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1774 1774 acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1775 1775 else
1776 1776 acc_afsr |= aflt->flt_stat;
1777 1777
1778 1778 /*
1779 1779	 * Check the accumulated afsr for the important bits.
1780 1780	 * Make sure the spf_flt.flt_type value is set, and
1781 1781	 * enqueue an error.
1782 1782 */
1783 1783 if (acc_afsr &
1784 1784 (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1785 1785 if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1786 1786 P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1787 1787 P_AFSR_ISAP))
1788 1788 aflt->flt_panic = 1;
1789 1789
1790 1790 spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1791 1791 aflt->flt_stat = acc_afsr;
1792 1792 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1793 1793 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1794 1794 aflt->flt_panic);
1795 1795 }
1796 1796
1797 1797 /*
1798 1798 * If aflt->flt_panic is set at this point, we need to panic as the
1799 1799 * result of a trap at TL > 0, or an error we determined to be fatal.
1800 1800 * We've already enqueued the error in one of the if-clauses above,
1801 1801 * and it will be dequeued and logged as part of the panic flow.
1802 1802 */
1803 1803 if (aflt->flt_panic) {
1804 1804 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1805 1805 "See previous message(s) for details", " %sError(s)",
1806 1806 pr_reason);
1807 1807 }
1808 1808
1809 1809 /*
1810 1810 * Before returning, we must re-enable errors, and
1811 1811 * reset the caches to their boot-up state.
1812 1812 */
1813 1813 set_lsu(get_lsu() | cache_boot_state);
1814 1814 set_error_enable(EER_ENABLE);
1815 1815 }
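
/*
 * Illustrative sketch (userland, not part of this file; the cache
 * geometry below is a hypothetical stand-in): the diagnostic-read loop
 * above probes one e$ index per way, at the fault PA's offset within a
 * set, and then compares the captured tag against the PA's tag bits.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t flt_addr = 0x12345678;		/* hypothetical fault PA */
	uint32_t ecache_size = 0x100000;	/* 1MB e$, assumed */
	int ecache_associativity = 2;		/* 2-way, assumed */
	uint32_t ec_set_size = ecache_size / ecache_associativity;
	int i;

	for (i = 0; i < ecache_associativity; i++) {
		/* same index computation as the diagnostic-read loop */
		uint32_t ecache_idx = i * ec_set_size +
		    (uint32_t)(flt_addr % ec_set_size);
		printf("way %d: e$ index 0x%x (line at 0x%x)\n",
		    i, (unsigned)ecache_idx, (unsigned)(ecache_idx & ~63U));
	}
	return (0);
}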
1816 1816
1817 1817 /*
1818 1818 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1819 1819 * This routine is shared by the CE and UE handling code.
1820 1820 */
1821 1821 static void
1822 1822 check_misc_err(spitf_async_flt *spf_flt)
1823 1823 {
1824 1824 struct async_flt *aflt = (struct async_flt *)spf_flt;
1825 1825 char *fatal_str = NULL;
1826 1826
1827 1827 /*
1828 1828 * The ISAP and ETP errors are supposed to cause a POR
1829 1829 * from the system, so in theory we never, ever see these messages.
1830 1830 * ISAP, ETP and IVUE are considered to be fatal.
1831 1831 */
1832 1832 if (aflt->flt_stat & P_AFSR_ISAP)
1833 1833 fatal_str = " System Address Parity Error on";
1834 1834 else if (aflt->flt_stat & P_AFSR_ETP)
1835 1835 fatal_str = " Ecache Tag Parity Error on";
1836 1836 else if (aflt->flt_stat & P_AFSR_IVUE)
1837 1837 fatal_str = " Interrupt Vector Uncorrectable Error on";
1838 1838 if (fatal_str != NULL) {
1839 1839 cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1840 1840 NULL, fatal_str);
1841 1841 }
1842 1842 }
1843 1843
1844 1844 /*
1845 1845 * Routine to convert a syndrome into a syndrome code.
1846 1846 */
1847 1847 static int
1848 1848 synd_to_synd_code(int synd_status, ushort_t synd)
1849 1849 {
1850 1850 if (synd_status != AFLT_STAT_VALID)
1851 1851 return (-1);
1852 1852
1853 1853 /*
1854 1854 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1855 1855 * to get the code indicating which bit(s) is(are) bad.
1856 1856 */
1857 1857 if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1858 1858 return (-1);
1859 1859 else
1860 1860 return (ecc_syndrome_tab[synd]);
1861 1861 }
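
/*
 * Illustrative sketch (the table contents below are hypothetical; the
 * real ecc_syndrome_tab is derived from the sec-ded-s4ed code): the
 * 8-bit syndrome is a direct index into a table whose entry names the
 * failing bit, with 0 and out-of-range syndromes carrying no usable
 * bit information, exactly as synd_to_synd_code() treats them above.
 */
#define	SKETCH_TBL_SIZE	16	/* the real table covers all 256 syndromes */

static const int synd_tab_sketch[SKETCH_TBL_SIZE] = {
	-1, 0, 1, -1, 2, -1, -1, 3, 4, -1, -1, 5, -1, 6, 7, -1
};

static int
synd_code_sketch(unsigned short synd)
{
	if (synd == 0 || synd >= SKETCH_TBL_SIZE)
		return (-1);
	return (synd_tab_sketch[synd]);
}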
1862 1862
1863 1863 /* ARGSUSED */
1864 1864 int
1865 1865 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
1866 1866 {
1867 1867 return (ENOTSUP);
1868 1868 }
1869 1869
1870 1870 /* ARGSUSED */
1871 1871 int
1872 1872 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
1873 1873 {
1874 1874 return (ENOTSUP);
1875 1875 }
1876 1876
1877 1877 /* ARGSUSED */
1878 1878 int
1879 1879 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
1880 1880 {
1881 1881 return (ENOTSUP);
1882 1882 }
1883 1883
1884 1884 /*
1885 1885 * Routine to return a string identifying the physical name
1886 1886 * associated with a memory/cache error.
1887 1887 */
1888 1888 /* ARGSUSED */
1889 1889 int
1890 1890 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1891 1891 uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1892 1892 char *buf, int buflen, int *lenp)
1893 1893 {
1894 1894 short synd_code;
1895 1895 int ret;
1896 1896
1897 1897 if (flt_in_memory) {
1898 1898 synd_code = synd_to_synd_code(synd_status, synd);
1899 1899 if (synd_code == -1) {
1900 1900 ret = EINVAL;
1901 1901 } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1902 1902 buf, buflen, lenp) != 0) {
1903 1903 ret = EIO;
1904 1904 } else if (*lenp <= 1) {
1905 1905 ret = EINVAL;
1906 1906 } else {
1907 1907 ret = 0;
1908 1908 }
1909 1909 } else {
1910 1910 ret = ENOTSUP;
1911 1911 }
1912 1912
1913 1913 if (ret != 0) {
1914 1914 buf[0] = '\0';
1915 1915 *lenp = 0;
1916 1916 }
1917 1917
1918 1918 return (ret);
1919 1919 }
1920 1920
1921 1921 /*
1922 1922 * Wrapper for cpu_get_mem_unum() routine that takes an
1923 1923 * async_flt struct rather than explicit arguments.
1924 1924 */
1925 1925 int
1926 1926 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1927 1927 char *buf, int buflen, int *lenp)
1928 1928 {
1929 1929 return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1930 1930 aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1931 1931 aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1932 1932 }
1933 1933
1934 1934 /*
1935 1935  * This routine is a more generic interface to cpu_get_mem_unum()
1936 1936 * that may be used by other modules (e.g. mm).
1937 1937 */
1938 1938 int
1939 1939 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1940 1940 char *buf, int buflen, int *lenp)
1941 1941 {
1942 1942 int synd_status, flt_in_memory, ret;
1943 1943 char unum[UNUM_NAMLEN];
1944 1944
1945 1945 /*
1946 1946 * Check for an invalid address.
1947 1947 */
1948 1948 if (afar == (uint64_t)-1)
1949 1949 return (ENXIO);
1950 1950
1951 1951 if (synd == (uint64_t)-1)
1952 1952 synd_status = AFLT_STAT_INVALID;
1953 1953 else
1954 1954 synd_status = AFLT_STAT_VALID;
1955 1955
1956 1956 flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1957 1957
1958 1958 if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1959 1959 CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1960 1960 != 0)
1961 1961 return (ret);
1962 1962
1963 1963 if (*lenp >= buflen)
1964 1964 return (ENAMETOOLONG);
1965 1965
1966 1966 (void) strncpy(buf, unum, buflen);
1967 1967
1968 1968 return (0);
1969 1969 }
1970 1970
1971 1971 /*
1972 1972 * Routine to return memory information associated
1973 1973 * with a physical address and syndrome.
1974 1974 */
1975 1975 /* ARGSUSED */
1976 1976 int
1977 1977 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1978 1978 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1979 1979 int *segsp, int *banksp, int *mcidp)
1980 1980 {
1981 1981 return (ENOTSUP);
1982 1982 }
1983 1983
1984 1984 /*
1985 1985 * Routine to return a string identifying the physical
1986 1986 * name associated with a cpuid.
1987 1987 */
1988 1988 /* ARGSUSED */
1989 1989 int
1990 1990 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1991 1991 {
1992 1992 return (ENOTSUP);
1993 1993 }
1994 1994
1995 1995 /*
1996 1996 * This routine returns the size of the kernel's FRU name buffer.
1997 1997 */
1998 1998 size_t
1999 1999 cpu_get_name_bufsize()
2000 2000 {
2001 2001 return (UNUM_NAMLEN);
2002 2002 }
2003 2003
2004 2004 /*
2005 2005 * Cpu specific log func for UEs.
2006 2006 */
2007 2007 static void
2008 2008 log_ue_err(struct async_flt *aflt, char *unum)
2009 2009 {
2010 2010 spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2011 2011 int len = 0;
2012 2012
2013 2013 #ifdef DEBUG
2014 2014 int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2015 2015
2016 2016 /*
2017 2017 * Paranoid Check for priv mismatch
2018 2018 * Only applicable for UEs
2019 2019 */
2020 2020 if (afsr_priv != aflt->flt_priv) {
2021 2021 /*
2022 2022 * The priv bits in %tstate and %afsr did not match; we expect
2023 2023 * this to be very rare, so flag it with a message.
2024 2024 */
2025 2025 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2026 2026 ": PRIV bit in TSTATE and AFSR mismatched; "
2027 2027 "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2028 2028
2029 2029 /* update saved afsr to reflect the correct priv */
2030 2030 aflt->flt_stat &= ~P_AFSR_PRIV;
2031 2031 if (aflt->flt_priv)
2032 2032 aflt->flt_stat |= P_AFSR_PRIV;
2033 2033 }
2034 2034 #endif /* DEBUG */
2035 2035
2036 2036 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2037 2037 UNUM_NAMLEN, &len);
2038 2038
2039 2039 cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2040 2040 " Uncorrectable Memory Error on");
2041 2041
2042 2042 if (SYND(aflt->flt_synd) == 0x3) {
2043 2043 cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2044 2044 " Syndrome 0x3 indicates that this may not be a "
2045 2045 "memory module problem");
2046 2046 }
2047 2047
2048 2048 if (aflt->flt_in_memory)
2049 2049 cpu_log_ecmem_info(spf_flt);
2050 2050 }
2051 2051
2052 2052
2053 2053 /*
2054 2054 * The cpu_async_log_err() function is called via the ue_drain() function to
2055 2055 * handle logging for CPU events that are dequeued. As such, it can be invoked
2056 2056 * from softint context, from AST processing in the trap() flow, or from the
2057 2057 * panic flow. We decode the CPU-specific data, and log appropriate messages.
2058 2058 */
2059 2059 static void
2060 2060 cpu_async_log_err(void *flt)
2061 2061 {
2062 2062 spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2063 2063 struct async_flt *aflt = (struct async_flt *)flt;
2064 2064 char unum[UNUM_NAMLEN];
2065 2065 char *space;
2066 2066 char *ecache_scrub_logstr = NULL;
2067 2067
2068 2068 switch (spf_flt->flt_type) {
2069 2069 case CPU_UE_ERR:
2070 2070 /*
2071 2071 * We want to skip logging only if ALL the following
2072 2072 * conditions are true:
2073 2073 *
2074 2074 * 1. We are not panicking
2075 2075 * 2. There is only one error
2076 2076 * 3. That error is a memory error
2077 2077 * 4. The error is caused by the memory scrubber (in
2078 2078 * which case the error will have occurred under
2079 2079 * on_trap protection)
2080 2080 * 5. The error is on a retired page
2081 2081 *
2082 2082		 * Note 1: AFLT_PROT_EC is used in places other than the memory
2083 2083 * scrubber. However, none of those errors should occur
2084 2084 * on a retired page.
2085 2085 *
2086 2086 * Note 2: In the CE case, these errors are discarded before
2087 2087 * the errorq. In the UE case, we must wait until now --
2088 2088 * softcall() grabs a mutex, which we can't do at a high PIL.
2089 2089 */
2090 2090 if (!panicstr &&
2091 2091 (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2092 2092 aflt->flt_prot == AFLT_PROT_EC) {
2093 2093 if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2094 2094 /* Zero the address to clear the error */
2095 2095 softcall(ecc_page_zero, (void *)aflt->flt_addr);
2096 2096 return;
2097 2097 }
2098 2098 }
2099 2099
2100 2100 /*
2101 2101 * Log the UE and check for causes of this UE error that
2102 2102 * don't cause a trap (Copyback error). cpu_async_error()
2103 2103 * has already checked the i/o buses for us.
2104 2104 */
2105 2105 log_ue_err(aflt, unum);
2106 2106 if (aflt->flt_in_memory)
2107 2107 cpu_check_allcpus(aflt);
2108 2108 break;
2109 2109
2110 2110 case CPU_EDP_LDP_ERR:
2111 2111 if (aflt->flt_stat & P_AFSR_EDP)
2112 2112 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2113 2113 NULL, " EDP event on");
2114 2114
2115 2115 if (aflt->flt_stat & P_AFSR_LDP)
2116 2116 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2117 2117 NULL, " LDP event on");
2118 2118
2119 2119		/* Log ecache info if it exists */
2120 2120 if (spf_flt->flt_ec_lcnt > 0) {
2121 2121 cpu_log_ecmem_info(spf_flt);
2122 2122
2123 2123 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2124 2124 NULL, " AFAR was derived from E$Tag");
2125 2125 } else {
2126 2126 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2127 2127 NULL, " No error found in ecache (No fault "
2128 2128 "PA available)");
2129 2129 }
2130 2130 break;
2131 2131
2132 2132 case CPU_WP_ERR:
2133 2133 /*
2134 2134 * If the memscrub thread hasn't yet read
2135 2135 * all of memory, as we requested in the
2136 2136 * trap handler, then give it a kick to
2137 2137 * make sure it does.
2138 2138 */
2139 2139 if (!isus2i && !isus2e && read_all_memscrub)
2140 2140 memscrub_run();
2141 2141
2142 2142 cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2143 2143 " WP event on");
2144 2144 return;
2145 2145
2146 2146 case CPU_BTO_BERR_ERR:
2147 2147 /*
2148 2148 * A bus timeout or error occurred that was in user mode or not
2149 2149 * in a protected kernel code region.
2150 2150 */
2151 2151 if (aflt->flt_stat & P_AFSR_BERR) {
2152 2152 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2153 2153 spf_flt, BERRTO_LFLAGS, NULL,
2154 2154 " Bus Error on System Bus in %s mode from",
2155 2155 aflt->flt_priv ? "privileged" : "user");
2156 2156 }
2157 2157
2158 2158 if (aflt->flt_stat & P_AFSR_TO) {
2159 2159 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2160 2160 spf_flt, BERRTO_LFLAGS, NULL,
2161 2161 " Timeout on System Bus in %s mode from",
2162 2162 aflt->flt_priv ? "privileged" : "user");
2163 2163 }
2164 2164
2165 2165 return;
2166 2166
2167 2167 case CPU_PANIC_CP_ERR:
2168 2168 /*
2169 2169 * Process the Copyback (CP) error info (if any) obtained from
2170 2170 * polling all the cpus in the panic flow. This case is only
2171 2171 * entered if we are panicking.
2172 2172 */
2173 2173 ASSERT(panicstr != NULL);
2174 2174 ASSERT(aflt->flt_id == panic_aflt.flt_id);
2175 2175
2176 2176 /* See which space - this info may not exist */
2177 2177 if (panic_aflt.flt_status & ECC_D_TRAP)
2178 2178 space = "Data ";
2179 2179 else if (panic_aflt.flt_status & ECC_I_TRAP)
2180 2180 space = "Instruction ";
2181 2181 else
2182 2182 space = "";
2183 2183
2184 2184 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2185 2185 " AFAR was derived from UE report,"
2186 2186 " CP event on CPU%d (caused %saccess error on %s%d)",
2187 2187 aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2188 2188 "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2189 2189
2190 2190 if (spf_flt->flt_ec_lcnt > 0)
2191 2191 cpu_log_ecmem_info(spf_flt);
2192 2192 else
2193 2193 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2194 2194 NULL, " No cache dump available");
2195 2195
2196 2196 return;
2197 2197
2198 2198 case CPU_TRAPPING_CP_ERR:
2199 2199 /*
2200 2200 * For sabre only. This is a copyback ecache parity error due
2201 2201 * to a PCI DMA read. We should be panicking if we get here.
2202 2202 */
2203 2203 ASSERT(panicstr != NULL);
2204 2204 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2205 2205 " AFAR was derived from UE report,"
2206 2206 " CP event on CPU%d (caused Data access error "
2207 2207 "on PCIBus)", aflt->flt_inst);
2208 2208 return;
2209 2209
2210 2210 /*
2211 2211	 * We log the ecache lines of the following states:
2212 2212	 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2213 2213	 * dirty_bad_busy, if ecache_scrub_verbose is set, and we
2214 2214	 * panic in addition to logging if ecache_scrub_panic is set.
2215 2215 */
2216 2216 case CPU_BADLINE_CI_ERR:
2217 2217 ecache_scrub_logstr = "CBI";
2218 2218 /* FALLTHRU */
2219 2219
2220 2220 case CPU_BADLINE_CB_ERR:
2221 2221 if (ecache_scrub_logstr == NULL)
2222 2222 ecache_scrub_logstr = "CBB";
2223 2223 /* FALLTHRU */
2224 2224
2225 2225 case CPU_BADLINE_DI_ERR:
2226 2226 if (ecache_scrub_logstr == NULL)
2227 2227 ecache_scrub_logstr = "DBI";
2228 2228 /* FALLTHRU */
2229 2229
2230 2230 case CPU_BADLINE_DB_ERR:
2231 2231 if (ecache_scrub_logstr == NULL)
2232 2232 ecache_scrub_logstr = "DBB";
2233 2233
2234 2234 cpu_aflt_log(CE_NOTE, 2, spf_flt,
2235 2235 (CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2236 2236 " %s event on", ecache_scrub_logstr);
2237 2237 cpu_log_ecmem_info(spf_flt);
2238 2238
2239 2239 return;
2240 2240
2241 2241 case CPU_ORPHAN_CP_ERR:
2242 2242 /*
2243 2243		 * Orphan CPs: the CP bit is set, but no CPU reports
2244 2244		 * the associated UE.
2245 2245 */
2246 2246 if (read_all_memscrub)
2247 2247 memscrub_run();
2248 2248
2249 2249 cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2250 2250 NULL, " Orphan CP event on");
2251 2251
2252 2252		/* Log ecache info if it exists */
2253 2253 if (spf_flt->flt_ec_lcnt > 0)
2254 2254 cpu_log_ecmem_info(spf_flt);
2255 2255 else
2256 2256 cpu_aflt_log(CE_NOTE, 2, spf_flt,
2257 2257 (CP_LFLAGS | CPU_FLTCPU), NULL,
2258 2258 " No error found in ecache (No fault "
2259 2259			    "PA available)");
2260 2260 return;
2261 2261
2262 2262 case CPU_ECACHE_ADDR_PAR_ERR:
2263 2263 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2264 2264 " E$ Tag Address Parity error on");
2265 2265 cpu_log_ecmem_info(spf_flt);
2266 2266 return;
2267 2267
2268 2268 case CPU_ECACHE_STATE_ERR:
2269 2269 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2270 2270 " E$ Tag State Parity error on");
2271 2271 cpu_log_ecmem_info(spf_flt);
2272 2272 return;
2273 2273
2274 2274 case CPU_ECACHE_TAG_ERR:
2275 2275 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2276 2276 " E$ Tag scrub event on");
2277 2277 cpu_log_ecmem_info(spf_flt);
2278 2278 return;
2279 2279
2280 2280 case CPU_ECACHE_ETP_ETS_ERR:
2281 2281 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2282 2282 " AFSR.ETP is set and AFSR.ETS is zero on");
2283 2283 cpu_log_ecmem_info(spf_flt);
2284 2284 return;
2285 2285
2286 2286
2287 2287 case CPU_ADDITIONAL_ERR:
2288 2288 cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2289 2289 " Additional errors detected during error processing on");
2290 2290 return;
2291 2291
2292 2292 default:
2293 2293 cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2294 2294 "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2295 2295 return;
2296 2296 }
2297 2297
2298 2298 /* ... fall through from the UE, EDP, or LDP cases */
2299 2299
2300 2300 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2301 2301 if (!panicstr) {
2302 2302 (void) page_retire(aflt->flt_addr, PR_UE);
2303 2303 } else {
2304 2304 /*
2305 2305 * Clear UEs on panic so that we don't
2306 2306 * get haunted by them during panic or
2307 2307 * after reboot
2308 2308 */
2309 2309 clearphys(P2ALIGN(aflt->flt_addr, 64),
2310 2310 cpunodes[CPU->cpu_id].ecache_size,
2311 2311 cpunodes[CPU->cpu_id].ecache_linesize);
2312 2312
2313 2313 (void) clear_errors(NULL, NULL);
2314 2314 }
2315 2315 }
2316 2316
2317 2317 /*
2318 2318	 * Log final recovery message
2319 2319 */
2320 2320 if (!panicstr) {
2321 2321 if (!aflt->flt_priv) {
2322 2322 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2323 2323 NULL, " Above Error is in User Mode"
2324 2324 "\n and is fatal: "
2325 2325 "will SIGKILL process and notify contract");
2326 2326 } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2327 2327 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2328 2328 NULL, " Above Error detected while dumping core;"
2329 2329 "\n core file will be truncated");
2330 2330 } else if (aflt->flt_prot == AFLT_PROT_COPY) {
2331 2331 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2332 2332 NULL, " Above Error is due to Kernel access"
2333 2333 "\n to User space and is fatal: "
2334 2334 "will SIGKILL process and notify contract");
2335 2335 } else if (aflt->flt_prot == AFLT_PROT_EC) {
2336 2336 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2337 2337 " Above Error detected by protected Kernel code"
2338 2338 "\n that will try to clear error from system");
2339 2339 }
2340 2340 }
2341 2341 }
2342 2342
2343 2343
2344 2344 /*
2345 2345  * Check all cpus for non-trapping UE-causing errors.
2346 2346  * In Ultra I/II, we look for copyback errors (CPs).
2347 2347 */
2348 2348 void
2349 2349 cpu_check_allcpus(struct async_flt *aflt)
2350 2350 {
2351 2351 spitf_async_flt cp;
2352 2352 spitf_async_flt *spf_cpflt = &cp;
2353 2353 struct async_flt *cpflt = (struct async_flt *)&cp;
2354 2354 int pix;
2355 2355
2356 2356 cpflt->flt_id = aflt->flt_id;
2357 2357 cpflt->flt_addr = aflt->flt_addr;
2358 2358
2359 2359 for (pix = 0; pix < NCPU; pix++) {
2360 2360 if (CPU_XCALL_READY(pix)) {
2361 2361 xc_one(pix, (xcfunc_t *)get_cpu_status,
2362 2362 (uint64_t)cpflt, 0);
2363 2363
2364 2364 if (cpflt->flt_stat & P_AFSR_CP) {
2365 2365 char *space;
2366 2366
2367 2367 /* See which space - this info may not exist */
2368 2368 if (aflt->flt_status & ECC_D_TRAP)
2369 2369 space = "Data ";
2370 2370 else if (aflt->flt_status & ECC_I_TRAP)
2371 2371 space = "Instruction ";
2372 2372 else
2373 2373 space = "";
2374 2374
2375 2375 cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2376 2376 NULL, " AFAR was derived from UE report,"
2377 2377 " CP event on CPU%d (caused %saccess "
2378 2378 "error on %s%d)", pix, space,
2379 2379 (aflt->flt_status & ECC_IOBUS) ?
2380 2380 "IOBUS" : "CPU", aflt->flt_bus_id);
2381 2381
2382 2382 if (spf_cpflt->flt_ec_lcnt > 0)
2383 2383 cpu_log_ecmem_info(spf_cpflt);
2384 2384 else
2385 2385 cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2386 2386 CPU_ERRID_FIRST, NULL,
2387 2387 " No cache dump available");
2388 2388 }
2389 2389 }
2390 2390 }
2391 2391 }
2392 2392
2393 2393 #ifdef DEBUG
2394 2394 int test_mp_cp = 0;
2395 2395 #endif
2396 2396
2397 2397 /*
2398 2398 * Cross-call callback routine to tell a CPU to read its own %afsr to check
2399 2399 * for copyback errors and capture relevant information.
2400 2400 */
2401 2401 static uint_t
2402 2402 get_cpu_status(uint64_t arg)
2403 2403 {
2404 2404 struct async_flt *aflt = (struct async_flt *)arg;
2405 2405 spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
2406 2406 uint64_t afsr;
2407 2407 uint32_t ec_idx;
2408 2408 uint64_t sdbh, sdbl;
2409 2409 int i;
2410 2410 uint32_t ec_set_size;
2411 2411 uchar_t valid;
2412 2412 ec_data_t ec_data[8];
2413 2413 uint64_t ec_tag, flt_addr_tag, oafsr;
2414 2414 uint64_t *acc_afsr = NULL;
2415 2415
2416 2416 get_asyncflt(&afsr);
2417 2417 if (CPU_PRIVATE(CPU) != NULL) {
2418 2418 acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2419 2419 afsr |= *acc_afsr;
2420 2420 *acc_afsr = 0;
2421 2421 }
2422 2422
2423 2423 #ifdef DEBUG
2424 2424 if (test_mp_cp)
2425 2425 afsr |= P_AFSR_CP;
2426 2426 #endif
2427 2427 aflt->flt_stat = afsr;
2428 2428
2429 2429 if (afsr & P_AFSR_CP) {
2430 2430 /*
2431 2431 * Capture the UDBs
2432 2432 */
2433 2433 get_udb_errors(&sdbh, &sdbl);
2434 2434 spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
2435 2435 spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
2436 2436
2437 2437 /*
2438 2438 * Clear CP bit before capturing ecache data
2439 2439 * and AFSR info.
2440 2440 */
2441 2441 set_asyncflt(P_AFSR_CP);
2442 2442
2443 2443 /*
2444 2444 * See if we can capture the ecache line for the
2445 2445 * fault PA.
2446 2446 *
2447 2447 * Return a valid matching ecache line, if any.
2448 2448 * Otherwise, return the first matching ecache
2449 2449 * line marked invalid.
2450 2450 */
2451 2451 flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
2452 2452 ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
2453 2453 ecache_associativity;
2454 2454 spf_flt->flt_ec_lcnt = 0;
2455 2455
2456 2456 for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
2457 2457 i < ecache_associativity; i++, ec_idx += ec_set_size) {
2458 2458 get_ecache_dtag(P2ALIGN(ec_idx, 64),
2459 2459 (uint64_t *)&ec_data[0], &ec_tag, &oafsr,
2460 2460 acc_afsr);
2461 2461
2462 2462 if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
2463 2463 continue;
2464 2464
2465 2465 valid = cpu_ec_state_valid &
2466 2466 (uchar_t)((ec_tag & cpu_ec_state_mask) >>
2467 2467 cpu_ec_state_shift);
2468 2468
2469 2469 if (valid || spf_flt->flt_ec_lcnt == 0) {
2470 2470 spf_flt->flt_ec_tag = ec_tag;
2471 2471 bcopy(&ec_data, &spf_flt->flt_ec_data,
2472 2472 sizeof (ec_data));
2473 2473 spf_flt->flt_ec_lcnt = 1;
2474 2474
2475 2475 if (valid)
2476 2476 break;
2477 2477 }
2478 2478 }
2479 2479 }
2480 2480 return (0);
2481 2481 }
2482 2482
2483 2483 /*
2484 2484 * CPU-module callback for the non-panicking CPUs. This routine is invoked
2485 2485 * from panic_idle() as part of the other CPUs stopping themselves when a
2486 2486 * panic occurs. We need to be VERY careful what we do here, since panicstr
2487 2487 * is NOT set yet and we cannot blow through locks. If panic_aflt is set
2488 2488 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2489 2489 * CP error information.
2490 2490 */
2491 2491 void
2492 2492 cpu_async_panic_callb(void)
2493 2493 {
2494 2494 spitf_async_flt cp;
2495 2495 struct async_flt *aflt = (struct async_flt *)&cp;
2496 2496 uint64_t *scrub_afsr;
2497 2497
2498 2498 if (panic_aflt.flt_id != 0) {
2499 2499 aflt->flt_addr = panic_aflt.flt_addr;
2500 2500 (void) get_cpu_status((uint64_t)aflt);
2501 2501
2502 2502 if (CPU_PRIVATE(CPU) != NULL) {
2503 2503 scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2504 2504 if (*scrub_afsr & P_AFSR_CP) {
2505 2505 aflt->flt_stat |= *scrub_afsr;
2506 2506 *scrub_afsr = 0;
2507 2507 }
2508 2508 }
2509 2509 if (aflt->flt_stat & P_AFSR_CP) {
2510 2510 aflt->flt_id = panic_aflt.flt_id;
2511 2511 aflt->flt_panic = 1;
2512 2512 aflt->flt_inst = CPU->cpu_id;
2513 2513 aflt->flt_class = CPU_FAULT;
2514 2514 cp.flt_type = CPU_PANIC_CP_ERR;
2515 2515 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2516 2516 (void *)&cp, sizeof (cp), ue_queue,
2517 2517 aflt->flt_panic);
2518 2518 }
2519 2519 }
2520 2520 }
2521 2521
2522 2522 /*
2523 2523 * Turn off all cpu error detection, normally only used for panics.
2524 2524 */
2525 2525 void
2526 2526 cpu_disable_errors(void)
2527 2527 {
2528 2528 xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
2529 2529 }
2530 2530
2531 2531 /*
2532 2532 * Enable errors.
2533 2533 */
2534 2534 void
2535 2535 cpu_enable_errors(void)
2536 2536 {
2537 2537 xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
2538 2538 }
2539 2539
2540 2540 static void
2541 2541 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2542 2542 {
2543 2543 uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2544 2544 int i, loop = 1;
2545 2545 ushort_t ecc_0;
2546 2546 uint64_t paddr;
2547 2547 uint64_t data;
2548 2548
2549 2549 if (verbose)
2550 2550 loop = 8;
2551 2551 for (i = 0; i < loop; i++) {
2552 2552 paddr = aligned_addr + (i * 8);
2553 2553 data = lddphys(paddr);
2554 2554 if (verbose) {
2555 2555 if (ce_err) {
2556 2556 ecc_0 = ecc_gen((uint32_t)(data>>32),
2557 2557 (uint32_t)data);
2558 2558 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2559 2559 NULL, " Paddr 0x%" PRIx64 ", "
2560 2560 "Data 0x%08x.%08x, ECC 0x%x", paddr,
2561 2561 (uint32_t)(data>>32), (uint32_t)data,
2562 2562 ecc_0);
2563 2563 } else {
2564 2564 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2565 2565 NULL, " Paddr 0x%" PRIx64 ", "
2566 2566 "Data 0x%08x.%08x", paddr,
2567 2567 (uint32_t)(data>>32), (uint32_t)data);
2568 2568 }
2569 2569 }
2570 2570 }
2571 2571 }
2572 2572
2573 2573 static struct { /* sec-ded-s4ed ecc code */
2574 2574 uint_t hi, lo;
2575 2575 } ecc_code[8] = {
2576 2576 { 0xee55de23U, 0x16161161U },
2577 2577 { 0x55eede93U, 0x61612212U },
2578 2578 { 0xbb557b8cU, 0x49494494U },
2579 2579 { 0x55bb7b6cU, 0x94948848U },
2580 2580 { 0x16161161U, 0xee55de23U },
2581 2581 { 0x61612212U, 0x55eede93U },
2582 2582 { 0x49494494U, 0xbb557b8cU },
2583 2583 { 0x94948848U, 0x55bb7b6cU }
2584 2584 };
2585 2585
2586 2586 static ushort_t
2587 2587 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2588 2588 {
2589 2589 int i, j;
2590 2590 uchar_t checker, bit_mask;
2591 2591 struct {
2592 2592 uint_t hi, lo;
2593 2593 } hex_data, masked_data[8];
2594 2594
2595 2595 hex_data.hi = high_bytes;
2596 2596 hex_data.lo = low_bytes;
2597 2597
2598 2598 /* mask out bits according to sec-ded-s4ed ecc code */
2599 2599 for (i = 0; i < 8; i++) {
2600 2600 masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2601 2601 masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2602 2602 }
2603 2603
2604 2604 /*
2605 2605 * xor all bits in masked_data[i] to get bit_i of checker,
2606 2606 * where i = 0 to 7
2607 2607 */
2608 2608 checker = 0;
2609 2609 for (i = 0; i < 8; i++) {
2610 2610 bit_mask = 1 << i;
2611 2611 for (j = 0; j < 32; j++) {
2612 2612 if (masked_data[i].lo & 1) checker ^= bit_mask;
2613 2613 if (masked_data[i].hi & 1) checker ^= bit_mask;
2614 2614 masked_data[i].hi >>= 1;
2615 2615 masked_data[i].lo >>= 1;
2616 2616 }
2617 2617 }
2618 2618 return (checker);
2619 2619 }
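
/*
 * Illustrative sketch (userland, not part of this file): a self-
 * contained copy of the check-bit computation above, applied to a
 * hypothetical 64-bit word.  Check bit i is the XOR of all data bits
 * selected by ecc_code[i]; a single-bit data flip therefore flips a
 * unique subset of check bits, which is what makes the 8-bit syndrome
 * decodable back to the failing bit.
 */
#include <stdio.h>

static const struct { unsigned int hi, lo; } code_sketch[8] = {
	{ 0xee55de23U, 0x16161161U }, { 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U }, { 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U }, { 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU }, { 0x94948848U, 0x55bb7b6cU }
};

static unsigned char
ecc_gen_sketch(unsigned int hi, unsigned int lo)
{
	unsigned char checker = 0;
	int i, j;

	for (i = 0; i < 8; i++) {
		unsigned int mhi = hi & code_sketch[i].hi;
		unsigned int mlo = lo & code_sketch[i].lo;

		/* parity of the masked bits becomes check bit i */
		for (j = 0; j < 32; j++) {
			if (mlo & 1)
				checker ^= (unsigned char)(1 << i);
			if (mhi & 1)
				checker ^= (unsigned char)(1 << i);
			mhi >>= 1;
			mlo >>= 1;
		}
	}
	return (checker);
}

int
main(void)
{
	unsigned int hi = 0xdeadbeefU, lo = 0x00c0ffeeU; /* hypothetical */

	printf("check bits       = 0x%02x\n", ecc_gen_sketch(hi, lo));
	printf("after 1-bit flip = 0x%02x\n", ecc_gen_sketch(hi, lo ^ 1));
	return (0);
}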
2620 2620
2621 2621 /*
2622 2622 * Flush the entire ecache using displacement flush by reading through a
2623 2623 * physical address range as large as the ecache.
2624 2624 */
2625 2625 void
2626 2626 cpu_flush_ecache(void)
2627 2627 {
2628 2628 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2629 2629 cpunodes[CPU->cpu_id].ecache_linesize);
2630 2630 }
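
/*
 * Minimal sketch of a displacement flush (assumes flushaddr maps a
 * cacheable region at least twice the e$ size, as ecache_flushaddr is
 * used above): loading through an address range as large as the cache
 * at line-size stride forces every resident line to be evicted, and
 * any dirty line to be written back.
 */
#include <stddef.h>	/* for size_t in this standalone sketch */

static void
displacement_flush_sketch(volatile const char *flushaddr,
    size_t ecache_size, size_t linesize)
{
	size_t off;

	for (off = 0; off < ecache_size * 2; off += linesize)
		(void) flushaddr[off];	/* each load displaces one line */
}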
2631 2631
2632 2632 /*
2633 2633  * Read and display the data in the cache line where the
2634 2634  * original CE error occurred.
2635 2635 * This routine is mainly used for debugging new hardware.
2636 2636 */
2637 2637 void
2638 2638 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2639 2639 {
2640 2640 kpreempt_disable();
2641 2641 /* disable ECC error traps */
2642 2642 set_error_enable(EER_ECC_DISABLE);
2643 2643
2644 2644 /*
2645 2645 * flush the ecache
2646 2646 * read the data
2647 2647	 * check to see if an ECC error occurred
2648 2648 */
2649 2649 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2650 2650 cpunodes[CPU->cpu_id].ecache_linesize);
2651 2651 set_lsu(get_lsu() | cache_boot_state);
2652 2652 cpu_read_paddr(ecc, verbose, ce_err);
2653 2653 (void) check_ecc(ecc);
2654 2654
2655 2655 /* enable ECC error traps */
2656 2656 set_error_enable(EER_ENABLE);
2657 2657 kpreempt_enable();
2658 2658 }
2659 2659
2660 2660 /*
2661 2661 * Check the AFSR bits for UE/CE persistence.
2662 2662  * If UE or CE errors are detected, the routine
2663 2663  * clears all the AFSR sticky bits (except CP for
2664 2664  * spitfire/blackbird) and the UDBs.
2665 2665  * If ce_debug or ue_debug is set, log any ue/ce errors detected.
2666 2666 */
2667 2667 static int
2668 2668 check_ecc(struct async_flt *ecc)
2669 2669 {
2670 2670 uint64_t t_afsr;
2671 2671 uint64_t t_afar;
2672 2672 uint64_t udbh;
2673 2673 uint64_t udbl;
2674 2674 ushort_t udb;
2675 2675 int persistent = 0;
2676 2676
2677 2677 /*
2678 2678 * Capture the AFSR, AFAR and UDBs info
2679 2679 */
2680 2680 get_asyncflt(&t_afsr);
2681 2681 get_asyncaddr(&t_afar);
2682 2682 t_afar &= SABRE_AFAR_PA;
2683 2683 get_udb_errors(&udbh, &udbl);
2684 2684
2685 2685 if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2686 2686 /*
2687 2687 * Clear the errors
2688 2688 */
2689 2689 clr_datapath();
2690 2690
2691 2691 if (isus2i || isus2e)
2692 2692 set_asyncflt(t_afsr);
2693 2693 else
2694 2694 set_asyncflt(t_afsr & ~P_AFSR_CP);
2695 2695
2696 2696 /*
2697 2697 * determine whether to check UDBH or UDBL for persistence
2698 2698 */
2699 2699 if (ecc->flt_synd & UDBL_REG) {
2700 2700 udb = (ushort_t)udbl;
2701 2701 t_afar |= 0x8;
2702 2702 } else {
2703 2703 udb = (ushort_t)udbh;
2704 2704 }
2705 2705
2706 2706 if (ce_debug || ue_debug) {
2707 2707 spitf_async_flt spf_flt; /* for logging */
2708 2708 struct async_flt *aflt =
2709 2709 (struct async_flt *)&spf_flt;
2710 2710
2711 2711 /* Package the info nicely in the spf_flt struct */
2712 2712 bzero(&spf_flt, sizeof (spitf_async_flt));
2713 2713 aflt->flt_stat = t_afsr;
2714 2714 aflt->flt_addr = t_afar;
2715 2715 spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2716 2716 spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2717 2717
2718 2718 cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2719 2719 CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2720 2720 " check_ecc: Dumping captured error states ...");
2721 2721 }
2722 2722
2723 2723 /*
2724 2724		 * If the fault addresses don't match, the error is not persistent
2725 2725 */
2726 2726 if (t_afar != ecc->flt_addr) {
2727 2727 return (persistent);
2728 2728 }
2729 2729
2730 2730 /*
2731 2731 * check for UE persistence
2732 2732 * since all DIMMs in the bank are identified for a UE,
2733 2733 * there's no reason to check the syndrome
2734 2734 */
2735 2735 if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2736 2736 persistent = 1;
2737 2737 }
2738 2738
2739 2739 /*
2740 2740 * check for CE persistence
2741 2741 */
2742 2742 if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2743 2743 if ((udb & P_DER_E_SYND) ==
2744 2744 (ecc->flt_synd & P_DER_E_SYND)) {
2745 2745 persistent = 1;
2746 2746 }
2747 2747 }
2748 2748 }
2749 2749 return (persistent);
2750 2750 }
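
/*
 * Minimal sketch of the persistence rule above (the types and the
 * 8-bit syndrome mask are assumptions standing in for P_DER_E_SYND):
 * a UE recurring at the same AFAR is persistent; a CE must also recur
 * with the same UDB syndrome, since a different syndrome at the same
 * address implicates a different failing bit.
 */
static int
ce_is_persistent_sketch(unsigned long long old_afar,
    unsigned long long new_afar, unsigned short old_synd,
    unsigned short new_synd)
{
	return (old_afar == new_afar &&
	    (old_synd & 0xFF) == (new_synd & 0xFF));
}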
2751 2751
2752 2752 #ifdef HUMMINGBIRD
2753 2753 #define HB_FULL_DIV 1
2754 2754 #define HB_HALF_DIV 2
2755 2755 #define HB_LOWEST_DIV 8
2756 2756 #define HB_ECLK_INVALID 0xdeadbad
2757 2757 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
2758 2758 HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
2759 2759 HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
2760 2760 HB_ECLK_8 };
2761 2761
2762 2762 #define HB_SLOW_DOWN 0
2763 2763 #define HB_SPEED_UP 1
2764 2764
2765 2765 #define SET_ESTAR_MODE(mode) \
2766 2766 stdphysio(HB_ESTAR_MODE, (mode)); \
2767 2767 /* \
2768 2768 * PLL logic requires minimum of 16 clock \
2769 2769 * cycles to lock to the new clock speed. \
2770 2770 * Wait 1 usec to satisfy this requirement. \
2771 2771 */ \
2772 2772 drv_usecwait(1);
2773 2773
2774 2774 #define CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \
2775 2775 { \
2776 2776 volatile uint64_t data; \
2777 2777 uint64_t count, new_count; \
2778 2778 clock_t delay; \
2779 2779 data = lddphysio(HB_MEM_CNTRL0); \
2780 2780 count = (data & HB_REFRESH_COUNT_MASK) >> \
2781 2781 HB_REFRESH_COUNT_SHIFT; \
2782 2782 new_count = (HB_REFRESH_INTERVAL * \
2783 2783 cpunodes[CPU->cpu_id].clock_freq) / \
2784 2784 (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
2785 2785 data = (data & ~HB_REFRESH_COUNT_MASK) | \
2786 2786 (new_count << HB_REFRESH_COUNT_SHIFT); \
2787 2787 stdphysio(HB_MEM_CNTRL0, data); \
2788 2788 data = lddphysio(HB_MEM_CNTRL0); \
2789 2789 /* \
2790 2790 * If we are slowing down the cpu and Memory \
2791 2791 * Self Refresh is not enabled, it is required \
2792 2792 * to wait for old refresh count to count-down and \
2793 2793	 * to wait for the old refresh count to count down and \
2794 2794	 * the new refresh count to go into effect (let the new \
2795 2795	 * value count down once). \
2796 2796 if ((direction) == HB_SLOW_DOWN && \
2797 2797 (data & HB_SELF_REFRESH_MASK) == 0) { \
2798 2798 /* \
2799 2799 * Each count takes 64 cpu clock cycles \
2800 2800 * to decrement. Wait for current refresh \
2801 2801 * count plus new refresh count at current \
2802 2802 * cpu speed to count down to zero. Round \
2803 2803 * up the delay time. \
2804 2804 */ \
2805 2805 delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \
2806 2806 (count + new_count) * MICROSEC * (cur_div)) /\
2807 2807 cpunodes[CPU->cpu_id].clock_freq) + 1; \
2808 2808 drv_usecwait(delay); \
2809 2809 } \
2810 2810 }
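
/*
 * Worked example of the refresh-count formula above (all constants are
 * hypothetical stand-ins for the HB_* values in the Hummingbird
 * headers): the DRAM refresh interval is fixed in wall-clock time, so
 * the count programmed into the memory controller must shrink as the
 * cpu clock is divided down.
 */
#include <stdio.h>

int
main(void)
{
	const unsigned long long refresh_interval_ns = 7800;	/* assumed */
	const unsigned long long clocks_per_count = 64;		/* assumed */
	const unsigned long long clock_freq = 400000000;	/* assumed */
	const unsigned long long nanosec = 1000000000;
	unsigned long long div;

	for (div = 1; div <= 8; div *= 2) {
		unsigned long long count =
		    (refresh_interval_ns * clock_freq) /
		    (clocks_per_count * div * nanosec);
		printf("divisor %llu -> refresh count %llu\n", div, count);
	}
	return (0);
}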
2811 2811
2812 2812 #define SET_SELF_REFRESH(bit) \
2813 2813 { \
2814 2814 volatile uint64_t data; \
2815 2815 data = lddphysio(HB_MEM_CNTRL0); \
2816 2816 data = (data & ~HB_SELF_REFRESH_MASK) | \
2817 2817 ((bit) << HB_SELF_REFRESH_SHIFT); \
2818 2818 stdphysio(HB_MEM_CNTRL0, data); \
2819 2819 data = lddphysio(HB_MEM_CNTRL0); \
2820 2820 }
2821 2821 #endif /* HUMMINGBIRD */
2822 2822
2823 2823 /* ARGSUSED */
2824 2824 void
2825 2825 cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
2826 2826 {
2827 2827 #ifdef HUMMINGBIRD
2828 2828 uint64_t cur_mask, cur_divisor = 0;
2829 2829 volatile uint64_t reg;
2830 2830 processor_info_t *pi = &(CPU->cpu_type_info);
2831 2831 int index;
2832 2832
2833 2833 if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
2834 2834 (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
2835 2835 cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
2836 2836 new_divisor);
2837 2837 return;
2838 2838 }
2839 2839
2840 2840 reg = lddphysio(HB_ESTAR_MODE);
2841 2841 cur_mask = reg & HB_ECLK_MASK;
2842 2842 for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
2843 2843 if (hb_eclk[index] == cur_mask) {
2844 2844 cur_divisor = index;
2845 2845 break;
2846 2846 }
2847 2847 }
2848 2848
2849 2849 if (cur_divisor == 0)
2850 2850 cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
2851 2851 "can't be determined!");
2852 2852
2853 2853 /*
2854 2854 * If we are already at the requested divisor speed, just
2855 2855 * return.
2856 2856 */
2857 2857 if (cur_divisor == new_divisor)
2858 2858 return;
2859 2859
2860 2860 if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
2861 2861 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2862 2862 SET_ESTAR_MODE(hb_eclk[new_divisor]);
2863 2863 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2864 2864
2865 2865 } else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2866 2866 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2867 2867 SET_ESTAR_MODE(hb_eclk[new_divisor]);
2868 2868 /* LINTED: E_FALSE_LOGICAL_EXPR */
2869 2869 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2870 2870
2871 2871 } else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
2872 2872 /*
2873 2873 * Transition to 1/2 speed first, then to
2874 2874 * lower speed.
2875 2875 */
2876 2876 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
2877 2877 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2878 2878 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2879 2879
2880 2880 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
2881 2881 SET_ESTAR_MODE(hb_eclk[new_divisor]);
2882 2882
2883 2883 } else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2884 2884 /*
2885 2885 * Transition to 1/2 speed first, then to
2886 2886 * full speed.
2887 2887 */
2888 2888 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2889 2889 /* LINTED: E_FALSE_LOGICAL_EXPR */
2890 2890 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);
2891 2891
2892 2892 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2893 2893 SET_ESTAR_MODE(hb_eclk[new_divisor]);
2894 2894 /* LINTED: E_FALSE_LOGICAL_EXPR */
2895 2895 CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);
2896 2896
2897 2897 } else if (cur_divisor < new_divisor) {
2898 2898 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2899 2899 SET_ESTAR_MODE(hb_eclk[new_divisor]);
2900 2900
2901 2901 } else if (cur_divisor > new_divisor) {
2902 2902 SET_ESTAR_MODE(hb_eclk[new_divisor]);
2903 2903 /* LINTED: E_FALSE_LOGICAL_EXPR */
2904 2904 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2905 2905 }
2906 2906 CPU->cpu_m.divisor = (uchar_t)new_divisor;
2907 2907 cpu_set_curr_clock(((uint64_t)pi->pi_clock * 1000000) / new_divisor);
2908 2908 #endif
2909 2909 }
2910 2910
2911 2911 /*
2912 2912 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2913 2913 * we clear all the sticky bits. If a non-null pointer to a async fault
2914 2914 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2915 2915 * info will be returned in the structure. If a non-null pointer to a
2916 2916 * uint64_t is passed in, this will be updated if the CP bit is set in the
2917 2917 * AFSR. The afsr will be returned.
2918 2918 */
2919 2919 static uint64_t
2920 2920 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2921 2921 {
2922 2922 struct async_flt *aflt = (struct async_flt *)spf_flt;
2923 2923 uint64_t afsr;
2924 2924 uint64_t udbh, udbl;
2925 2925
2926 2926 get_asyncflt(&afsr);
2927 2927
2928 2928 if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2929 2929 *acc_afsr |= afsr;
2930 2930
2931 2931 if (spf_flt != NULL) {
2932 2932 aflt->flt_stat = afsr;
2933 2933 get_asyncaddr(&aflt->flt_addr);
2934 2934 aflt->flt_addr &= SABRE_AFAR_PA;
2935 2935
2936 2936 get_udb_errors(&udbh, &udbl);
2937 2937 spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2938 2938 spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2939 2939 }
2940 2940
2941 2941 set_asyncflt(afsr); /* clear afsr */
2942 2942 clr_datapath(); /* clear udbs */
2943 2943 return (afsr);
2944 2944 }
2945 2945
2946 2946 /*
2947 2947 * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data
2948 2948  * and tag of the first bad line will be returned. We also return the old-afsr
2949 2949 * (before clearing the sticky bits). The linecnt data will be updated to
2950 2950 * indicate the number of bad lines detected.
2951 2951 */
2952 2952 static void
2953 2953 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
2954 2954 uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
2955 2955 {
2956 2956 ec_data_t t_ecdata[8];
2957 2957 uint64_t t_etag, oafsr;
2958 2958 uint64_t pa = AFLT_INV_ADDR;
2959 2959 uint32_t i, j, ecache_sz;
2960 2960 uint64_t acc_afsr = 0;
2961 2961 uint64_t *cpu_afsr = NULL;
2962 2962
2963 2963 if (CPU_PRIVATE(CPU) != NULL)
2964 2964 cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2965 2965
2966 2966 *linecnt = 0;
2967 2967 ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
2968 2968
2969 2969 for (i = 0; i < ecache_sz; i += 64) {
2970 2970 get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
2971 2971 cpu_afsr);
2972 2972 acc_afsr |= oafsr;
2973 2973
2974 2974 /*
2975 2975		 * Scan through the whole 64-byte line in eight 8-byte chunks
2976 2976 * looking for the first occurrence of an EDP error. The AFSR
2977 2977 * info is captured for each 8-byte chunk. Note that for
2978 2978 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
2979 2979 * 16-byte chunk granularity (i.e. the AFSR will be the same
2980 2980 * for the high and low 8-byte words within the 16-byte chunk).
2981 2981 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
2982 2982 * granularity and only PSYND bits [7:0] are used.
2983 2983 */
2984 2984 for (j = 0; j < 8; j++) {
2985 2985 ec_data_t *ecdptr = &t_ecdata[j];
2986 2986
2987 2987 if (ecdptr->ec_afsr & P_AFSR_EDP) {
2988 2988 uint64_t errpa;
2989 2989 ushort_t psynd;
2990 2990 uint32_t ec_set_size = ecache_sz /
2991 2991 ecache_associativity;
2992 2992
2993 2993 /*
2994 2994 * For Spitfire/Blackbird, we need to look at
2995 2995 * the PSYND to make sure that this 8-byte chunk
2996 2996 * is the right one. PSYND bits [15:8] belong
2997 2997 * to the upper 8-byte (even) chunk. Bits
2998 2998 * [7:0] belong to the lower 8-byte chunk (odd).
2999 2999 */
3000 3000 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3001 3001 if (!isus2i && !isus2e) {
3002 3002 if (j & 0x1)
3003 3003 psynd = psynd & 0xFF;
3004 3004 else
3005 3005 psynd = psynd >> 8;
3006 3006
3007 3007 if (!psynd)
3008 3008 continue; /* wrong chunk */
3009 3009 }
3010 3010
3011 3011 /* Construct the PA */
3012 3012 errpa = ((t_etag & cpu_ec_tag_mask) <<
3013 3013 cpu_ec_tag_shift) | ((i | (j << 3)) %
3014 3014 ec_set_size);
3015 3015
3016 3016 /* clean up the cache line */
3017 3017 flushecacheline(P2ALIGN(errpa, 64),
3018 3018 cpunodes[CPU->cpu_id].ecache_size);
3019 3019
3020 3020 oafsr = clear_errors(NULL, cpu_afsr);
3021 3021 acc_afsr |= oafsr;
3022 3022
3023 3023 (*linecnt)++;
3024 3024
3025 3025 /*
3026 3026 * Capture the PA for the first bad line found.
3027 3027 * Return the ecache dump and tag info.
3028 3028 */
3029 3029 if (pa == AFLT_INV_ADDR) {
3030 3030 int k;
3031 3031
3032 3032 pa = errpa;
3033 3033 for (k = 0; k < 8; k++)
3034 3034 ecache_data[k] = t_ecdata[k];
3035 3035 *ecache_tag = t_etag;
3036 3036 }
3037 3037 break;
3038 3038 }
3039 3039 }
3040 3040 }
3041 3041 *t_afar = pa;
3042 3042 *t_afsr = acc_afsr;
3043 3043 }
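
/*
 * Illustrative sketch of the PA reconstruction above (the tag mask,
 * shift and set size are hypothetical stand-ins for the cpu_ec_*
 * globals): the physical address of a bad line is the captured tag
 * shifted into place, OR'd with the line's byte index within one set.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t cpu_ec_tag_mask = 0x3fff;	/* assumed */
	int cpu_ec_tag_shift = 19;		/* 512KB set size, assumed */
	uint32_t ec_set_size = 1U << 19;
	uint64_t t_etag = 0x1a2b;		/* captured tag, hypothetical */
	uint32_t i = 0x40;			/* line offset in the scan */
	uint32_t j = 3;				/* bad 8-byte chunk */

	uint64_t errpa = ((t_etag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
	    ((i | (j << 3)) % ec_set_size);
	printf("reconstructed PA = 0x%llx\n", (unsigned long long)errpa);
	return (0);
}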
3044 3044
3045 3045 static void
3046 3046 cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3047 3047 {
3048 3048 struct async_flt *aflt = (struct async_flt *)spf_flt;
3049 3049 uint64_t ecache_tag = spf_flt->flt_ec_tag;
3050 3050 char linestr[30];
3051 3051 char *state_str;
3052 3052 int i;
3053 3053
3054 3054 /*
3055 3055 * Check the ecache tag to make sure it
3056 3056 * is valid. If invalid, a memory dump was
3057 3057	 * captured instead of an ecache dump.
3058 3058 */
3059 3059 if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3060 3060 uchar_t eparity = (uchar_t)
3061 3061 ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3062 3062
3063 3063 uchar_t estate = (uchar_t)
3064 3064 ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3065 3065
3066 3066 if (estate == cpu_ec_state_shr)
3067 3067 state_str = "Shared";
3068 3068 else if (estate == cpu_ec_state_exl)
3069 3069 state_str = "Exclusive";
3070 3070 else if (estate == cpu_ec_state_own)
3071 3071 state_str = "Owner";
3072 3072 else if (estate == cpu_ec_state_mod)
3073 3073 state_str = "Modified";
3074 3074 else
3075 3075 state_str = "Invalid";
3076 3076
3077 3077 if (spf_flt->flt_ec_lcnt > 1) {
3078 3078 (void) snprintf(linestr, sizeof (linestr),
3079 3079 "Badlines found=%d", spf_flt->flt_ec_lcnt);
3080 3080 } else {
3081 3081 linestr[0] = '\0';
3082 3082 }
3083 3083
3084 3084 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3085 3085 " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s "
3086 3086 "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3087 3087 (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3088 3088 (uint32_t)ecache_tag, state_str,
3089 3089 (uint32_t)eparity, linestr);
3090 3090 } else {
3091 3091 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3092 3092 " E$tag != PA from AFAR; E$line was victimized"
3093 3093 "\n dumping memory from PA 0x%08x.%08x instead",
3094 3094 (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3095 3095 (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3096 3096 }
3097 3097
3098 3098 /*
3099 3099	 * Dump out all eight 8-byte ecache data chunks captured.
3100 3100	 * For each 8-byte chunk captured, we check the
3101 3101	 * captured afsr's parity syndrome to find out
3102 3102	 * which 8-byte chunk is bad. For a memory dump, the
3103 3103	 * AFSR values were initialized to 0.
3104 3104 */
3105 3105 for (i = 0; i < 8; i++) {
3106 3106 ec_data_t *ecdptr;
3107 3107 uint_t offset;
3108 3108 ushort_t psynd;
3109 3109 ushort_t bad;
3110 3110 uint64_t edp;
3111 3111
3112 3112 offset = i << 3; /* multiply by 8 */
3113 3113 ecdptr = &spf_flt->flt_ec_data[i];
3114 3114 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3115 3115 edp = ecdptr->ec_afsr & P_AFSR_EDP;
3116 3116
3117 3117 /*
3118 3118 * For Sabre/Hummingbird, parity synd is captured only
3119 3119 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3120 3120 * For spitfire/blackbird, AFSR.PSYND is captured
3121 3121		 * in 16-byte granularity. [15:8] represents
3122 3122		 * the upper 8 bytes and [7:0] the lower 8 bytes.
3123 3123 */
3124 3124 if (isus2i || isus2e || (i & 0x1))
3125 3125 bad = (psynd & 0xFF); /* check bits [7:0] */
3126 3126 else
3127 3127 bad = (psynd & 0xFF00); /* check bits [15:8] */
3128 3128
3129 3129 if (bad && edp) {
3130 3130 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3131 3131 " E$Data (0x%02x): 0x%08x.%08x "
3132 3132 "*Bad* PSYND=0x%04x", offset,
3133 3133 (uint32_t)(ecdptr->ec_d8 >> 32),
3134 3134 (uint32_t)ecdptr->ec_d8, psynd);
3135 3135 } else {
3136 3136 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3137 3137 " E$Data (0x%02x): 0x%08x.%08x", offset,
3138 3138 (uint32_t)(ecdptr->ec_d8 >> 32),
3139 3139 (uint32_t)ecdptr->ec_d8);
3140 3140 }
3141 3141 }
3142 3142 }
3143 3143
3144 3144 /*
3145 3145 * Common logging function for all cpu async errors. This function allows the
3146 3146 * caller to generate a single cmn_err() call that logs the appropriate items
3147 3147 * from the fault structure, and implements our rules for AFT logging levels.
3148 3148 *
3149 3149 * ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3150 3150 * tagnum: 0, 1, 2, .. generate the [AFT#] tag
3151 3151 * spflt: pointer to spitfire async fault structure
3152 3152 * logflags: bitflags indicating what to output
3153 3153  * endstr: an end string to appear at the end of this log
3154 3154 * fmt: a format string to appear at the beginning of the log
3155 3155 *
3156 3156 * The logflags allows the construction of predetermined output from the spflt
3157 3157 * structure. The individual data items always appear in a consistent order.
3158 3158 * Note that either or both of the spflt structure pointer and logflags may be
3159 3159 * NULL or zero respectively, indicating that the predetermined output
3160 3160 * substrings are not requested in this log. The output looks like this:
3161 3161 *
3162 3162 * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3163 3163 * <CPU_SPACE><CPU_ERRID>
3164 3164 * newline+4spaces<CPU_AFSR><CPU_AFAR>
3165 3165 * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3166 3166 * newline+4spaces<CPU_UDBH><CPU_UDBL>
3167 3167 * newline+4spaces<CPU_SYND>
3168 3168 * newline+4spaces<endstr>
3169 3169 *
3170 3170  * Note that <endstr> may not start on a newline if we are logging <CPU_SYND>;
3171 3171 * it is assumed that <endstr> will be the unum string in this case. The size
3172 3172 * of our intermediate formatting buf[] is based on the worst case of all flags
3173 3173 * being enabled. We pass the caller's varargs directly to vcmn_err() for
3174 3174 * formatting so we don't need additional stack space to format them here.
3175 3175 */
3176 3176 /*PRINTFLIKE6*/
3177 3177 static void
3178 3178 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
3179 3179 const char *endstr, const char *fmt, ...)
3180 3180 {
3181 3181 struct async_flt *aflt = (struct async_flt *)spflt;
3182 3182 char buf[400], *p, *q; /* see comments about buf[] size above */
3183 3183 va_list ap;
3184 3184 int console_log_flag;
3185 3185
3186 3186 if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
3187 3187 (aflt->flt_stat & P_AFSR_LEVEL1)) ||
3188 3188 (aflt->flt_panic)) {
3189 3189 console_log_flag = (tagnum < 2) || aft_verbose;
3190 3190 } else {
3191 3191 int verbose = ((aflt->flt_class == BUS_FAULT) ||
3192 3192 (aflt->flt_stat & P_AFSR_CE)) ?
3193 3193 ce_verbose_memory : ce_verbose_other;
3194 3194
3195 3195 if (!verbose)
3196 3196 return;
3197 3197
3198 3198 console_log_flag = (verbose > 1);
3199 3199 }
3200 3200
3201 3201 if (console_log_flag)
3202 3202 (void) sprintf(buf, "[AFT%d]", tagnum);
3203 3203 else
3204 3204 (void) sprintf(buf, "![AFT%d]", tagnum);
3205 3205
3206 3206 p = buf + strlen(buf); /* current buffer position */
3207 3207 q = buf + sizeof (buf); /* pointer past end of buffer */
3208 3208
3209 3209 if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
3210 3210 (void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
3211 3211 (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
3212 3212 p += strlen(p);
3213 3213 }
3214 3214
3215 3215 /*
3216 3216 * Copy the caller's format string verbatim into buf[]. It will be
3217 3217 * formatted by the call to vcmn_err() at the end of this function.
3218 3218 */
3219 3219 if (fmt != NULL && p < q) {
3220 3220 (void) strncpy(p, fmt, (size_t)(q - p - 1));
3221 3221 buf[sizeof (buf) - 1] = '\0';
3222 3222 p += strlen(p);
3223 3223 }
3224 3224
3225 3225 if (spflt != NULL) {
3226 3226 if (logflags & CPU_FLTCPU) {
3227 3227 (void) snprintf(p, (size_t)(q - p), " CPU%d",
3228 3228 aflt->flt_inst);
3229 3229 p += strlen(p);
3230 3230 }
3231 3231
3232 3232 if (logflags & CPU_SPACE) {
3233 3233 if (aflt->flt_status & ECC_D_TRAP)
3234 3234 (void) snprintf(p, (size_t)(q - p),
3235 3235 " Data access");
3236 3236 else if (aflt->flt_status & ECC_I_TRAP)
3237 3237 (void) snprintf(p, (size_t)(q - p),
3238 3238 " Instruction access");
3239 3239 p += strlen(p);
3240 3240 }
3241 3241
3242 3242 if (logflags & CPU_TL) {
3243 3243 (void) snprintf(p, (size_t)(q - p), " at TL%s",
3244 3244 aflt->flt_tl ? ">0" : "=0");
3245 3245 p += strlen(p);
3246 3246 }
3247 3247
3248 3248 if (logflags & CPU_ERRID) {
3249 3249 (void) snprintf(p, (size_t)(q - p),
3250 3250 ", errID 0x%08x.%08x",
3251 3251 (uint32_t)(aflt->flt_id >> 32),
3252 3252 (uint32_t)aflt->flt_id);
3253 3253 p += strlen(p);
3254 3254 }
3255 3255
3256 3256 if (logflags & CPU_AFSR) {
3257 3257 (void) snprintf(p, (size_t)(q - p),
3258 3258 "\n AFSR 0x%08b.%08b",
3259 3259 (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
3260 3260 (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
3261 3261 p += strlen(p);
3262 3262 }
3263 3263
3264 3264 if (logflags & CPU_AFAR) {
3265 3265 (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
3266 3266 (uint32_t)(aflt->flt_addr >> 32),
3267 3267 (uint32_t)aflt->flt_addr);
3268 3268 p += strlen(p);
3269 3269 }
3270 3270
3271 3271 if (logflags & CPU_AF_PSYND) {
3272 3272 ushort_t psynd = (ushort_t)
3273 3273 (aflt->flt_stat & P_AFSR_P_SYND);
3274 3274
3275 3275 (void) snprintf(p, (size_t)(q - p),
3276 3276 "\n AFSR.PSYND 0x%04x(Score %02d)",
3277 3277 psynd, ecc_psynd_score(psynd));
3278 3278 p += strlen(p);
3279 3279 }
3280 3280
3281 3281 if (logflags & CPU_AF_ETS) {
3282 3282 (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
3283 3283 (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
3284 3284 p += strlen(p);
3285 3285 }
3286 3286
3287 3287 if (logflags & CPU_FAULTPC) {
3288 3288 (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
3289 3289 (void *)aflt->flt_pc);
3290 3290 p += strlen(p);
3291 3291 }
3292 3292
3293 3293 if (logflags & CPU_UDBH) {
3294 3294 (void) snprintf(p, (size_t)(q - p),
3295 3295 "\n UDBH 0x%04b UDBH.ESYND 0x%02x",
3296 3296 spflt->flt_sdbh, UDB_FMTSTR,
3297 3297 spflt->flt_sdbh & 0xFF);
3298 3298 p += strlen(p);
3299 3299 }
3300 3300
3301 3301 if (logflags & CPU_UDBL) {
3302 3302 (void) snprintf(p, (size_t)(q - p),
3303 3303 " UDBL 0x%04b UDBL.ESYND 0x%02x",
3304 3304 spflt->flt_sdbl, UDB_FMTSTR,
3305 3305 spflt->flt_sdbl & 0xFF);
3306 3306 p += strlen(p);
3307 3307 }
3308 3308
3309 3309 if (logflags & CPU_SYND) {
3310 3310 ushort_t synd = SYND(aflt->flt_synd);
3311 3311
3312 3312 (void) snprintf(p, (size_t)(q - p),
3313 3313 "\n %s Syndrome 0x%x Memory Module ",
3314 3314 UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
3315 3315 p += strlen(p);
3316 3316 }
3317 3317 }
3318 3318
3319 3319 if (endstr != NULL) {
3320 3320 if (!(logflags & CPU_SYND))
3321 3321 (void) snprintf(p, (size_t)(q - p), "\n %s", endstr);
3322 3322 else
3323 3323 (void) snprintf(p, (size_t)(q - p), "%s", endstr);
3324 3324 p += strlen(p);
3325 3325 }
3326 3326
3327 3327 if (ce_code == CE_CONT && (p < q - 1))
3328 3328 (void) strcpy(p, "\n"); /* add final \n if needed */
3329 3329
3330 3330 va_start(ap, fmt);
3331 3331 vcmn_err(ce_code, buf, ap);
3332 3332 va_end(ap);
3333 3333 }
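
The function above relies on a bounded-append idiom: p tracks the current write
position and q points one past the end of buf, so every snprintf() is limited to
exactly the remaining space. A minimal standalone sketch of the same idiom (the
buffer size and field values here are illustrative, not taken from the driver):

#include <stdio.h>
#include <string.h>

static void
append_fields(char *buf, size_t bufsz, int cpu, unsigned long long afar)
{
        char *p = buf;                  /* current write position */
        char *q = buf + bufsz;          /* one past the end of buf */

        (void) snprintf(p, (size_t)(q - p), "CPU%d", cpu);
        p += strlen(p);                 /* advance past what was written */

        (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
            (unsigned int)(afar >> 32), (unsigned int)afar);
        p += strlen(p);
}

int
main(void)
{
        char buf[64];

        append_fields(buf, sizeof (buf), 3, 0x1234567890ULL);
        (void) puts(buf);       /* prints: CPU3 AFAR 0x00000012.34567890 */
        return (0);
}
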
3334 3334
3335 3335 /*
3336 3336 * Ecache Scrubbing
3337 3337 *
3338 3338 * The basic idea is to prevent lines from sitting in the ecache long enough
3339 3339 * to build up soft errors which can lead to ecache parity errors.
3340 3340 *
3341 3341 * The following rules are observed when flushing the ecache:
3342 3342 *
3343 3343 * 1. When the system is busy, flush bad clean lines
3344 3344 * 2. When the system is idle, flush all clean lines
3345 3345 * 3. When the system is idle, flush good dirty lines
3346 3346 * 4. Never flush bad dirty lines.
3347 3347 *
3348 3348 * modify parity busy idle
3349 3349 * ----------------------------
3350 3350 * clean good X
3351 3351 * clean bad X X
3352 3352 * dirty good X
3353 3353 * dirty bad
3354 3354 *
3355 3355 * Bad or good refers to whether a line has an E$ parity error or not.
3356 3356 * Clean or dirty refers to the state of the modified bit. We currently
3357 3357 * default the scan rate to 100 (scan 10% of the cache per second).
3358 3358 *
3359 3359 * The following are E$ states and actions.
3360 3360 *
3361 3361 * We encode our state as a 3-bit number, consisting of:
3362 3362 * ECACHE_STATE_MODIFIED (0=clean, 1=dirty)
3363 3363 * ECACHE_STATE_PARITY (0=good, 1=bad)
3364 3364 * ECACHE_STATE_BUSY (0=idle, 1=busy)
3365 3365 *
3366 3366 * We associate a flushing and a logging action with each state.
3367 3367 *
3368 3368 * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3369 3369 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3370 3370 * E$ only, in addition to value being set by ec_flush.
3371 3371 */
3372 3372
3373 3373 #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */
3374 3374 #define NEVER_FLUSH 0x0 /* never flush the E$ line */
3375 3375 #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */
3376 3376
3377 3377 struct {
3378 3378 char ec_flush; /* whether to flush or not */
3379 3379 char ec_log; /* ecache logging */
3380 3380 char ec_log_type; /* log type info */
3381 3381 } ec_action[] = { /* states of the E$ line in M P B */
3382 3382 { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */
3383 3383 { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */
3384 3384 { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3385 3385 { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3386 3386 { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */
3387 3387 { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */
3388 3388 { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */
3389 3389 { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */
3390 3390 };
3391 3391
3392 3392 /*
3393 3393  * Offsets into ec_action[] that identify the clean_good_busy and
3394 3394  * dirty_good_busy lines.
3395 3395 */
3396 3396 #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */
3397 3397 #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */
3398 3398
3399 3399 /*
3400 3400  * We conditionally flush lines which are Clean_Good_Busy or
3401 3401  * Dirty_Good_Busy, but only for non-mirrored E$.
3402 3402 */
3403 3403 #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
3404 3404 #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))
3405 3405
3406 3406 #define ECACHE_STATE_MODIFIED 0x4
3407 3407 #define ECACHE_STATE_PARITY 0x2
3408 3408 #define ECACHE_STATE_BUSY 0x1
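
The three bits compose directly into the ec_action[] index used by
scrub_ecache_line() below (modified is bit 2, parity bit 1, busy bit 0), so
clean_good_idle is entry 0 and dirty_bad_busy entry 7. A small illustrative
helper, not part of the driver, showing the composition:

/*
 * Illustrative only: build the M/P/B index the same way the
 * scrubber does with its mpb variable.
 */
static int
ec_action_index(int dirty, int bad_parity, int busy)
{
        int mpb = 0;

        if (dirty)
                mpb |= ECACHE_STATE_MODIFIED;   /* 1 x x */
        if (bad_parity)
                mpb |= ECACHE_STATE_PARITY;     /* x 1 x */
        if (busy)
                mpb |= ECACHE_STATE_BUSY;       /* x x 1 */

        return (mpb);   /* e.g. (1, 1, 1) -> 7, dirty_bad_busy */
}
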
3409 3409
3410 3410 /*
3411 3411  * If the ecache is mirrored, ecache_calls_a_sec and ecache_scan_rate are reduced.
3412 3412 */
3413 3413 int ecache_calls_a_sec_mirrored = 1;
3414 3414 int ecache_lines_per_call_mirrored = 1;
3415 3415
3416 3416 int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */
3417 3417 int ecache_scrub_verbose = 1; /* prints clean and dirty lines */
3418 3418 int ecache_scrub_panic = 0; /* panics on a clean and dirty line */
3419 3419 int ecache_calls_a_sec = 100; /* scrubber calls per sec */
3420 3420 int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */
3421 3421 int ecache_idle_factor = 1; /* increase the scan rate when idle */
3422 3422 int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */
3423 3423 int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */
3424 3424
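
Worked through with the defaults above (the same formula scrub_ecache_line()
applies below; only the numbers are filled in):

/*
 * scan_lines = (nlines * ecache_scan_rate) / (1000 * ecache_calls_a_sec)
 *
 * With ecache_scan_rate = 100 and ecache_calls_a_sec = 100:
 *	scan_lines = (nlines * 100) / (1000 * 100) = nlines / 1000
 * per call; at 100 calls per second that is nlines / 10 per second,
 * i.e. 10% of the cache scanned each second, matching the "scan rate
 * (in tenths of a percent)" comment above.
 */
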
3425 3425 volatile int ec_timeout_calls = 1; /* timeout calls */
3426 3426
3427 3427 /*
3428 3428 * Interrupt number and pil for ecache scrubber cross-trap calls.
3429 3429 */
3430 3430 static uint64_t ecache_scrub_inum;
3431 3431 uint_t ecache_scrub_pil = PIL_9;
3432 3432
3433 3433 /*
3434 3434 * Kstats for the E$ scrubber.
3435 3435 */
3436 3436 typedef struct ecache_kstat {
3437 3437 kstat_named_t clean_good_idle; /* # of lines scrubbed */
3438 3438 kstat_named_t clean_good_busy; /* # of lines skipped */
3439 3439 kstat_named_t clean_bad_idle; /* # of lines scrubbed */
3440 3440 kstat_named_t clean_bad_busy; /* # of lines scrubbed */
3441 3441 kstat_named_t dirty_good_idle; /* # of lines scrubbed */
3442 3442 kstat_named_t dirty_good_busy; /* # of lines skipped */
3443 3443 kstat_named_t dirty_bad_idle; /* # of lines skipped */
3444 3444 kstat_named_t dirty_bad_busy; /* # of lines skipped */
3445 3445 kstat_named_t invalid_lines; /* # of invalid lines */
3446 3446 kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */
3447 3447 kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */
3448 3448 kstat_named_t tags_cleared; /* # of E$ tags cleared */
3449 3449 } ecache_kstat_t;
3450 3450
3451 3451 static ecache_kstat_t ec_kstat_template = {
3452 3452 { "clean_good_idle", KSTAT_DATA_ULONG },
3453 3453 { "clean_good_busy", KSTAT_DATA_ULONG },
3454 3454 { "clean_bad_idle", KSTAT_DATA_ULONG },
3455 3455 { "clean_bad_busy", KSTAT_DATA_ULONG },
3456 3456 { "dirty_good_idle", KSTAT_DATA_ULONG },
3457 3457 { "dirty_good_busy", KSTAT_DATA_ULONG },
3458 3458 { "dirty_bad_idle", KSTAT_DATA_ULONG },
3459 3459 { "dirty_bad_busy", KSTAT_DATA_ULONG },
3460 3460 { "invalid_lines", KSTAT_DATA_ULONG },
3461 3461 { "clean_good_busy_flush", KSTAT_DATA_ULONG },
3462 3462 { "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3463 3463 { "ecache_tags_cleared", KSTAT_DATA_ULONG }
3464 3464 };
3465 3465
3466 3466 struct kmem_cache *sf_private_cache;
3467 3467
3468 3468 /*
3469 3469  * Called periodically on each CPU to scan the ecache once a second,
3470 3470  * adjusting the ecache line index appropriately.
3471 3471 */
3472 3472 void
3473 3473 scrub_ecache_line()
3474 3474 {
3475 3475 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3476 3476 int cpuid = CPU->cpu_id;
3477 3477 uint32_t index = ssmp->ecache_flush_index;
3478 3478 uint64_t ec_size = cpunodes[cpuid].ecache_size;
3479 3479 size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3480 3480 int nlines = ssmp->ecache_nlines;
3481 3481 uint32_t ec_set_size = ec_size / ecache_associativity;
3482 3482 int ec_mirror = ssmp->ecache_mirror;
3483 3483 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3484 3484
3485 3485 int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3486 3486 int mpb; /* encode Modified, Parity, Busy for action */
3487 3487 uchar_t state;
3488 3488 uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3489 3489 uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3490 3490 ec_data_t ec_data[8];
3491 3491 kstat_named_t *ec_knp;
3492 3492
3493 3493 switch (ec_mirror) {
3494 3494 default:
3495 3495 case ECACHE_CPU_NON_MIRROR:
3496 3496 /*
3497 3497 * The E$ scan rate is expressed in units of tenths of
3498 3498 * a percent. ecache_scan_rate = 1000 (100%) means the
3499 3499 * whole cache is scanned every second.
3500 3500 */
3501 3501 scan_lines = (nlines * ecache_scan_rate) /
3502 3502 (1000 * ecache_calls_a_sec);
3503 3503 if (!(ssmp->ecache_busy)) {
3504 3504 if (ecache_idle_factor > 0) {
3505 3505 scan_lines *= ecache_idle_factor;
3506 3506 }
3507 3507 } else {
3508 3508 flush_clean_busy = (scan_lines *
3509 3509 ecache_flush_clean_good_busy) / 100;
3510 3510 flush_dirty_busy = (scan_lines *
3511 3511 ecache_flush_dirty_good_busy) / 100;
3512 3512 }
3513 3513
3514 3514 ec_timeout_calls = (ecache_calls_a_sec ?
3515 3515 ecache_calls_a_sec : 1);
3516 3516 break;
3517 3517
3518 3518 case ECACHE_CPU_MIRROR:
3519 3519 scan_lines = ecache_lines_per_call_mirrored;
3520 3520 ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3521 3521 ecache_calls_a_sec_mirrored : 1);
3522 3522 break;
3523 3523 }
3524 3524
3525 3525 /*
3526 3526 * The ecache scrubber algorithm operates by reading and
3527 3527 * decoding the E$ tag to determine whether the corresponding E$ line
3528 3528  * can be scrubbed. There is an implicit assumption in the scrubber
3529 3529  * logic that the E$ tag is valid. Unfortunately, this assumption is
3530 3530  * flawed since the E$ tag may also be corrupted and have parity errors.
3531 3531  * The scrubber logic is enhanced to check the validity of the E$ tag
3532 3532  * before scrubbing. When a parity error is detected in the E$ tag,
3533 3533  * it is possible to recover and scrub the tag under certain conditions
3534 3534  * so that an ETP error condition can be avoided.
3535 3535 */
3536 3536
3537 3537 for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3538 3538 /*
3539 3539 * We get the old-AFSR before clearing the AFSR sticky bits
3540 3540 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
3541 3541 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
3542 3542 */
3543 3543 ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3544 3544 state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3545 3545 cpu_ec_state_shift);
3546 3546
3547 3547 /*
3548 3548 		 * If ETP is set, try to scrub the ecache tag.
3549 3549 */
3550 3550 if (nafsr & P_AFSR_ETP) {
3551 3551 ecache_scrub_tag_err(nafsr, state, index);
3552 3552 } else if (state & cpu_ec_state_valid) {
3553 3553 /*
3554 3554 * ETP is not set, E$ tag is valid.
3555 3555 * Proceed with the E$ scrubbing.
3556 3556 */
3557 3557 if (state & cpu_ec_state_dirty)
3558 3558 mpb |= ECACHE_STATE_MODIFIED;
3559 3559
3560 3560 tafsr = check_ecache_line(index, acc_afsr);
3561 3561
3562 3562 if (tafsr & P_AFSR_EDP) {
3563 3563 mpb |= ECACHE_STATE_PARITY;
3564 3564
3565 3565 if (ecache_scrub_verbose ||
3566 3566 ecache_scrub_panic) {
3567 3567 get_ecache_dtag(P2ALIGN(index, 64),
3568 3568 (uint64_t *)&ec_data[0],
3569 3569 &ec_tag, &oafsr, acc_afsr);
3570 3570 }
3571 3571 }
3572 3572
3573 3573 if (ssmp->ecache_busy)
3574 3574 mpb |= ECACHE_STATE_BUSY;
3575 3575
3576 3576 ec_knp = (kstat_named_t *)ec_ksp + mpb;
3577 3577 ec_knp->value.ul++;
3578 3578
3579 3579 paddr = ((ec_tag & cpu_ec_tag_mask) <<
3580 3580 cpu_ec_tag_shift) | (index % ec_set_size);
3581 3581
3582 3582 /*
3583 3583 			 * We flush the E$ lines depending on ec_flush;
3584 3584 			 * for mirrored E$ we additionally flush the
3585 3585 			 * clean_good_busy and dirty_good_busy lines.
3586 3586 */
3587 3587 if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3588 3588 flushecacheline(paddr, ec_size);
3589 3589 } else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3590 3590 (ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3591 3591 flushecacheline(paddr, ec_size);
3592 3592 } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3593 3593 softcall(ecache_page_retire, (void *)paddr);
3594 3594 }
3595 3595
3596 3596 /*
3597 3597 * Conditionally flush both the clean_good and
3598 3598 * dirty_good lines when busy.
3599 3599 */
3600 3600 if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
3601 3601 flush_clean_busy--;
3602 3602 flushecacheline(paddr, ec_size);
3603 3603 ec_ksp->clean_good_busy_flush.value.ul++;
3604 3604 } else if (DGB(mpb, ec_mirror) &&
3605 3605 (flush_dirty_busy > 0)) {
3606 3606 flush_dirty_busy--;
3607 3607 flushecacheline(paddr, ec_size);
3608 3608 ec_ksp->dirty_good_busy_flush.value.ul++;
3609 3609 }
3610 3610
3611 3611 if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
3612 3612 ecache_scrub_panic)) {
3613 3613 ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
3614 3614 tafsr);
3615 3615 }
3616 3616
3617 3617 } else {
3618 3618 ec_ksp->invalid_lines.value.ul++;
3619 3619 }
3620 3620
3621 3621 if ((index += ec_linesize) >= ec_size)
3622 3622 index = 0;
3623 3623
3624 3624 }
3625 3625
3626 3626 /*
3627 3627 * set the ecache scrub index for the next time around
3628 3628 */
3629 3629 ssmp->ecache_flush_index = index;
3630 3630
3631 3631 if (*acc_afsr & P_AFSR_CP) {
3632 3632 uint64_t ret_afsr;
3633 3633
3634 3634 ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
3635 3635 if ((ret_afsr & P_AFSR_CP) == 0)
3636 3636 *acc_afsr = 0;
3637 3637 }
3638 3638 }
3639 3639
3640 3640 /*
3641 3641 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until
3642 3642 * we decrement the outstanding request count to zero.
3643 3643 */
3644 3644
3645 3645 /*ARGSUSED*/
3646 3646 uint_t
3647 3647 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3648 3648 {
3649 3649 int i;
3650 3650 int outstanding;
3651 3651 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3652 3652 uint32_t *countp = &ssmp->ec_scrub_outstanding;
3653 3653
3654 3654 do {
3655 3655 outstanding = *countp;
3656 3656 ASSERT(outstanding > 0);
3657 3657 for (i = 0; i < outstanding; i++)
3658 3658 scrub_ecache_line();
3659 3659 } while (atomic_add_32_nv(countp, -outstanding));
3660 3660
3661 3661 return (DDI_INTR_CLAIMED);
3662 3662 }
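
Note that the drain loop above keeps atomic_add_32_nv(): the delta is
-outstanding rather than a fixed +-1, so the atomic_{inc,dec}_* forms introduced
by this change do not apply here. A stand-alone sketch of the same drain pattern
using C11 atomics in place of <sys/atomic.h> (names here are illustrative):

#include <stdatomic.h>
#include <stdio.h>

/*
 * Drain a pending-request counter: handle everything observed, then
 * subtract exactly that many; loop again if new requests arrived in
 * the meantime (atomic_fetch_sub returns the value before the sub).
 */
static void
drain(atomic_uint *countp)
{
        unsigned int outstanding;

        do {
                outstanding = atomic_load(countp);
                /* ... process 'outstanding' queued requests ... */
        } while (atomic_fetch_sub(countp, outstanding) - outstanding != 0);
}

int
main(void)
{
        atomic_uint count = 3;

        drain(&count);
        (void) printf("remaining: %u\n", atomic_load(&count));
        return (0);
}
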
3663 3663
3664 3664 /*
3665 3665 * force each cpu to perform an ecache scrub, called from a timeout
3666 3666 */
3667 3667 extern xcfunc_t ecache_scrubreq_tl1;
3668 3668
3669 3669 void
3670 3670 do_scrub_ecache_line(void)
3671 3671 {
3672 3672 long delta;
3673 3673
3674 3674 if (ecache_calls_a_sec > hz)
3675 3675 ecache_calls_a_sec = hz;
3676 3676 else if (ecache_calls_a_sec <= 0)
3677 3677 ecache_calls_a_sec = 1;
3678 3678
3679 3679 if (ecache_calls_a_sec_mirrored > hz)
3680 3680 ecache_calls_a_sec_mirrored = hz;
3681 3681 else if (ecache_calls_a_sec_mirrored <= 0)
3682 3682 ecache_calls_a_sec_mirrored = 1;
3683 3683
3684 3684 if (ecache_scrub_enable) {
3685 3685 xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3686 3686 delta = hz / ec_timeout_calls;
3687 3687 } else {
3688 3688 delta = hz;
3689 3689 }
3690 3690
3691 3691 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3692 3692 delta);
3693 3693 }
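
Assuming the common default of hz = 100 and ec_timeout_calls = 100 (set from
ecache_calls_a_sec in scrub_ecache_line() above), the rearm math works out as
follows (a worked example, not additional logic):

/*
 * delta = hz / ec_timeout_calls = 100 / 100 = 1 tick (10ms at hz=100),
 * so the cross-trap fires 100 times per second, one scrub call each.
 * With ecache_scrub_enable off, delta = hz and we simply poll once a
 * second waiting for scrubbing to be re-enabled.
 */
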
3694 3694
3695 3695 /*
3696 3696  * initialization for ecache scrubbing.
3697 3697 * This routine is called AFTER all cpus have had cpu_init_private called
3698 3698 * to initialize their private data areas.
3699 3699 */
3700 3700 void
3701 3701 cpu_init_cache_scrub(void)
3702 3702 {
3703 3703 if (ecache_calls_a_sec > hz) {
3704 3704 cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3705 3705 "resetting to hz (%d)", ecache_calls_a_sec, hz);
3706 3706 ecache_calls_a_sec = hz;
3707 3707 }
3708 3708
3709 3709 /*
3710 3710 * Register softint for ecache scrubbing.
3711 3711 */
3712 3712 ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3713 3713 scrub_ecache_line_intr, NULL, SOFTINT_MT);
3714 3714
3715 3715 /*
3716 3716 * kick off the scrubbing using realtime timeout
3717 3717 */
3718 3718 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3719 3719 hz / ecache_calls_a_sec);
3720 3720 }
3721 3721
3722 3722 /*
3723 3723 * Unset the busy flag for this cpu.
3724 3724 */
3725 3725 void
3726 3726 cpu_idle_ecache_scrub(struct cpu *cp)
3727 3727 {
3728 3728 if (CPU_PRIVATE(cp) != NULL) {
3729 3729 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3730 3730 sfpr_scrub_misc);
3731 3731 ssmp->ecache_busy = ECACHE_CPU_IDLE;
3732 3732 }
3733 3733 }
3734 3734
3735 3735 /*
3736 3736 * Set the busy flag for this cpu.
3737 3737 */
3738 3738 void
3739 3739 cpu_busy_ecache_scrub(struct cpu *cp)
3740 3740 {
3741 3741 if (CPU_PRIVATE(cp) != NULL) {
3742 3742 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3743 3743 sfpr_scrub_misc);
3744 3744 ssmp->ecache_busy = ECACHE_CPU_BUSY;
3745 3745 }
3746 3746 }
3747 3747
3748 3748 /*
3749 3749 * initialize the ecache scrubber data structures
3750 3750 * The global entry point cpu_init_private replaces this entry point.
3751 3751 *
3752 3752 */
3753 3753 static void
3754 3754 cpu_init_ecache_scrub_dr(struct cpu *cp)
3755 3755 {
3756 3756 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3757 3757 int cpuid = cp->cpu_id;
3758 3758
3759 3759 /*
3760 3760 	 * initialize bookkeeping for cache scrubbing
3761 3761 */
3762 3762 bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3763 3763
3764 3764 ssmp->ecache_flush_index = 0;
3765 3765
3766 3766 ssmp->ecache_nlines =
3767 3767 cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3768 3768
3769 3769 /*
3770 3770 * Determine whether we are running on mirrored SRAM
3771 3771 */
3772 3772
3773 3773 if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3774 3774 ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3775 3775 else
3776 3776 ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3777 3777
3778 3778 cpu_busy_ecache_scrub(cp);
3779 3779
3780 3780 /*
3781 3781 * initialize the kstats
3782 3782 */
3783 3783 ecache_kstat_init(cp);
3784 3784 }
3785 3785
3786 3786 /*
3787 3787  * uninitialize the ecache scrubber data structures.
3788 3788 * The global entry point cpu_uninit_private replaces this entry point.
3789 3789 */
3790 3790 static void
3791 3791 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3792 3792 {
3793 3793 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3794 3794
3795 3795 if (ssmp->ecache_ksp != NULL) {
3796 3796 kstat_delete(ssmp->ecache_ksp);
3797 3797 ssmp->ecache_ksp = NULL;
3798 3798 }
3799 3799
3800 3800 /*
3801 3801 * un-initialize bookkeeping for cache scrubbing
3802 3802 */
3803 3803 bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3804 3804
3805 3805 cpu_idle_ecache_scrub(cp);
3806 3806 }
3807 3807
3808 3808 struct kmem_cache *sf_private_cache;
3809 3809
3810 3810 /*
3811 3811 * Cpu private initialization. This includes allocating the cpu_private
3812 3812 * data structure, initializing it, and initializing the scrubber for this
3813 3813 * cpu. This is called once for EVERY cpu, including CPU 0. This function
3814 3814 * calls cpu_init_ecache_scrub_dr to init the scrubber.
3815 3815 * We use kmem_cache_create for the spitfire private data structure because it
3816 3816 * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3817 3817 */
3818 3818 void
3819 3819 cpu_init_private(struct cpu *cp)
3820 3820 {
3821 3821 spitfire_private_t *sfprp;
3822 3822
3823 3823 ASSERT(CPU_PRIVATE(cp) == NULL);
3824 3824
3825 3825 /*
3826 3826 * If the sf_private_cache has not been created, create it.
3827 3827 */
3828 3828 if (sf_private_cache == NULL) {
3829 3829 sf_private_cache = kmem_cache_create("sf_private_cache",
3830 3830 sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3831 3831 NULL, NULL, NULL, NULL, 0);
3832 3832 ASSERT(sf_private_cache);
3833 3833 }
3834 3834
3835 3835 sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3836 3836
3837 3837 bzero(sfprp, sizeof (spitfire_private_t));
3838 3838
3839 3839 cpu_init_ecache_scrub_dr(cp);
3840 3840 }
3841 3841
3842 3842 /*
3843 3843  * Cpu private uninitialization. Uninitialize the Ecache scrubber and
3844 3844  * deallocate the scrubber data structures and cpu_private data structure.
3845 3845  * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit
3846 3846 * the scrubber for the specified cpu.
3847 3847 */
3848 3848 void
3849 3849 cpu_uninit_private(struct cpu *cp)
3850 3850 {
3851 3851 ASSERT(CPU_PRIVATE(cp));
3852 3852
3853 3853 cpu_uninit_ecache_scrub_dr(cp);
3854 3854 kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3855 3855 CPU_PRIVATE(cp) = NULL;
3856 3856 }
3857 3857
3858 3858 /*
3859 3859 * initialize the ecache kstats for each cpu
3860 3860 */
3861 3861 static void
3862 3862 ecache_kstat_init(struct cpu *cp)
3863 3863 {
3864 3864 struct kstat *ksp;
3865 3865 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3866 3866
3867 3867 ASSERT(ssmp != NULL);
3868 3868
3869 3869 if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3870 3870 KSTAT_TYPE_NAMED,
3871 3871 sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3872 3872 KSTAT_FLAG_WRITABLE)) == NULL) {
3873 3873 ssmp->ecache_ksp = NULL;
3874 3874 cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3875 3875 return;
3876 3876 }
3877 3877
3878 3878 ssmp->ecache_ksp = ksp;
3879 3879 bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3880 3880 kstat_install(ksp);
3881 3881 }
3882 3882
3883 3883 /*
3884 3884 * log the bad ecache information
3885 3885 */
3886 3886 static void
3887 3887 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3888 3888 uint64_t afsr)
3889 3889 {
3890 3890 spitf_async_flt spf_flt;
3891 3891 struct async_flt *aflt;
3892 3892 int i;
3893 3893 char *class;
3894 3894
3895 3895 bzero(&spf_flt, sizeof (spitf_async_flt));
3896 3896 aflt = &spf_flt.cmn_asyncflt;
3897 3897
3898 3898 for (i = 0; i < 8; i++) {
3899 3899 spf_flt.flt_ec_data[i] = ec_data[i];
3900 3900 }
3901 3901
3902 3902 spf_flt.flt_ec_tag = ec_tag;
3903 3903
3904 3904 if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3905 3905 spf_flt.flt_type = ec_action[mpb].ec_log_type;
3906 3906 } else spf_flt.flt_type = (ushort_t)mpb;
3907 3907
3908 3908 aflt->flt_inst = CPU->cpu_id;
3909 3909 aflt->flt_class = CPU_FAULT;
3910 3910 aflt->flt_id = gethrtime_waitfree();
3911 3911 aflt->flt_addr = paddr;
3912 3912 aflt->flt_stat = afsr;
3913 3913 aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3914 3914
3915 3915 switch (mpb) {
3916 3916 case CPU_ECACHE_TAG_ERR:
3917 3917 case CPU_ECACHE_ADDR_PAR_ERR:
3918 3918 case CPU_ECACHE_ETP_ETS_ERR:
3919 3919 case CPU_ECACHE_STATE_ERR:
3920 3920 class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3921 3921 break;
3922 3922 default:
3923 3923 class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3924 3924 break;
3925 3925 }
3926 3926
3927 3927 cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3928 3928 ue_queue, aflt->flt_panic);
3929 3929
3930 3930 if (aflt->flt_panic)
3931 3931 cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3932 3932 		    " line detected");
3933 3933 }
3934 3934
3935 3935 /*
3936 3936  * Process an ecache error that occurred during the E$ scrubbing.
3937 3937  * We do the ecache scan to find the bad line, flush the bad line,
3938 3938  * and start the memscrubber to find any UE (in memory or in another cache).
3939 3939 */
3940 3940 static uint64_t
3941 3941 ecache_scrub_misc_err(int type, uint64_t afsr)
3942 3942 {
3943 3943 spitf_async_flt spf_flt;
3944 3944 struct async_flt *aflt;
3945 3945 uint64_t oafsr;
3946 3946
3947 3947 bzero(&spf_flt, sizeof (spitf_async_flt));
3948 3948 aflt = &spf_flt.cmn_asyncflt;
3949 3949
3950 3950 /*
3951 3951 * Scan each line in the cache to look for the one
3952 3952 * with bad parity
3953 3953 */
3954 3954 aflt->flt_addr = AFLT_INV_ADDR;
3955 3955 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3956 3956 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3957 3957
3958 3958 if (oafsr & P_AFSR_CP) {
3959 3959 uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3960 3960 *cp_afsr |= oafsr;
3961 3961 }
3962 3962
3963 3963 /*
3964 3964 * If we found a bad PA, update the state to indicate if it is
3965 3965 * memory or I/O space.
3966 3966 */
3967 3967 if (aflt->flt_addr != AFLT_INV_ADDR) {
3968 3968 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3969 3969 MMU_PAGESHIFT)) ? 1 : 0;
3970 3970 }
3971 3971
3972 3972 spf_flt.flt_type = (ushort_t)type;
3973 3973
3974 3974 aflt->flt_inst = CPU->cpu_id;
3975 3975 aflt->flt_class = CPU_FAULT;
3976 3976 aflt->flt_id = gethrtime_waitfree();
3977 3977 aflt->flt_status = afsr;
3978 3978 aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3979 3979
3980 3980 /*
3981 3981 * We have the bad line, flush that line and start
3982 3982 * the memscrubber.
3983 3983 */
3984 3984 if (spf_flt.flt_ec_lcnt > 0) {
3985 3985 flushecacheline(P2ALIGN(aflt->flt_addr, 64),
3986 3986 cpunodes[CPU->cpu_id].ecache_size);
3987 3987 read_all_memscrub = 1;
3988 3988 memscrub_run();
3989 3989 }
3990 3990
3991 3991 cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
3992 3992 FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
3993 3993 (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
3994 3994
3995 3995 return (oafsr);
3996 3996 }
3997 3997
3998 3998 static void
3999 3999 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
4000 4000 {
4001 4001 ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
4002 4002 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
4003 4003 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
4004 4004 uint64_t ec_tag, paddr, oafsr;
4005 4005 ec_data_t ec_data[8];
4006 4006 int cpuid = CPU->cpu_id;
4007 4007 uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4008 4008 ecache_associativity;
4009 4009 uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4010 4010
4011 4011 get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4012 4012 &oafsr, cpu_afsr);
4013 4013 paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4014 4014 (index % ec_set_size);
4015 4015
4016 4016 /*
4017 4017 * E$ tag state has good parity
4018 4018 */
4019 4019 if ((afsr_ets & cpu_ec_state_parity) == 0) {
4020 4020 if (afsr_ets & cpu_ec_parity) {
4021 4021 /*
4022 4022 * E$ tag state bits indicate the line is clean,
4023 4023 * invalidate the E$ tag and continue.
4024 4024 */
4025 4025 if (!(state & cpu_ec_state_dirty)) {
4026 4026 /*
4027 4027 * Zero the tag and mark the state invalid
4028 4028 * with good parity for the tag.
4029 4029 */
4030 4030 if (isus2i || isus2e)
4031 4031 write_hb_ec_tag_parity(index);
4032 4032 else
4033 4033 write_ec_tag_parity(index);
4034 4034
4035 4035 /* Sync with the dual tag */
4036 4036 flushecacheline(0,
4037 4037 cpunodes[CPU->cpu_id].ecache_size);
4038 4038 ec_ksp->tags_cleared.value.ul++;
4039 4039 ecache_scrub_log(ec_data, ec_tag, paddr,
4040 4040 CPU_ECACHE_TAG_ERR, afsr);
4041 4041 return;
4042 4042 } else {
4043 4043 ecache_scrub_log(ec_data, ec_tag, paddr,
4044 4044 CPU_ECACHE_ADDR_PAR_ERR, afsr);
4045 4045 cmn_err(CE_PANIC, " E$ tag address has bad"
4046 4046 " parity");
4047 4047 }
4048 4048 } else if ((afsr_ets & cpu_ec_parity) == 0) {
4049 4049 /*
4050 4050 * ETS is zero but ETP is set
4051 4051 */
4052 4052 ecache_scrub_log(ec_data, ec_tag, paddr,
4053 4053 CPU_ECACHE_ETP_ETS_ERR, afsr);
4054 4054 cmn_err(CE_PANIC, "AFSR.ETP is set and"
4055 4055 " AFSR.ETS is zero");
4056 4056 }
4057 4057 } else {
4058 4058 /*
4059 4059 * E$ tag state bit has a bad parity
4060 4060 */
4061 4061 ecache_scrub_log(ec_data, ec_tag, paddr,
4062 4062 CPU_ECACHE_STATE_ERR, afsr);
4063 4063 cmn_err(CE_PANIC, "E$ tag state has bad parity");
4064 4064 }
4065 4065 }
4066 4066
4067 4067 static void
4068 4068 ecache_page_retire(void *arg)
4069 4069 {
4070 4070 uint64_t paddr = (uint64_t)arg;
4071 4071 (void) page_retire(paddr, PR_UE);
4072 4072 }
4073 4073
4074 4074 void
4075 4075 sticksync_slave(void)
4076 4076 {}
4077 4077
4078 4078 void
4079 4079 sticksync_master(void)
4080 4080 {}
4081 4081
4082 4082 /*ARGSUSED*/
4083 4083 void
4084 4084 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4085 4085 {}
4086 4086
4087 4087 void
4088 4088 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4089 4089 {
4090 4090 int status;
4091 4091 ddi_fm_error_t de;
4092 4092
4093 4093 bzero(&de, sizeof (ddi_fm_error_t));
4094 4094
4095 4095 de.fme_version = DDI_FME_VERSION;
4096 4096 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4097 4097 FM_ENA_FMT1);
4098 4098 de.fme_flag = expected;
4099 4099 de.fme_bus_specific = (void *)aflt->flt_addr;
4100 4100 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4101 4101
4102 4102 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4103 4103 aflt->flt_panic = 1;
4104 4104 }
4105 4105
4106 4106 /*ARGSUSED*/
4107 4107 void
4108 4108 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4109 4109 errorq_t *eqp, uint_t flag)
4110 4110 {
4111 4111 struct async_flt *aflt = (struct async_flt *)payload;
4112 4112
4113 4113 aflt->flt_erpt_class = error_class;
4114 4114 errorq_dispatch(eqp, payload, payload_sz, flag);
4115 4115 }
4116 4116
4117 4117 #define MAX_SIMM 8
4118 4118
4119 4119 struct ce_info {
4120 4120 char name[UNUM_NAMLEN];
4121 4121 uint64_t intermittent_total;
4122 4122 uint64_t persistent_total;
4123 4123 uint64_t sticky_total;
4124 4124 unsigned short leaky_bucket_cnt;
4125 4125 };
4126 4126
4127 4127 /*
4128 4128 * Separately-defined structure for use in reporting the ce_info
4129 4129 * to SunVTS without exposing the internal layout and implementation
4130 4130 * of struct ce_info.
4131 4131 */
4132 4132 static struct ecc_error_info ecc_error_info_data = {
4133 4133 { "version", KSTAT_DATA_UINT32 },
4134 4134 { "maxcount", KSTAT_DATA_UINT32 },
4135 4135 { "count", KSTAT_DATA_UINT32 }
4136 4136 };
4137 4137 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4138 4138 sizeof (struct kstat_named);
4139 4139
4140 4140 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4141 4141 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4142 4142 #endif
4143 4143
4144 4144 struct ce_info *mem_ce_simm = NULL;
4145 4145 size_t mem_ce_simm_size = 0;
4146 4146
4147 4147 /*
4148 4148 * Default values for the number of CE's allowed per interval.
4149 4149  * The interval is defined in minutes;
4150 4150  * SOFTERR_MIN_TIMEOUT is defined in microseconds.
4151 4151 */
4152 4152 #define SOFTERR_LIMIT_DEFAULT 2
4153 4153 #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */
4154 4154 #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */
4155 4155 #define TIMEOUT_NONE ((timeout_id_t)0)
4156 4156 #define TIMEOUT_SET ((timeout_id_t)1)
4157 4157
4158 4158 /*
4159 4159  * timeout identifier for leaky_bucket
4160 4160 */
4161 4161 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4162 4162
4163 4163 /*
4164 4164 * Tunables for maximum number of allowed CE's in a given time
4165 4165 */
4166 4166 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4167 4167 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4168 4168
4169 4169 void
4170 4170 cpu_mp_init(void)
4171 4171 {
4172 4172 size_t size = cpu_aflt_size();
4173 4173 size_t i;
4174 4174 kstat_t *ksp;
4175 4175
4176 4176 /*
4177 4177 * Initialize the CE error handling buffers.
4178 4178 */
4179 4179 mem_ce_simm_size = MAX_SIMM * max_ncpus;
4180 4180 size = sizeof (struct ce_info) * mem_ce_simm_size;
4181 4181 mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4182 4182
4183 4183 ksp = kstat_create("unix", 0, "ecc-info", "misc",
4184 4184 KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4185 4185 if (ksp != NULL) {
4186 4186 ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4187 4187 ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4188 4188 ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4189 4189 ecc_error_info_data.count.value.ui32 = 0;
4190 4190 kstat_install(ksp);
4191 4191 }
4192 4192
4193 4193 for (i = 0; i < mem_ce_simm_size; i++) {
4194 4194 struct kstat_ecc_mm_info *kceip;
4195 4195
4196 4196 kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4197 4197 KM_SLEEP);
4198 4198 ksp = kstat_create("mm", i, "ecc-info", "misc",
4199 4199 KSTAT_TYPE_NAMED,
4200 4200 sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4201 4201 KSTAT_FLAG_VIRTUAL);
4202 4202 if (ksp != NULL) {
4203 4203 /*
4204 4204 * Re-declare ks_data_size to include room for the
4205 4205 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4206 4206 * set.
4207 4207 */
4208 4208 ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4209 4209 KSTAT_CE_UNUM_NAMLEN;
4210 4210 ksp->ks_data = kceip;
4211 4211 kstat_named_init(&kceip->name,
4212 4212 "name", KSTAT_DATA_STRING);
4213 4213 kstat_named_init(&kceip->intermittent_total,
4214 4214 "intermittent_total", KSTAT_DATA_UINT64);
4215 4215 kstat_named_init(&kceip->persistent_total,
4216 4216 "persistent_total", KSTAT_DATA_UINT64);
4217 4217 kstat_named_init(&kceip->sticky_total,
4218 4218 "sticky_total", KSTAT_DATA_UINT64);
4219 4219 /*
4220 4220 * Use the default snapshot routine as it knows how to
4221 4221 * deal with named kstats with long strings.
4222 4222 */
4223 4223 ksp->ks_update = ecc_kstat_update;
4224 4224 kstat_install(ksp);
4225 4225 } else {
4226 4226 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4227 4227 }
4228 4228 }
4229 4229 }
4230 4230
4231 4231 /*ARGSUSED*/
4232 4232 static void
4233 4233 leaky_bucket_timeout(void *arg)
4234 4234 {
4235 4235 int i;
4236 4236 struct ce_info *psimm = mem_ce_simm;
4237 4237
4238 4238 for (i = 0; i < mem_ce_simm_size; i++) {
4239 4239 if (psimm[i].leaky_bucket_cnt > 0)
4240 - atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4240 + atomic_dec_16(&psimm[i].leaky_bucket_cnt);
4241 4241 }
4242 4242 add_leaky_bucket_timeout();
4243 4243 }
4244 4244
4245 4245 static void
4246 4246 add_leaky_bucket_timeout(void)
4247 4247 {
4248 4248 long timeout_in_microsecs;
4249 4249
4250 4250 /*
4251 4251 * create timeout for next leak.
4252 4252 *
4253 4253 * The timeout interval is calculated as follows
4254 4254 *
4255 4255 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4256 4256 *
4257 4257 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4258 4258 * in a minute), then multiply this by MICROSEC to get the interval
4259 4259 * in microseconds. Divide this total by ecc_softerr_limit so that
4260 4260 * the timeout interval is accurate to within a few microseconds.
4261 4261 */
4262 4262
4263 4263 if (ecc_softerr_limit <= 0)
4264 4264 ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4265 4265 if (ecc_softerr_interval <= 0)
4266 4266 ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4267 4267
4268 4268 timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4269 4269 ecc_softerr_limit;
4270 4270
4271 4271 if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4272 4272 timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4273 4273
4274 4274 leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4275 4275 (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4276 4276 }
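
Worked through with the defaults (ecc_softerr_interval = 1440 minutes and
ecc_softerr_limit = 2), the formula above gives:

/*
 * timeout = (1440 * 60 * MICROSEC) / 2
 *         = 86,400,000,000us / 2
 *         = 43,200,000,000us, i.e. one leak every 12 hours, which
 * allows the 2 errors per 24-hour interval that the defaults intend.
 * This is far above SOFTERR_MIN_TIMEOUT (1 minute), so no clamping.
 */
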
4277 4277
4278 4278 /*
4279 4279 * Legacy Correctable ECC Error Hash
4280 4280 *
4281 4281 * All of the code below this comment is used to implement a legacy array
4282 4282 * which counted intermittent, persistent, and sticky CE errors by unum,
4283 4283 * and then was later extended to publish the data as a kstat for SunVTS.
4284 4284 * All of this code is replaced by FMA, and remains here until such time
4285 4285 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4286 4286 *
4287 4287 * Errors are saved in three buckets per-unum:
4288 4288 * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4289 4289 * This could represent a problem, and is immediately printed out.
4290 4290 * (2) persistent - was successfully scrubbed
4291 4291 * These errors use the leaky bucket algorithm to determine
4292 4292 * if there is a serious problem.
4293 4293 * (3) intermittent - may have originated from the cpu or upa/safari bus,
4294 4294 * and does not necessarily indicate any problem with the dimm itself,
4295 4295  *     but is critical information for debugging new hardware.
4296 4296 * Because we do not know if it came from the dimm, it would be
4297 4297 * inappropriate to include these in the leaky bucket counts.
4298 4298 *
4299 4299 * If the E$ line was modified before the scrub operation began, then the
4300 4300 * displacement flush at the beginning of scrubphys() will cause the modified
4301 4301 * line to be written out, which will clean up the CE. Then, any subsequent
4302 4302 * read will not cause an error, which will cause persistent errors to be
4303 4303 * identified as intermittent.
4304 4304 *
4305 4305 * If a DIMM is going bad, it will produce true persistents as well as
4306 4306 * false intermittents, so these intermittents can be safely ignored.
4307 4307 *
4308 4308 * If the error count is excessive for a DIMM, this function will return
4309 4309 * PR_MCE, and the CPU module may then decide to remove that page from use.
4310 4310 */
4311 4311 static int
4312 4312 ce_count_unum(int status, int len, char *unum)
4313 4313 {
4314 4314 int i;
4315 4315 struct ce_info *psimm = mem_ce_simm;
4316 4316 int page_status = PR_OK;
4317 4317
4318 4318 ASSERT(psimm != NULL);
4319 4319
4320 4320 if (len <= 0 ||
4321 4321 (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
4322 4322 return (page_status);
4323 4323
4324 4324 /*
4325 4325 * Initialize the leaky_bucket timeout
4326 4326 */
4327 4327 if (atomic_cas_ptr(&leaky_bucket_timeout_id,
4328 4328 TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
4329 4329 add_leaky_bucket_timeout();
4330 4330
4331 4331 for (i = 0; i < mem_ce_simm_size; i++) {
4332 4332 if (psimm[i].name[0] == '\0') {
4333 4333 /*
4334 4334 * Hit the end of the valid entries, add
4335 4335 * a new one.
4336 4336 */
4337 4337 (void) strncpy(psimm[i].name, unum, len);
4338 4338 if (status & ECC_STICKY) {
4339 4339 /*
4340 4340 * Sticky - the leaky bucket is used to track
4341 4341 * soft errors. Since a sticky error is a
4342 4342 * hard error and likely to be retired soon,
4343 4343 * we do not count it in the leaky bucket.
4344 4344 */
4345 4345 psimm[i].leaky_bucket_cnt = 0;
4346 4346 psimm[i].intermittent_total = 0;
4347 4347 psimm[i].persistent_total = 0;
4348 4348 psimm[i].sticky_total = 1;
4349 4349 cmn_err(CE_NOTE,
4350 4350 "[AFT0] Sticky Softerror encountered "
4351 4351 "on Memory Module %s\n", unum);
4352 4352 page_status = PR_MCE;
4353 4353 } else if (status & ECC_PERSISTENT) {
4354 4354 psimm[i].leaky_bucket_cnt = 1;
4355 4355 psimm[i].intermittent_total = 0;
4356 4356 psimm[i].persistent_total = 1;
4357 4357 psimm[i].sticky_total = 0;
4358 4358 } else {
4359 4359 /*
4360 4360 * Intermittent - Because the scrub operation
4361 4361 * cannot find the error in the DIMM, we will
4362 4362 * not count these in the leaky bucket
4363 4363 */
4364 4364 psimm[i].leaky_bucket_cnt = 0;
4365 4365 psimm[i].intermittent_total = 1;
4366 4366 psimm[i].persistent_total = 0;
4367 4367 psimm[i].sticky_total = 0;
4368 4368 }
4369 4369 ecc_error_info_data.count.value.ui32++;
4370 4370 break;
4371 4371 } else if (strncmp(unum, psimm[i].name, len) == 0) {
4372 4372 /*
4373 4373 * Found an existing entry for the current
4374 4374 * memory module, adjust the counts.
4375 4375 */
4376 4376 if (status & ECC_STICKY) {
4377 4377 psimm[i].sticky_total++;
4378 4378 cmn_err(CE_NOTE,
4379 4379 "[AFT0] Sticky Softerror encountered "
4380 4380 "on Memory Module %s\n", unum);
4381 4381 page_status = PR_MCE;
4382 4382 } else if (status & ECC_PERSISTENT) {
4383 4383 int new_value;
4384 4384
4385 - new_value = atomic_add_16_nv(
4386 - &psimm[i].leaky_bucket_cnt, 1);
4385 + new_value = atomic_inc_16_nv(
4386 + &psimm[i].leaky_bucket_cnt);
4387 4387 psimm[i].persistent_total++;
4388 4388 if (new_value > ecc_softerr_limit) {
4389 4389 cmn_err(CE_NOTE, "[AFT0] Most recent %d"
4390 4390 " soft errors from Memory Module"
4391 4391 " %s exceed threshold (N=%d,"
4392 4392 " T=%dh:%02dm) triggering page"
4393 4393 " retire", new_value, unum,
4394 4394 ecc_softerr_limit,
4395 4395 ecc_softerr_interval / 60,
4396 4396 ecc_softerr_interval % 60);
4397 - atomic_add_16(
4398 - &psimm[i].leaky_bucket_cnt, -1);
4397 + atomic_dec_16(
4398 + &psimm[i].leaky_bucket_cnt);
4399 4399 page_status = PR_MCE;
4400 4400 }
4401 4401 } else { /* Intermittent */
4402 4402 psimm[i].intermittent_total++;
4403 4403 }
4404 4404 break;
4405 4405 }
4406 4406 }
4407 4407
4408 4408 if (i >= mem_ce_simm_size)
4409 4409 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4410 4410 "space.\n");
4411 4411
4412 4412 return (page_status);
4413 4413 }
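
The persistent-CE path above is the leaky bucket this change converts to
atomic_{inc,dec}_16: fill on each persistent CE, leak one unit per timeout, and
retire the page when the fill rate outruns the leak rate. A stripped-down
sketch of that pattern using C11 atomics as stand-ins for the <sys/atomic.h>
routines (the names and the limit here are illustrative):

#include <stdatomic.h>
#include <stdbool.h>

#define SOFTERR_LIMIT	2	/* CEs tolerated per leak interval */

static atomic_ushort leaky_bucket_cnt;

/* Called per persistent CE; returns true once the threshold trips. */
static bool
bucket_fill(void)
{
        /* analogue of atomic_inc_16_nv(): value after the increment */
        unsigned short newval = atomic_fetch_add(&leaky_bucket_cnt, 1) + 1;

        if (newval > SOFTERR_LIMIT) {
                /* analogue of atomic_dec_16(): hold the count at the cap */
                atomic_fetch_sub(&leaky_bucket_cnt, 1);
                return (true);          /* caller retires the page */
        }
        return (false);
}

/* Called from the periodic timeout: leak one unit if any remain. */
static void
bucket_leak(void)
{
        if (atomic_load(&leaky_bucket_cnt) > 0)
                atomic_fetch_sub(&leaky_bucket_cnt, 1);
}
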
4414 4414
4415 4415 /*
4416 4416 * Function to support counting of IO detected CEs.
4417 4417 */
4418 4418 void
4419 4419 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4420 4420 {
4421 4421 int err;
4422 4422
4423 4423 err = ce_count_unum(ecc->flt_status, len, unum);
4424 4424 if (err != PR_OK && automatic_page_removal) {
4425 4425 (void) page_retire(ecc->flt_addr, err);
4426 4426 }
4427 4427 }
4428 4428
4429 4429 static int
4430 4430 ecc_kstat_update(kstat_t *ksp, int rw)
4431 4431 {
4432 4432 struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4433 4433 struct ce_info *ceip = mem_ce_simm;
4434 4434 int i = ksp->ks_instance;
4435 4435
4436 4436 if (rw == KSTAT_WRITE)
4437 4437 return (EACCES);
4438 4438
4439 4439 ASSERT(ksp->ks_data != NULL);
4440 4440 ASSERT(i < mem_ce_simm_size && i >= 0);
4441 4441
4442 4442 /*
4443 4443 * Since we're not using locks, make sure that we don't get partial
4444 4444 * data. The name is always copied before the counters are incremented
4445 4445 * so only do this update routine if at least one of the counters is
4446 4446 * non-zero, which ensures that ce_count_unum() is done, and the
4447 4447 * string is fully copied.
4448 4448 */
4449 4449 if (ceip[i].intermittent_total == 0 &&
4450 4450 ceip[i].persistent_total == 0 &&
4451 4451 ceip[i].sticky_total == 0) {
4452 4452 /*
4453 4453 * Uninitialized or partially initialized. Ignore.
4454 4454 * The ks_data buffer was allocated via kmem_zalloc,
4455 4455 * so no need to bzero it.
4456 4456 */
4457 4457 return (0);
4458 4458 }
4459 4459
4460 4460 kstat_named_setstr(&kceip->name, ceip[i].name);
4461 4461 kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4462 4462 kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4463 4463 kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4464 4464
4465 4465 return (0);
4466 4466 }
4467 4467
4468 4468 #define VIS_BLOCKSIZE 64
4469 4469
4470 4470 int
4471 4471 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4472 4472 {
4473 4473 int ret, watched;
4474 4474
4475 4475 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4476 4476 ret = dtrace_blksuword32(addr, data, 0);
4477 4477 if (watched)
4478 4478 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4479 4479
4480 4480 return (ret);
4481 4481 }
4482 4482
4483 4483 /*ARGSUSED*/
4484 4484 void
4485 4485 cpu_faulted_enter(struct cpu *cp)
4486 4486 {
4487 4487 }
4488 4488
4489 4489 /*ARGSUSED*/
4490 4490 void
4491 4491 cpu_faulted_exit(struct cpu *cp)
4492 4492 {
4493 4493 }
4494 4494
4495 4495 /*ARGSUSED*/
4496 4496 void
4497 4497 mmu_init_kernel_pgsz(struct hat *hat)
4498 4498 {
4499 4499 }
4500 4500
4501 4501 size_t
4502 4502 mmu_get_kernel_lpsize(size_t lpsize)
4503 4503 {
4504 4504 uint_t tte;
4505 4505
4506 4506 if (lpsize == 0) {
4507 4507 /* no setting for segkmem_lpsize in /etc/system: use default */
4508 4508 return (MMU_PAGESIZE4M);
4509 4509 }
4510 4510
4511 4511 for (tte = TTE8K; tte <= TTE4M; tte++) {
4512 4512 if (lpsize == TTEBYTES(tte))
4513 4513 return (lpsize);
4514 4514 }
4515 4515
4516 4516 return (TTEBYTES(TTE8K));
4517 4517 }